In C, parsing a string of integers separated by spaces

I am trying to use C to parse a file containing several lines of integers, separated by spaces, into a dynamic array of dynamic int arrays. Each row will be an array in an array of arrays. The number of rows and elements in each row is inconsistent.

What I have done so far is to use fgets to capture each line as a string.

I cannot, however, figure out how to parse a string of integers, separated by spaces.

I thought I could use sscanf (because fscanf can be used to parse a whole file of integers, separated by spaces). However, sscanf seems to have different functionality. sscanf only ever parses the first number in a string. I assume that since a string is a string, it is not a stream.

I was looking for a way to make a stream from a string, but this does not look like it is available in C (I cannot use non-standard libraries).

char* line;                    // receives the return value of fgets (NULL ends the outer loop)
char lineBuffer[BUFFER_SIZE];  // storage for one line of input
FILE *filePtr;
int value;                     // most recently parsed integer

...

// Read the input one line at a time until fgets returns NULL.
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {

    printf("%s\n", lineBuffer);

    // NOTE(review): every sscanf call starts scanning at lineBuffer[0] again, so this
    // loop keeps re-reading the first number and never advances to the rest of the line.
    while(sscanf(lineBuffer, "%d ", &value) > 0) {
        printf("%d\n", value);
    }
}

Is there something I can use to parse a string? If not, is there an alternative to this whole system? I would prefer not to use REGEX.

+4
source share
5 answers

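Use strtol(), which reports through its end pointer where the conversion stopped, and keep a char pointer to the current position in the line: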

    /* Echo each line, then print every integer strtol can convert from it. */
    while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
        printf("%s\n", lineBuffer);

        /* cursor walks the line; strtol stores in stop where each conversion ended. */
        char *const limit = lineBuffer + BUFFER_SIZE;
        for (char *cursor = lineBuffer, *stop; cursor < limit; cursor = stop) {
            long int number = strtol(cursor, &stop, 10);

            /* Nothing converted: no more numbers on this line.
               (The docs also suggest checking the errno value.) */
            if (stop == cursor && number == 0L) {
                break;
            }

            printf("%ld\n", number);
        }
    }
+4

Reading each line with fgets() is a good first step.

There are 2 approaches: strtol() (better error handling) and sscanf().

// For each line: convert successive integers with strtol, then verify that
// nothing but white-space is left after the last converted number.
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
  char *endptr;
  while (1) {  
    errno = 0;  // cleared before the call so the errno test below reflects this strtol call
    long num = strtol(line, &endptr, 10);
    if (line == endptr) break;  // no conversion
    if (errno) break;  // out of range or other error

    #if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
    // long and int may have different ranges
    if (num < INT_MIN || num > INT_MAX) {
      errno = ERANGE; 
      break;  // out of range
    }
    #endif

    int value = (int) num;
    printf("%d\n", value);
    line = endptr;  // continue right after the number just converted
  } 
  // Skip trailing white-space; any other character left on the line is reported.
  while (isspace((unsigned char) *endptr)) endptr++;
  if (*endptr != '\0') Handle_ExtraGarbageAtEndOfLine();
}

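"sscanf only ever parses the first number in a string" is not quite right. Use sscanf() with "%n" to record how many characters were consumed, and advance the pointer by that amount before the next call.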

// For each line: parse one integer per sscanf call, using %n to learn how many
// characters were consumed so the next call continues where this one stopped.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
  int consumed;
  int parsed;

  // consumed is reset before every attempt; it stays 0 when %d fails to match.
  while (consumed = 0, sscanf(line, "%d %n", &parsed, &consumed) == 1) {
    printf("%d\n", parsed);
    line += consumed;
  }

  // Anything left at this point is not an integer.
  if (line[consumed] != '\0') {
    Handle_ExtraGarbageAtEndOfLine();
  }
}
+2

Use strtok() with " " (space) as the delimiter, inside a loop that ends when strtok() returns NULL, and parse each token as a number:

// For each line: split on spaces with strtok and print every token that
// sscanf can read as an integer.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);

    // strtok(NULL, ...) continues in the same line; it returns NULL when no tokens remain.
    for (char *piece = strtok(line, " "); piece != NULL; piece = strtok(NULL, " ")) {
        if (sscanf(piece, "%d", &value) == 1) {
            printf("%d\n", value);
        }
    }
}
+1

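Just loop over the input string and call atol() at the start of each number. ;) This is simpler than strtok or sscanf, with less "magic". For non-negative ints, use isdigit() to find where a number starts; to handle negative values as well, use !isspace() instead.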

// Prints every run of decimal digits found in a fixed sample string, one value per line.
// Only non-negative numbers are recognised: a leading '-' is not a digit and is skipped.
void bla()
{
    const char * input = "    1           3           4       6     ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char (or EOF).
        if (isdigit((unsigned char)input[i]))
        {
            // atol returns long, so the conversion specifier must be %ld, not %d.
            printf("%ld\n", atol(&input[i]));

            // Step over the remaining digits of the number that was just printed.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }
    }
}

// Prints every white-space separated token of a fixed sample string as an integer,
// one value per line. Handles positive and negative ints.
void bla1()
{
    const char * input = "    10           -3           42       6     ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char (or EOF).
        if (!isspace((unsigned char)input[i]))
        {
            // atol returns long, so the conversion specifier must be %ld, not %d.
            printf("%ld\n", atol(&input[i]));

            // Step over the rest of the token that was just converted.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
    }
    /* Output: 
        10
        -3
        42
        6

    */
}

The next part of your question was (implicitly) how to handle dynamic arrays to store your parsed int values. Here's a solution based on the code above. chunkSize is deliberately set too small for the test input so that the realloc code path is exercised as well.

// One parsed row of integers.
typedef struct DataRow_tag
{
    int32_t *data;   // Heap-allocated values; NULL while the row is empty/unparsed.
    size_t length;   // Number of valid entries in data.
} DataRow_t;

// Parses a zero-terminated string of white-space separated decimal integers into *result.
//
// Returns an int used as a C-style boolean (the original author deliberately avoided
// <stdbool.h> because of C/C++ interoperability concerns across library/DLL boundaries):
// 0: false -> fail
// 1: true -> success!
//
// Preconditions: result must be non-NULL and result->data must be NULL (a "clean" result
// structure); this is asserted in debug builds and reported as failure otherwise.
//
// On success the caller takes ownership of result->data and has to free() it when done.
// result->data is non-NULL on success even when the row holds no values (length == 0).
//
// Failure (return 0, result->data left NULL, result->length set to 0) is reported when:
//   - row is NULL,
//   - memory cannot be allocated,
//   - a token is not a complete decimal integer (e.g. "x" or "12abc"),
//   - a value does not fit into an int32_t.
int 
ReadRowWithUnknownNumberOfColumnsOfInt32
    ( const char * row      // Zero terminated string containing 1 row worth of data.
    , DataRow_t *result     // Pointer to the place the data will be stored at.
    )
{
    size_t capacity = 10; // Initial element capacity; doubled whenever the buffer is full.

    // This function does not clean up a used result structure for the caller.
    assert(NULL != result && NULL == result->data);
    if (NULL == result || NULL != result->data)
        return 0;

    result->length = 0;
    if (NULL == row)
        return 0;

    // Work on a local buffer and publish it only on success, so *result never
    // refers to freed or partially filled memory.
    int32_t *values = malloc(capacity * sizeof *values);
    if (NULL == values)
        return 0;

    size_t count = 0;
    const char *cursor = row;
    for (;;)
    {
        // Skip separators; <ctype.h> functions need an unsigned char value.
        while (isspace((unsigned char)*cursor))
            ++cursor;
        if ('\0' == *cursor)
            break; // whole row consumed

        // long long is at least 64 bits wide, so anything outside the int32_t range
        // (including strtoll's clamped overflow results) is caught by the range test.
        char *tokenEnd;
        long long parsed = strtoll(cursor, &tokenEnd, 10);
        int malformed = (tokenEnd == cursor)
            || ('\0' != *tokenEnd && !isspace((unsigned char)*tokenEnd));
        if (malformed || parsed < INT32_MIN || parsed > INT32_MAX)
        {
            free(values);
            return 0;
        }

        if (count == capacity)
        { // buffer is full: double it before storing the next value.
            capacity = capacity + capacity;
            // realloc returns NULL on failure and leaves the old block valid, so its
            // result goes into a temporary and the old block is freed exactly once.
            int32_t *grown = realloc(values, capacity * sizeof *grown);
            if (NULL == grown)
            {
                free(values);
                return 0;
            }
            values = grown;
        }

        values[count++] = (int32_t)parsed;
        cursor = tokenEnd;
    }

    result->data = values;
    result->length = count;
    return 1;
}
// Demo driver: parses a sample row, prints the parsed values on one line
// separated by spaces, then releases the row's memory.
void Bla2()
{
    const char * input = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
    DataRow_t parsedRow = { 0 };

    // Nothing to print or free when parsing fails.
    if (!ReadRowWithUnknownNumberOfColumnsOfInt32(input, &parsedRow))
        return;

    const int32_t *cursor = parsedRow.data;
    const int32_t *const stop = parsedRow.data + parsedRow.length;
    while (cursor != stop)
    {
        printf("%d ", *cursor);
        ++cursor;
    }
    printf("\n");

    // The caller owns the buffer after a successful parse.
    free(parsedRow.data);
    parsedRow.data = NULL;
    parsedRow.length = 0;
}
0
source

You should use:

/* Request BUFFER_SIZE + 1 bytes. sizeof(BUFFER_SIZE + 1) would be the size of the
   expression's integer type (typically 4 bytes), not the intended buffer length. */
lineBuffer = malloc(BUFFER_SIZE + 1);

rather than:

char lineBuffer[BUFFER_SIZE];

Your stack will be grateful to you!

0
source

All Articles