char* gets corrupted on the final iteration of a loop

115 Views Asked by At

So, I'm trying to build a string_split function to split a c-style string based on a delimiter.

Here is the code for the function:

char** string_split(char* input, char delim)
{
    char** split_strings = malloc(sizeof(char*));
    char* charPtr;

    size_t split_idx = 0;
    int extend = 0;

    for(charPtr = input; *charPtr != '\0'; ++charPtr)
    {
        if(*charPtr == delim || *(charPtr+1) == '\0')
        {
            if(*(charPtr+1) == '\0') extend = 1; //extend the range by one for the null byte at the end
            char* string_element = calloc(1, sizeof(char));

            for(size_t i = 0; input != charPtr+extend; ++input, ++i)
            {
                if(string_element[i] == '\0')
                {
                    //allocate another char and add a null byte to the end
                    string_element = realloc(string_element, sizeof(char) * (sizeof(string_element)/sizeof(char) + 1));
                    string_element[i+1] = '\0';
                }
                string_element[i] = *input;
            }
            printf("string elem: %s\n", string_element);
            split_strings[split_idx++] = string_element;
            
            //allocate another c-string if we're not at the end of the input
            split_strings = realloc(split_strings, sizeof(char*) *(sizeof(split_strings)/sizeof(char*) + 1));    

            //skip over the delimiter 
            input++;
            extend = 0;
        }
    }
    free(charPtr);
    free(input);
    return split_strings;
}

Essentially, the way it works is that there are two char*, input and charPtr. charPtr counts up from the start of the input string the the next instance of the delimiter, then input counts from the previous instance of the delimiter (or the start of the input string), and copies each char into a new char*. once the string is built it is added to a char** array.

There are also some twiddly bits for skipping over delimiters and dealing with the end points of the input string. the function is used as so:

int main()
{
    char* str = "mon,tue,wed,thur,fri";
    char delim = ',';
    char** split = string_split(str, delim);

    return 1;
}

Anyway, it works for the most part, except the first char* in the returned char** array is corrupted, and is just occupied by random junk.

For example printing the elements of split from main yields:

split: α↨▓
split: tue
split: wed
split: thur
split: fri

Whats odd is that the contents of split_strings[0], the array of char* which returns the desired tokens is mon, as it should be for this example, right up until the final loop of the main for-loop in the string_split function, specifically its the line:

split_strings[split_idx++] = string_element;

which turns its contents from mon to Junk. Any help is appreciated, thanks.

2

There are 2 best solutions below

0
On

The final function for anyone wonder, should be pretty fool proof.

char** string_split(char* input, char delim, bool skip_delim)
{
    assert(*input != '\0');

    char** split_strings = malloc(sizeof(char*));
    char* charPtr;

    size_t split_idx             = 0;
    size_t num_allocated_strings = 1;
    size_t extend                = 0;

    for(charPtr = input; *charPtr != '\0'; ++charPtr)
    {
        if(*charPtr == delim || *(charPtr+1) == '\0')
        {
            if(*(charPtr+1) == '\0') extend = 1; //extend the range by one for the null byte at the end
            char* string_element = calloc(1, sizeof(char));

            for(size_t i = 0; input != charPtr+extend; ++input, ++i)
            {
            if(string_element[i] == '\0')
            {
                //allocate another char and add a null byte to the end
                char* temp = realloc(string_element, sizeof(char) * (strlen(string_element) + 1));
                if(temp == NULL)
                {
                    free(string_element);
                    free(split_strings);
                    break;
                }
                string_element = temp;
                string_element[i+1] = '\0';
            }
            string_element[i] = *input;
            }
            split_strings[split_idx++] = string_element;
            num_allocated_strings++;
            
            //allocate another c-string
            char** temp = realloc(split_strings, sizeof(char*) * num_allocated_strings);    
            if(temp == NULL)
            {
                free(string_element);
                free(split_strings);
                break;
            }
            split_strings = temp;

            //skip over the delimiter if required
            if(skip_delim) input++;
            extend = 0;
        }
    }
    split_strings[num_allocated_strings-1] = NULL;
    return split_strings;
}
6
On

Your function is incorrect at least because it tries to free the passed string

char** string_split(char* input, char delim)
{

    //...

    free(charPtr);
    free(input);
    return split_strings;
}

that in the case of your program is a string literal

char* str = "mon,tue,wed,thur,fri";
char delim = ',';
char** split = string_split(str, delim);

You may not free a string literal.

And the first parameter shall have the qualifier const.

There are numerous other errors in your function.

For example the expression sizeof(string_element)/sizeof(char) used in this statement

string_element = realloc(string_element, sizeof(char) * (sizeof(string_element)/sizeof(char) + 1));

does not yield the number of characters early allocated to the array pointed to by the pointer string_element. And there is no great sense to reallocate the array for each new character.

The function can look for example the following way as it is shown in the demonstrative program below.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

char ** string_split( const char *s, char delim )
{
    size_t n = 1;
    char **a = calloc( n, sizeof( char * ) );
    
    while ( *s )
    {
        const char *p = strchr( s, delim );
        
        if ( p == NULL ) p = s + strlen( s );
        
        if (  p != s )
        {
            char *t = malloc( p - s + 1 );
            
            if ( t != NULL ) 
            {
                memcpy( t, s, p - s );
                t[p-s] = '\0';
            }
            
            char **tmp = realloc( a, ( n + 1 ) * sizeof( char * ) );
            
            if ( tmp == NULL )
            {
                free( t );              
                break;
            }
            
            a = tmp;
            
            a[n-1] = t;
            a[n] = NULL;
            ++n;
        }
        
        s = p + ( *p != '\0' ); 
    }
    
    return a;
}

int main(void) 
{
    char* str = "mon,tue,wed,thur,fri";
    char delim = ',';

    char **split = string_split( str, delim );
    
    for ( char **p = split; *p != NULL; ++p )
    {
        puts( *p );
    }
    
    for ( char **p = split; *p != NULL; ++p ) free( *p );
    free( split );
    
    return 0;
}

The program output is

mon
tue
wed
thur
fri