Load data functions not working correctly on CSV(Comma Delimited) file

48 Views Asked by At

I'm using Visual Studio to write c code for an EEG sleep study lab. The data is loaded from a CSV (Comma Delimited) Excel file that has 50 rows and 3000 columns. Each row represents a time series signal with 3000 data points each. When I run the code it gives me number of rows = 500 and number of columns = 316.

int main(int argc, char* argv[]) {

    FILE* file = fopen("EEG_SleepData_30sec_100Hz.csv", "r");
    if (file == NULL) {
        perror("Error opening file");
        return EXIT_FAILURE;
    }

    //Loads data in from file name specified
    int num_signals = num_rows_in_file(file);
    int signal_length = num_cols_in_file(file);
    printf("number of rows = %d  number of columns = %d\n", num_signals, signal_length);

    double** dataset = load_data(file, num_signals, signal_length);
    // Print the entire dataset
    for (int i = 0; i < num_signals; i++) {
        for (int j = 0; j < signal_length; j++) {
            printf("%lf ", dataset[i][j]);
        }
        printf("\n");
    }
    
    return 0;
}


double** load_data(FILE* file, int numrows, int numcols) {

    if (file != NULL) {
        double** dataset = (double**)calloc(numrows, sizeof(double*)); // Allocate each of our row pointers.

        if (dataset == NULL) {
            return NULL;
        }

        for (int i = 0; i < numrows; i++) {
            dataset[i] = (double*)calloc(numcols, sizeof(double)); // Allocate our columns.
            if (dataset[i] == NULL) {
                return NULL;
            }
        }

        for (int i = 0; i < numrows; i++) {
            for (int j = 0; j < numcols; j++) {
                fscanf(file, "%lf,", &dataset[i][j]);
            }
        }
        return dataset;
    }
    else {
        fprintf(stderr, "Unable to find file! Ensure it is in the Debug directory.");
        return NULL;
    }

}


int num_cols_in_file(FILE* file) {
    int numcols = 0;
    if (file) {
        char buf[3000]; // Make a buffer we'll use to grab a whole row.

        if (fgets(buf, sizeof(buf), file) != NULL) {
            // Tokenize our buffer, looking for how many columns we have (aka how many tokens we can create)
            char* token;
            char* next_token = NULL;

            token = strtok(buf, ", \n\r\t", &next_token);  // Include commas and additional whitespace as delimiters

            while (token != NULL) {
                token = strtok(NULL, ", \n\r\t", &next_token);
                numcols++;
            }

            rewind(file); // Reset our position to the beginning of the file.

            return numcols;
        }
        else {
            fprintf(stderr, "Failed to read first row.\n");
            return 0;
        }
    }
    else {
        fprintf(stderr, "File is unopened. Numcols only works on opened files.");
        return 0;
    }
}


int num_rows_in_file(FILE* file) {
    int numrows = 0;
    if (file) {
        char buf[3000]; // Make a buffer we'll use to grab a whole row.

        while (fgets(buf, sizeof(buf), file) != NULL) {
            numrows++;

        }

        rewind(file); // Reset our position to the beginning of the file.
        return numrows;
    }
    else {
        fprintf(stderr, "File is unopened. Numrows only works on opened files.");
        return 0;
    }
}

When I used a debug statement to see what the function was reading it printed values that were also very incorrect. I have used the same load_data, num_rows_in_file, and num_cols_in_file functions all semester with no issue, what is wrong with the code?

0

There are 0 best solutions below