FYI: this is a coding project, so I can't make changes to the spreadsheet.

I have to save columns 1, 2, 4, 5, and 14 in arrays. I printed these columns, and this is the output right now:

I need to convert Column 1 and Column 14 to float arrays. How do I remove the quotation marks from all of the column arrays?

The values in the CSV file are separated by commas and enclosed in double quotes.

This is the spreadsheet file


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_COLS 18  // Maximum number of columns in the CSV file
#define MAX_ROWS 211 // Maximum number of rows in the CSV file

/*
 * Load columns 1, 2, 4, 5 and 14 of statscan_diabetes.csv into per-column
 * arrays and print them, one data row per output line.
 *
 * Each line is split on ',' with strtok() and the selected fields are stored
 * exactly as they appear in the file, so double quotes surrounding a field
 * remain part of the stored string. A field longer than its buffer is
 * truncated instead of being written past the end of the array.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(void) {
    char filename[] = "statscan_diabetes.csv";
    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("Error opening file %s\n", filename);
        return 1;
    }

    char line[1024];
    int row_count = 0;

    /* Zero-filled so that a row lacking a column prints as an empty string
     * instead of whatever happened to be on the stack. */
    char ref_date[MAX_ROWS][11] = {{0}};
    char geo[MAX_ROWS][100] = {{0}};
    char age_group[MAX_ROWS][20] = {{0}};
    char sex[MAX_ROWS][10] = {{0}};
    char value_str[MAX_ROWS][10] = {{0}};

    /* The first line is the header; only continue if it could be read. */
    if (fgets(line, sizeof line, fp) != NULL) {
        while (row_count < MAX_ROWS && fgets(line, sizeof line, fp) != NULL) {
            int col_count = 0;

            /* strtok() treats a run of commas as a single separator, so an
             * empty field shifts every later column one position left. */
            for (char *token = strtok(line, ",");
                 token != NULL && col_count < MAX_COLS;
                 token = strtok(NULL, ","), col_count++) {
                /* snprintf() always NUL-terminates and never writes more
                 * than the destination size. */
                switch (col_count) {
                case 0:
                    snprintf(ref_date[row_count], sizeof ref_date[row_count], "%s", token);
                    break;
                case 1:
                    snprintf(geo[row_count], sizeof geo[row_count], "%s", token);
                    break;
                case 3:
                    snprintf(age_group[row_count], sizeof age_group[row_count], "%s", token);
                    break;
                case 4:
                    snprintf(sex[row_count], sizeof sex[row_count], "%s", token);
                    break;
                case 13:
                    snprintf(value_str[row_count], sizeof value_str[row_count], "%s", token);
                    break;
                default:
                    break;
                }
            }
            row_count++;
        }
    }

    /* TODO: per-province totals are not computed yet. When they are, compare
     * names with strcmp(); == on two char arrays compares addresses. */

    /* Print the values stored in the arrays */
    for (int i = 0; i < row_count; i++) {
        printf("%s %s %s %s %s\n", ref_date[i], geo[i], age_group[i], sex[i], value_str[i]);
    }

    fclose(fp);
    return 0;
}

At first I tried using float arrays to store the numbers in Column 1 and Column 14 and printed them, but every value came out as 0.0.

I realized that, since the numbers are surrounded by quotes, the program is reading the quotes as part of each value.

How do I remove the quotes from all arrays?

2

There are 2 best solutions below

0
chux - Reinstate Monica On

How do i remove the quotations from all column arrays??

Rather than strcpy(ref_date[row_count], token);, which risks buffer overflow and copies the undesired ", call a helper function and have it skip " and avoid copying too many characters.

/* Copy source into dest (capacity: size bytes), leaving out the '"' characters. */
void copy_token(size_t size, char *dest, const char *source);

And call it and like-wise for the other tokens

/* sizeof ref_date[row_count] is the size of one row's buffer and serves as the copy limit. */
copy_token(sizeof ref_date[row_count], ref_date[row_count], token);

A possible implementation below is simply something thrown together. OP is unclear on what should happen if " lacks a match ", input is too long, etc.

/*
 * Copy the quoted portion(s) of source into dest, dropping the '"' characters.
 *
 * size   - capacity of dest in bytes, including the terminating '\0'
 * dest   - destination buffer; NUL-terminated on return whenever size > 0
 * source - NUL-terminated token, e.g. "\"2015\""
 *
 * Every '"' toggles an "inside quotes" state and is never copied; only
 * characters seen while inside quotes are copied. Characters that do not fit
 * in dest are silently dropped. With size == 0 nothing is written.
 */
void copy_token(size_t size, char *dest, const char *source) {
  if (size == 0) return;
  int inside_quote = 0;  /* plain int, so <stdbool.h> is not required */
  while (size > 1 && *source) {
    if (*source == '"') {  /* inspect the input character, not the dest pointer */
      inside_quote = !inside_quote;
    } else if (inside_quote) {
      *dest++ = *source;
      size--;  /* always keep one byte in reserve for the terminator */
    }
    source++;
  }
  *dest = '\0';
}

For conversion to float, make a different helper function that skips over a leading " (if present), calls strtod(), checks for conversion success, looks for trailing non-numeric text, etc.

/* Proposed helper (declaration only; no implementation is shown here):
 * convert a possibly quoted numeric token to float. */
float parse_token(const char *source);

A weakness to OP's code is lack of error detection: overlong line, too long a token, missing ", too many ", non-numeric input for float, etc.

This is tolerable for a learner exercise, yet robust code would detect and handle bad input. Helper functions are a first step to divide & conquer to help achieve these goals.

0
Fe2O3 On

Be aware that CSV format uses double quote to 'encapsulate' (hide) a comma inside any data field. The format also "doubles up" double quotes inside fields.
Eg: "left,right","up","down" has 3 fields, not 4.
Eg: "She is 5'9"" tall",42 has only two fields.
If the CSV data contains fields that may contain one or more commas, you will need to use more advanced parsing to extract the information you want.

strtok() is not suitable for chopping up CSV strings as it considers ",,," to be a single separator, not 3 individual separators. You have to get your hands dirty dealing with individual characters.

Since the assignment is to write arcane code that is highly specific to a particular CSV layout, perhaps the following annotated code may be adapted to serve:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One parsed CSV data row; main() fills it from selected columns of a line. */
typedef struct { // collect the data fields in an instance of a struct
    char ref_date[11];  // text of column index 0
    char geo[100];      // text of column index 1
    char age_group[20]; // text of column index 3
    char sex[10];       // text of column index 4
    double value;       // column index 13 converted with strtod()
} rec_t;

/*
 * Parse a quoted CSV file into an array of rec_t records, print every record,
 * then print how many records belong to each of four provinces.
 *
 * Fields are split by hand so that commas inside double quotes do not end a
 * field and empty fields still count as columns. Enclosing quotes are removed
 * and a doubled quote inside a field is reduced to a single quote.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main( void ) {
    // I used my own test data file...
    char filename[] = "test.csv"; // "statscan_diabetes.csv";

    FILE *fp = fopen( filename, "r" );
    if( fp == NULL ) {
        printf( "Error opening file %s\n", filename );
        return 1;
    }

    // A fixed size. Should be dynamic, but that's for later.
    // An enum constant keeps 'arr' an ordinary array instead of a VLA.
    enum { MAX_RECS = 150 };
    rec_t arr[ MAX_RECS ];

    char line[1024];
    int row = 0;

    if( fgets( line, sizeof line, fp ) ) { // skip past the header line
        while( row < MAX_RECS && fgets( line, sizeof line, fp ) ) {
            rec_t rec = { 0 }; // a blank record to populate

            line[ strcspn( line, "\r\n" ) ] = '\0'; // trim CR/LF from end of string

            int col = 0;
            for( char *cp = line; *cp; ) { // chew through characters.
                int qCnt = 0;   // count quotes encountered
                char *sp = cp;  // "start" of this column
                while( *cp ) {  // search for its end
                    if( *cp == '"' ) {
                        qCnt++; // count and advance for quotes
                        cp++;
                    } else if( *cp == ',' && qCnt % 2 == 0 ) {
                        // found end of this column (PAIRED quotes and comma)
                        // terminate and process further
                        *cp++ = '\0';
                        break;
                    } else {
                        cp++; // simply advance
                    }
                }

                // if column begins with quote, it should end with quote;
                // dispose of both. The length check keeps a field that is
                // only a single '"' from indexing before the buffer.
                if( sp[0] == '"' ) {
                    sp++;
                    size_t len = strlen( sp );
                    if( len > 0 && sp[ len - 1 ] == '"' )
                        sp[ len - 1 ] = '\0';
                }

                // Another pass to collapse "quoted quote" down to a single
                // instance: copy one character, then skip a quote that
                // directly follows it. 'd' is declared outside the loop
                // because it is needed for the terminator afterwards.
                size_t d = 0;
                for( size_t ss = 0; sp[ss] != '\0'; ) {
                    sp[d++] = sp[ss++];
                    if( sp[ss] == '"' )
                        ss++;
                }
                sp[d] = '\0';

                // Now, file away this column as appropriate.
                // snprintf() truncates instead of overrunning a field.
                switch( col++ ) {
                    case  0: snprintf( rec.ref_date, sizeof rec.ref_date, "%s", sp ); break;
                    case  1: snprintf( rec.geo, sizeof rec.geo, "%s", sp ); break;
                    case  3: snprintf( rec.age_group, sizeof rec.age_group, "%s", sp ); break;
                    case  4: snprintf( rec.sex, sizeof rec.sex, "%s", sp ); break;
                    case 13: rec.value = strtod( sp, NULL ); break;
                    default: break; // column not needed
                }
            }
            // preserve this single record into array of records
            arr[ row++ ] = rec;
        }
    }
    fclose( fp );

    printf( "%d rows found\n", row );

    //Print the values stored in the arrays
    for( rec_t *p = arr; p < arr + row; p++ )
        printf( "%s - %s - %s - %s - %.0lf\n", p->ref_date, p->geo, p->age_group, p->sex, p->value );

    // sample "tabulation" of some characteristic of the records
    // Note the use of 'strcmp()' to find equal strings
    int ON_cnt = 0, QU_cnt = 0, BC_cnt = 0, AB_cnt = 0;
    for( int i = 0; i < row; i++ ) {
        QU_cnt += strcmp( arr[i].geo, "Quebec" ) == 0;
        ON_cnt += strcmp( arr[i].geo, "Ontario" ) == 0;
        AB_cnt += strcmp( arr[i].geo, "Alberta" ) == 0;
        BC_cnt += strcmp( arr[i].geo, "BC" ) == 0;
    }
    printf( "Quebec: %d, Ontario: %d, Alberta: %d, BC: %d\n", QU_cnt, ON_cnt, AB_cnt, BC_cnt );

    return 0;
}