Converting Greek words to uppercase

511 Views Asked by At

I have to create a function that reads a file called grwords.txt containing around 540000 words which are written in Greek letters.

I have to convert these words to uppercase and fill an array called char **words.

This is what I have so far.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>


void fp();

/* Program entry point: switches the console output code page to 1253
 * (Windows Greek) and then runs the word-loading routine fp(). */
int main(int argc, char *argv[]) {
    /* The command-line arguments are not used by this program. */
    (void)argc;
    (void)argv;

    SetConsoleOutputCP(1253);
    fp();

    return 0;
}

/*
 * Reads whitespace-separated words from "grwords.txt" into a dynamically
 * allocated array of strings (char **words).
 *
 * At most MAX_WORDS words are stored. fscanf reads at most MAX_WORD_LEN
 * bytes per word, so a longer run of non-blank bytes is split across
 * several entries instead of overflowing the buffer.
 *
 * Prints a message and exits with status 1 if the file cannot be opened
 * or the pointer array cannot be allocated.
 */
void fp(){
    /* Capacity limits; the width in the fscanf format below must equal
     * MAX_WORD_LEN. */
    enum { MAX_WORDS = 546490, MAX_WORD_LEN = 24 };

    char word[MAX_WORD_LEN + 1];    /* +1 for the terminating '\0' */
    char **words;
    size_t cnt = 0;
    size_t i;
    FILE *file;

    file = fopen("grwords.txt", "rt");
    if (file == NULL){
        printf("File cannot be opened.\n");
        exit(1);
    }

    /* One pointer per word; each string is allocated once its length is known. */
    words = malloc(MAX_WORDS * sizeof *words);
    if (words == NULL){
        printf("Out of memory.\n");
        fclose(file);
        exit(1);
    }

    /* Test the capacity first so no word is read and then thrown away. */
    while (cnt < MAX_WORDS && 1 == fscanf(file, "%24s", word)){
        words[cnt] = malloc(strlen(word) + 1);
        if (words[cnt] == NULL){
            printf("Out of memory.\n");
            break;
        }
        strcpy(words[cnt], word);
        cnt++;
    }
    fclose(file);

    /* words[0] .. words[cnt - 1] now hold the words read from the file;
     * any code that uses them belongs above this point. */

    /* The array is local to this function, so release it before returning. */
    for (i = 0; i < cnt; i++)
        free(words[i]);
    free(words);
}

I'm still trying to figure out pointers. I know that & makes a pointer from a value and * a value from a pointer. Updated the program and it successfully fills the array with the words from the file! I still have no idea how to convert Greek lowercase to uppercase.

1

There is 1 best solution below

0
On BEST ANSWER

Handling Greek words can be dependent on your platform.

First of all, you need to understand how file handling works. Here is what I wrote:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define bufSize 1024 // max lenght of word
// We are going to receive the .txt from the command line.
//
// Reads whitespace-separated words from the file named by argv[1], prints
// each word, then prints it again with every byte passed through toupper().
// Returns 0 on success, or 1 if the argument is missing or the file cannot
// be opened.
int main(int argc, char *argv[])
{
  FILE *fp;

  // Assume file has max 10 words; any further words are ignored.
  // An enum constant (rather than a const variable) keeps buf an
  // ordinary array instead of a variable-length array.
  enum { N = 10 };

  // Allocate a 2D array of N rows
  // and bufSize + 1 columns.
  // You can think of it like an array
  // of N strings, where every string
  // has, at most, bufSize characters
  // plus the terminating '\0'.
  char buf[N][bufSize + 1];

  // make sure we got the .txt
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }

  // open the file
  if ((fp = fopen(argv[1], "r")) == NULL)
  { /* Open source file. */
    perror("fopen source-file");
    return 1;
  }

  // we will use that for toupper(); it must be unsigned, because
  // passing a negative char value (possible for non-ASCII bytes)
  // to toupper() is undefined behaviour
  unsigned char c;

  // counters
  int i = 0, j;

  // Stop after N words so we never write past the last row.
  // The field width (1024) must equal bufSize: fscanf stores up to
  // that many characters and then appends '\0'.
  while (i < N && fscanf(fp, "%1024s", buf[i]) == 1)
  { /* While we don't reach the end of source. */
    /* Read characters from source file to fill buffer. */

    // print what we read
    printf("%s\n", buf[i]);

    j = 0;
    // while we are on a letter of word placed
    // in buf[i]
    while (buf[i][j])
    {
      // make the letter capital and print it
      c = (unsigned char)buf[i][j];
      putchar (toupper(c));
      j++;
    }
    i++;
    printf("\ndone with this word\n");
  }
  // close the file
  fclose(fp);

  return 0;
}

For this test.txt file:

Georgios
Samaras
Γιώργος
Σαμαράς

the code would run as:

./exe test.txt
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
Γιώργος
done with this word
Σαμαράς
Σαμαράς
done with this word

As you can see, I could read the Greek words, but failed to convert them to uppercase.

Once you understand how file handling works, you need to use wide characters to read a file containing Greek words.

So, by just modifying the above code, we get:

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024

// Reads whitespace-separated words from the file named by argv[1] as wide
// characters, prints each word, then prints it again converted with
// towupper(). Returns 0 on success, or 1 if the argument is missing or the
// file cannot be opened.
int main(int argc, char *argv[])
{
  // Both the multibyte -> wide conversion done by fwscanf() and the case
  // mapping done by towupper() depend on LC_CTYPE. Fall back to the
  // environment's locale if the requested one is not installed.
  if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL
      && setlocale(LC_CTYPE, "") == NULL)
  {
    fprintf(stderr,
            "warning: could not set a locale; non-ASCII words may not be converted\n");
  }

  FILE *fp;

  // At most N words are processed; any further words are ignored.
  enum { N = 15 };

  // N rows of up to bufSize wide characters plus the terminating L'\0'.
  wchar_t buf[N][bufSize + 1];

  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }

  wchar_t c;
  int i = 0, j;

  // Stop after N words so we never write past the last row.
  // The field width (1024) must equal bufSize so that a long word cannot
  // overflow its row.
  while (i < N && fwscanf(fp, L"%1024ls", buf[i]) == 1)
  {
    // print what we read
    wprintf( L"%ls\n\n", buf[i]);

    // print the same word again, one upper-cased character at a time
    j = 0;
    while (buf[i][j])
    {
      c = buf[i][j];
      putwchar (towupper(c));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  return 0;
}

And now the output is this:

Georgios

GEORGIOS
done with this word
Samaras

SAMARAS
done with this word
Γιώργος

ΓΙΏΡΓΟΣ
done with this word
Σαμαράς

ΣΑΜΑΡΆΣ
done with this word

I see that you may want to create a function which reads the words. If you need a simple example of functions in C, you can visit my pseudo-site here.

As for the 2D array I mentioned above, this picture might help:

enter image description here

where N is the number of rows (equal to 4) and M is the number of columns (equal to 5). In the code above, N is N and M is bufSize. I explain more here, where you can also find code for dynamic allocation of a 2D array.

I now see that you are on Windows. I tested the code on Ubuntu.

For Windows you might want to take a good look at this question.

So, once you have read and understood all of the above, here is what you asked for, implemented with dynamic memory management.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024

wchar_t **get(int N, int M);
void free2Darray(wchar_t** p, int N);

// Same program as before, but the word buffers are allocated dynamically
// with get() and released with free2Darray().
// Returns 0 on success, or 1 if the argument is missing, the file cannot be
// opened, or the buffers cannot be allocated.
int main(int argc, char *argv[])
{
  // Both the multibyte -> wide conversion done by fwscanf() and the case
  // mapping done by towupper() depend on LC_CTYPE. Fall back to the
  // environment's locale if the requested one is not installed.
  if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL
      && setlocale(LC_CTYPE, "") == NULL)
  {
    fprintf(stderr,
            "warning: could not set a locale; non-ASCII words may not be converted\n");
  }

  FILE *fp;

  // At most N words are processed; any further words are ignored.
  enum { N = 15 };

  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }

  // Allocate only after the checks above, so the early returns cannot
  // leak the buffers. Each row holds up to bufSize wide characters plus
  // the terminating L'\0'.
  wchar_t **buf = get(N, bufSize + 1);
  if (buf == NULL)
  {
    fprintf(stderr, "out of memory\n");
    fclose(fp);
    return 1;
  }

  wchar_t c;
  int i = 0, j;

  // Stop after N words so we never write past the last row.
  // The field width (1024) must equal bufSize so that a long word cannot
  // overflow its row.
  while (i < N && fwscanf(fp, L"%1024ls", buf[i]) == 1)
  {
    // print what we read
    wprintf( L"%ls\n", buf[i]);

    // print the same word again, one upper-cased character at a time
    j = 0;
    while (buf[i][j])
    {
      c = buf[i][j];
      putwchar (towupper(c));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  // NEVER FORGET, FREE THE DYNAMIC MEMORY
  free2Darray(buf, N);
  return 0;
}

// Allocates an N x M table of wide characters as an array of N row
// pointers, each pointing to its own zero-initialised row of M wchar_t.
//
// Returns the pointer to the table, or NULL if N or M is not positive or
// if any allocation fails (nothing is leaked in that case).
// The caller owns the result and must free every row and then the table
// itself.
wchar_t **get(int N, int M) /* Allocate the array */
{
    int i;
    wchar_t **table;

    /* A negative size would turn into a huge size_t; reject it up front. */
    if (N <= 0 || M <= 0)
        return NULL;

    table = calloc((size_t)N, sizeof *table);
    if (table == NULL)
        return NULL;

    for(i = 0 ; i < N ; i++)
    {
        /* calloc zero-fills, so every row starts out as an empty string. */
        table[i] = calloc((size_t)M, sizeof *table[i]);
        if (table[i] == NULL)
        {
            /* Undo the rows allocated so far, then the table itself. */
            while (i-- > 0)
                free(table[i]);
            free(table);
            return NULL;
        }
    }
    return table;
}

// Frees a table made of N separately allocated rows: first every row
// p[0] .. p[N-1], then the array of row pointers itself.
// Like free(), it does nothing when p is NULL, so it is safe to call on
// the result of a failed allocation. NULL rows are fine too.
void free2Darray(wchar_t** p, int N)
{
    int i;

    if (p == NULL)
        return;

    for(i = 0 ; i < N ; i++)
        free(p[i]);
    free(p);
}

Note that this code is expected to work on Linux (tested on Ubuntu 12.04), not on Windows (tested on Win 7).