Combined usage of getline, strcat and realloc functions

52 Views Asked by At

Good afternoon everyone! I wrote my implementation of the cat utility in C and ran into some problems. Here's my code:

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void parse_args(int argc, char *argv[], int *b_flag, int *e_flag, int *n_flag,
                int *s_flag, int *t_flag, int *a_flag);
FILE *open_file(const char *filename, char *argv[]);
void process_file(FILE *file, int b_flag, int e_flag, int n_flag, int s_flag,
                  int t_flag, int a_flag);
int process_s_flag(char *buffer, int *last_empty_line, int s_flag);
void process_b_flag(char *buffer, int b_flag, int *line_number);
void process_n_flag(char *buffer, int n_flag, int s_flag, int b_flag,
                    int *line_number, int is_squeezable);
void process_e_flag(char *buffer, int e_flag, int a_flag);
void process_t_flag(char *buffer, int t_flag, int a_flag); 

int main(int argc, char *argv[]) {
    int b_flag = 0, e_flag = 0, n_flag = 0, s_flag = 0, t_flag = 0, a_flag = 0;
    parse_args(argc, argv, &b_flag, &e_flag, &n_flag, &s_flag, &t_flag, &a_flag);
    for (int i = optind; i < argc; i++) {
    FILE *file = open_file(argv[i], argv);
    process_file(file, b_flag, e_flag, n_flag, s_flag, t_flag, a_flag);
    fclose(file);
    }
return 0;
}

void parse_args(int argc, char *argv[], int *b_flag, int *e_flag, int *n_flag,
                int *s_flag, int *t_flag, int *a_flag) {
  int flags;
  const char *short_options = "beEnstTA";
  const struct option long_options[] = {
      {"number-nonblank", no_argument, b_flag, 1},
      {"show-all", no_argument, a_flag, 1},
      {"show-ends", no_argument, e_flag, 1},
      {"number", no_argument, n_flag, 1},
      {"squeeze-blank", no_argument, s_flag, 1},
      {"show-tabs", no_argument, t_flag, 1},
      {0, 0, 0, 0}};
  while ((flags = getopt_long(argc, argv, short_options, long_options, NULL)) !=
         -1) {
    switch (flags) {
      case 0:
        break;
      case 'b':
        (*b_flag)++;
        break;
      case 'e':
      case 'E':
        (*e_flag)++;
        break;
      case 'n':
        (*n_flag)++;
        break;
      case 't':
      case 'T':
        (*t_flag)++;
        break;
      case 's':
        (*s_flag)++;
        break;
      case 'A':
        (*a_flag)++;
        break;
      default:
        fprintf(stderr, "Usage: %s [OPTIONS] [FILE]...\n", argv[0]);
        exit(EXIT_FAILURE);
    }
  }
  if (argc == optind) {
    fprintf(stderr, "Usage: %s [OPTIONS] [FILE]...\n", argv[0]);
    exit(EXIT_FAILURE);
  }
}


FILE *open_file(const char *filename, char *argv[]) {
  FILE *file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "%s: %s: No such file or directory\n", argv[0], filename);
    exit(EXIT_FAILURE);
  }
  return file;
}

void process_file(FILE *file, int b_flag, int e_flag, int n_flag, int s_flag,
                  int t_flag, int a_flag) {
  char *buffer = NULL;
  size_t buffer_size = 0;
  int last_empty_line = 0;
  int line_number = 1;
  while ((getline(&buffer, &buffer_size, file)) != -1) {
    int is_squeezable = process_s_flag(buffer, &last_empty_line, s_flag);
    if (!is_squeezable || !s_flag) {
      process_t_flag(buffer, t_flag, a_flag);
      process_b_flag(buffer, b_flag, &line_number);
      process_n_flag(buffer, n_flag, s_flag, b_flag, &line_number,
                     is_squeezable);
      process_e_flag(buffer, e_flag, a_flag);
      fprintf(stdout, "%s", buffer);
    }
  }
  free(buffer);
}

int process_s_flag(char *buffer, int *last_empty_line, int s_flag) {
  int length = strlen(buffer);
  int is_empty_line = (length <= 1);
  int marker = 0;
  if (s_flag && is_empty_line) {
    if (*last_empty_line) {
      marker = 1;
    }
    *last_empty_line = 1;
  } else {
    *last_empty_line = 0;
  }
  return marker;
}

void process_b_flag(char *buffer, int b_flag, int *line_number) {
  int length = strlen(buffer);
  if (b_flag && length > 1) {
    char *tmp = strdup(buffer);
    buffer[0] = '\0';
    sprintf(buffer, "%6d\t", (*line_number)++);
    strcat(buffer, tmp);
    free(tmp);
  }
}

void process_n_flag(char *buffer, int n_flag, int s_flag, int b_flag,
                    int *line_number, int is_squeezable) {
  if (n_flag) {
    if (!s_flag || !is_squeezable) {
      if (!b_flag) {
        char *tmp = strdup(buffer);
        buffer[0] = '\0';
        sprintf(buffer, "%6d\t", (*line_number)++);
        strcat(buffer, tmp);
        free(tmp);
      }
    }
  }
}

void process_e_flag(char *buffer, int e_flag, int a_flag) {
  int length = strlen(buffer);
  if (e_flag || a_flag) {
    if (length > 0 && buffer[length - 1] == '\n') {
      buffer[length - 1] = '\0';
      sprintf(buffer + length - 1, "%s", "$\n");
    } else {
      sprintf(buffer + length, "%s", "$");
    }
  }
}

void process_t_flag(char *buffer, int t_flag, int a_flag) {
  int length = strlen(buffer);
  if (t_flag || a_flag) {
    for (int i = 0; i < length; i++) {
      if (buffer[i] == '\t') {
        memmove(buffer + i + 2, buffer + i + 1, length - i);
buffer[i] = '^';
    buffer[i + 1] = 'I';
    length++;
    i++;
      }
    }
  }
}

As you can see, I'm using getline to read lines from the strcat file to change them depending on the flag. While passing the tests, errors were displayed:

=119== Memcheck, a memory error detector
==119== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==119== Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
==119== 
==119== Invalid write of size 1
==119==    at 0x48A790D: strcat (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x109998: process_b_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x109815: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==  Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid write of size 1
==119==    at 0x48A7917: strcat (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x109998: process_b_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x109815: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==  Address 0x48b15f1 is 5 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid read of size 1
==119==    at 0x48A7B1B: strlen (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x109A5F: process_e_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x10984C: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==  Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid read of size 1
==119==    at 0x48A7B1B: strlen (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4048D72: fputs (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==  Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid read of size 1
==119==    at 0x40496E4: ??? (in /lib/ld-musl-x86_64.so.1)
==119==  Address 0x48b15f0 is 4 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Syscall param writev(vector[...]) points to unaddressable byte(s)
==119==    at 0x4047995: ??? (in /lib/ld-musl-x86_64.so.1)
==119==  Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid read of size 1
==119==    at 0x40516A8: ??? (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x4049706: ??? (in /lib/ld-musl-x86_64.so.1)
==119==  Address 0x48b1809 is 0 bytes after a block of size 73 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Invalid read of size 1
==119==    at 0x40516A5: ??? (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x4049706: ??? (in /lib/ld-musl-x86_64.so.1)
==119==  Address 0x48b180a is 1 bytes after a block of size 73 alloc'd
==119==    at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119==    by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x65F79F75BB8B0087: ???
==119==    by 0x1FFEFFD4FF: ???
==119==    by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== 
==119== Syscall param writev(vector[...]) points to uninitialised byte(s)
==119==    at 0x4047995: ??? (in /lib/ld-musl-x86_64.so.1)
==119==    by 0x17C: ???
==119==  Address 0x409578b is in the BSS segment of /lib/ld-musl-x86_64.so.1
==119== 
==119== 
==119== HEAP SUMMARY:
==119==     in use at exit: 0 bytes in 0 blocks
==119==   total heap usage: 13 allocs, 13 frees, 2,013 bytes allocated
==119== 
==119== All heap blocks were freed -- no leaks are possible
==119== 
==119== Use --track-origins=yes to see where uninitialised values come from
==119== For lists of detected and suppressed errors, rerun with: -s
==119== ERROR SUMMARY: 103 errors from 9 contexts (suppressed: 0 from 0)

I understand why this is happening, I’m trying to put in buffer, in addition to the text itself, additional characters like line_number. So I decided to use realloc inside my functions:

void process_b_flag(char **buffer, int b_flag, int *line_number) {
  if (b_flag && *buffer != NULL) {
    size_t length = strlen(*buffer);
    if (length > 1) {
      char *tmp = strdup(*buffer);
      (*buffer)[0] = '\0';
      sprintf(*buffer, "%6d\t", (*line_number)++);
      strcat(*buffer, tmp);
      if (strlen(*buffer) + 1 > length) {
        char *new_buffer = realloc(*buffer, length + 100);
        if (new_buffer != NULL) {
          *buffer = new_buffer;
        } else {
          // Обработка ошибки realloc
          free(tmp);
          fprintf(stderr, "Memory reallocation error\n");
          exit(EXIT_FAILURE);
        }
      }
      free(tmp);
    }
  }
}

Unfortunately, this approach doesn't work for some reason. I don't understand why, but for some reason the «length” variable reflects the length of the previous line, not the current one. Accordingly, if there is a large difference between the number of characters in the previous and current lines, the error “invalid next size” is issued. Can someone explain to me why this happens and how to fix it?

1

There are 1 best solutions below

6
John Bollinger On

Some of your functions to manipulate the text passing through the program can or do attempt to increase the number of characters in the lines. When the buffer is already full to capacity, such attempts overrun it, producing undefined behavior. The Valgrind report you presented shows some instances where this is happening.

For example, in process_b_flag(), you have:

    char *tmp = strdup(buffer);
    buffer[0] = '\0';
    sprintf(buffer, "%6d\t", (*line_number)++);
    strcat(buffer, tmp);

That requires buffer to have at least 7 bytes of allocated capacity past the first null terminator within, and it's simply not safe to rely on that without checking.

Similarly, in process_e_flag(), this ...

    if (length > 0 && buffer[length - 1] == '\n') {
      buffer[length - 1] = '\0';
      sprintf(buffer + length - 1, "%s", "$\n");
    } else {
      sprintf(buffer + length, "%s", "$");
    }

... requires there to be at least one byte of unused space in the buffer past the string terminator, and it overruns the buffer if there isn't.

Those and any similar overruns directly explain several of Valgrind's "invalid write" reports, and they indirectly explain at least some of the "invalid read" reports.

You could consider outputting whatever line decoration you want without modifying the buffer contents read by getline(), or at least without ever incurring a need to lengthen it. For example, when you want line numbers, just output them without inserting them into the buffer.

But if you want or need to enlarge the buffer during your line processing, then it would fit cleanly into your program to follow getline()'s model: receive a pointer to the buffer pointer and a pointer to its current size. If you find that you need more space then reallocate, updating the buffer pointer and size.