How can I use regex to parse key:value pairs in a string?

93 Views Asked by At
/*
 * Scan a NUL-terminated buffer with a POSIX extended regular expression
 * and print one "Key: <key>, Value: <value>" line per match.
 *
 * Capture group 1 of the pattern is reported as the key and capture
 * group 2 as the value. The pattern text is kept exactly as posted,
 * because its behaviour is what this question is about.
 *
 * buffer: NUL-terminated text to scan; it is only read.
 *
 * Terminates the process with EXIT_FAILURE if the pattern does not compile.
 */
void parseBuffer(char buffer[]) {
    static const char pattern[] = "([^:]+):[[:space:]]*([^\\r\\n]+)[\\r\\n]*";

    regex_t regex;
    if (regcomp(&regex, pattern, REG_EXTENDED) != 0) {
        fprintf(stderr, "Failed to compile regex\n");
        exit(EXIT_FAILURE);
    }

    /* matches[0] is the whole match, [1] the key, [2] the value. */
    regmatch_t matches[3];
    const char *ptr = buffer;

    while (regexec(&regex, ptr, 3, matches, 0) == 0) {
        /*
         * Print straight from the input using a precision instead of
         * copying into fixed-size arrays: a key or value of any length
         * is handled without overflowing a local buffer.
         */
        int key_len = (int)(matches[1].rm_eo - matches[1].rm_so);
        int value_len = (int)(matches[2].rm_eo - matches[2].rm_so);

        printf("Key: %.*s, Value: %.*s\n",
               key_len, ptr + matches[1].rm_so,
               value_len, ptr + matches[2].rm_so);

        /* Match offsets are relative to ptr, so resume right after this match. */
        ptr += matches[0].rm_eo;
    }

    regfree(&regex);
}

For this input: "Length: 10\r\nHello: hi\r\n\r\n", why does parseBuffer print the key as Length and the value as the rest of the string? I want it to print two separate keys and two separate values (Length: 10 and Hello: hi).

1

There is 1 best solution below

0
Oka On

In

char pattern[] = "([^:]+):[[:space:]]*([^\\r\\n]+)[\\r\\n]*";

In a C string literal, \\r and \\n are not CR and LF: each is an escaped backslash (a single \ character) followed by the literal character r or n.

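In other words, "\\r\\n" is a string containing the ASCII bytes 5C 72 5C 6E. Inside a POSIX bracket expression a backslash is an ordinary character, so [^\\r\\n] excludes only \, r and n; CR and LF still match, which is why the value runs on to the end of the input.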

You want to use the actual '\r' and '\n' characters (ASCII 0D and 0A).

char pattern[] = "([^:]+):[[:space:]]*([^\r\n]+)[\r\n]*";

A slightly refactored example:

#include <regex.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Print every "key: value" pair found in a NUL-terminated buffer, one
 * line per pair, in the form
 *
 *     key[<length>]<<key>> field[<length>]<<field>>
 *
 * A pair never spans a line break: the key stops at ':' , CR or LF, only
 * spaces and tabs are skipped after the colon, and the field runs to the
 * end of the line (it may be empty). Lines without a colon are skipped.
 *
 * buffer: NUL-terminated text to scan; it is only read.
 *
 * Prints the regcomp() diagnostic to stderr and terminates the process
 * with EXIT_FAILURE if the pattern does not compile.
 */
static void parseBuffer(const char *buffer)
{
    /*
     * "\r", "\n" and "\t" are real control characters in this literal.
     *   group 1 (key)  : one or more characters other than ':', CR, LF
     *   separator      : ':' followed by optional spaces/tabs
     *   group 2 (field): everything up to the next CR/LF, possibly empty
     * Using [ \t]* instead of [[:space:]]* keeps an empty field from
     * swallowing the line break and the following line.
     */
    const char *pattern = "([^:\r\n]+):[ \t]*([^\r\n]*)";

    regex_t regex;
    int ec = regcomp(&regex, pattern, REG_EXTENDED);

    if (ec) {
        char error[512];
        regerror(ec, &regex, error, sizeof error);
        fprintf(stderr, "%s\n", error);
        exit(EXIT_FAILURE);
    }

    /* matches[0] is the whole match, [1] the key, [2] the field. */
    regmatch_t matches[3];
    size_t offset = 0;

    while (0 == regexec(&regex, buffer + offset, 3, matches, 0)) {
        /* Offsets reported by regexec() are relative to the search start. */
        const char *base = buffer + offset;
        size_t key_length = (size_t) (matches[1].rm_eo - matches[1].rm_so);
        size_t field_length = (size_t) (matches[2].rm_eo - matches[2].rm_so);

        printf("key[%zu]<%.*s> field[%zu]<%.*s>\n",
                key_length, (int) key_length, base + matches[1].rm_so,
                field_length, (int) field_length, base + matches[2].rm_so);

        /* A match always contains a key character and ':', so this advances. */
        offset += (size_t) matches[0].rm_eo;
    }

    regfree(&regex);
}

/* Demo driver: runs parseBuffer() on a two-pair, CRLF-terminated sample. */
int main(void)
{
    const char *sample = "Length: 10\r\nHello: hi\r\n\r\n";

    parseBuffer(sample);
    return 0;
}

Output:

key[6]<Length> field[2]<10>
key[5]<Hello> field[2]<hi>