python struct.pack equivalent in c++

10.3k Views Asked by At

I want to produce a fixed-length byte string from a number, just like Python's struct.pack does, but in C++. I thought of using itoa(i, buffer, 2), but the problem is that the length of the result will depend on the platform. Is there any way to make it platform-independent?

5

There are 5 best solutions below

0
On BEST ANSWER

If you're looking for a complete solution similar to Python's struct package, you might check out Google's Protocol Buffers Library. Using that will take care of a lot of issues (e.g. endian-ness, language-portability, cross-version compatibility) for you.

0
On

I made this implementation in C/C++ to compare the execution time of the pack function across Python, PHP, Dart and C++: https://github.com/dart-lang/sdk/issues/50708

#include "time.h"

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <map>
#include <span>
#include <vector>

// Byte-order identifiers used by the packing helpers in this file.
// NOT_SET is the initial value of `myendian`, before struct_init() has run.
#define STRUCT_ENDIAN_NOT_SET 0
// Most significant byte is written first.
#define STRUCT_ENDIAN_BIG 1
// Least significant byte is written first.
#define STRUCT_ENDIAN_LITTLE 2

// Cached host byte order; stays STRUCT_ENDIAN_NOT_SET until struct_init()
// stores the result of struct_get_endian() here.
static int myendian = STRUCT_ENDIAN_NOT_SET;

/**
 * Prints a label followed by the raw bytes of `vec` and a CR/LF to std::cout.
 *
 * Each element is streamed as an `unsigned char`, i.e. as a character, not as
 * a number.
 *
 * @param str Label printed before the bytes; a null pointer prints no label.
 * @param vec Bytes to print. Taken by const reference so the call does not
 *            copy the whole vector.
 */
void debug_print2(const char *str, const std::vector<unsigned char> &vec)
{
    // Streaming a null `const char *` would put std::cout into a failed state
    // and silently drop everything printed afterwards.
    if (str != nullptr)
    {
        std::cout << str;
    }

    for (const unsigned char byte : vec)
    {
        std::cout << byte;
    }

    std::cout << "\r\n";
}

/**
 * Detects the byte order of the machine the program is running on.
 *
 * @return STRUCT_ENDIAN_LITTLE when the lowest-addressed byte of an int
 *         holding 1 is non-zero, STRUCT_ENDIAN_BIG otherwise.
 */
int struct_get_endian(void)
{
    const int probe = 1;
    // Look at the first byte in memory: it holds the 1 only when the least
    // significant byte is stored first.
    const char *first_byte = reinterpret_cast<const char *>(&probe);
    return (*first_byte != 0) ? STRUCT_ENDIAN_LITTLE : STRUCT_ENDIAN_BIG;
}

// Detects the host byte order and caches it in `myendian`.
static void struct_init(void)
{
    const int detected = struct_get_endian();
    myendian = detected;
}

/**
 * Writes a 16-bit value into the buffer and advances the write cursor.
 *
 * @param bp     Address of the write cursor; `*bp` is moved past the two
 *               bytes written.
 * @param val    Value to serialise.
 * @param endian Requested byte order; when it equals `myendian` the value is
 *               written in host order, otherwise in the reverse order.
 *
 * Precondition: `myendian` already holds the host byte order (see
 * struct_init()). While it is still STRUCT_ENDIAN_NOT_SET every request is
 * treated as non-native.
 */
static void pack_int16_t(unsigned char **bp, uint16_t val, int endian)
{
    // Copy the value's in-memory bytes rather than shifting: shifting always
    // produces the least significant byte first, which is the native order
    // only on a little-endian host.
    const unsigned char *raw = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = raw[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = raw[i];
        }
    }
}

/**
 * Writes a 32-bit value into the buffer and advances the write cursor.
 *
 * @param bp     Address of the write cursor; `*bp` is moved past the four
 *               bytes written.
 * @param val    Value to serialise.
 * @param endian Requested byte order; when it equals `myendian` the value is
 *               written in host order, otherwise in the reverse order.
 *
 * Precondition: `myendian` already holds the host byte order (see
 * struct_init()). While it is still STRUCT_ENDIAN_NOT_SET every request is
 * treated as non-native.
 */
static void pack_int32_t(unsigned char **bp, uint32_t val, int endian)
{
    // Copy the value's in-memory bytes rather than shifting: shifting always
    // produces the least significant byte first, which is the native order
    // only on a little-endian host.
    const unsigned char *raw = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = raw[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = raw[i];
        }
    }
}

/**
 * Writes a 64-bit value into the buffer and advances the write cursor.
 *
 * @param bp     Address of the write cursor; `*bp` is moved past the eight
 *               bytes written.
 * @param val    Value to serialise.
 * @param endian Requested byte order; when it equals `myendian` the value is
 *               written in host order, otherwise in the reverse order.
 *
 * Precondition: `myendian` already holds the host byte order (see
 * struct_init()). While it is still STRUCT_ENDIAN_NOT_SET every request is
 * treated as non-native.
 */
static void pack_int64_t(unsigned char **bp, uint64_t val, int endian)
{
    // Copy the value's in-memory bytes rather than shifting: shifting always
    // produces the least significant byte first, which is the native order
    // only on a little-endian host.
    const unsigned char *raw = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = raw[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = raw[i];
        }
    }
}

/**
 * Serialises integers into a byte buffer according to a format string
 * modelled on Python's struct.pack.
 *
 * Recognised format characters:
 *   '='            switch to host byte order (the default)
 *   '<'            switch to little-endian
 *   '>' and '!'    switch to big-endian
 *   'b', 'c'       1 byte
 *   'h'            2 bytes
 *   'i'            4 bytes
 *   'q'            8 bytes
 * Byte-order characters and unrecognised characters do not consume an entry
 * of `values`; every size character consumes exactly one, in order.
 *
 * @param b      Destination buffer. The caller must provide room for
 *               `offset` plus the total size described by `fmt`.
 * @param fmt    NUL-terminated format string.
 * @param values One entry per size character in `fmt`; only the low bytes
 *               that fit the requested size are written.
 * @param offset Byte position in `b` at which writing starts.
 * @return `offset` plus the number of bytes written.
 */
static int pack(void *b, const char *fmt, long long *values, int offset = 0)
{
    // Detect the host byte order BEFORE it is used as the default; otherwise
    // the very first call would see STRUCT_ENDIAN_NOT_SET and pack
    // big-endian while later calls pack in host order.
    if (STRUCT_ENDIAN_NOT_SET == myendian)
    {
        struct_init();
    }

    unsigned char *const buf = static_cast<unsigned char *>(b);
    unsigned char *bp = buf + offset;
    int ep = myendian;
    int idx = 0;

    for (const char *p = fmt; *p != '\0'; ++p)
    {
        int width = 0;
        switch (*p)
        {
        case '=': // native
            ep = myendian;
            continue;
        case '<': // little-endian
            ep = STRUCT_ENDIAN_LITTLE;
            continue;
        case '>': // big-endian
        case '!': // network (= big-endian)
            ep = STRUCT_ENDIAN_BIG;
            continue;
        case 'b':
        case 'c':
            width = 1;
            break;
        case 'h':
            width = 2;
            break;
        case 'i':
            width = 4;
            break;
        case 'q':
            width = 8;
            break;
        default: // unknown character: nothing to write, no value consumed
            continue;
        }

        // Only size characters reach this point, so `values` is indexed once
        // per packed field. Shifting an unsigned copy keeps the behaviour
        // well defined for negative inputs.
        const unsigned long long bits = static_cast<unsigned long long>(values[idx++]);
        for (int k = 0; k < width; ++k)
        {
            const int byte_index = (ep == STRUCT_ENDIAN_LITTLE) ? k : (width - 1 - k);
            *bp++ = static_cast<unsigned char>(bits >> (8 * byte_index));
        }
    }
    return static_cast<int>(bp - buf);
}

/**
 * Benchmark driver: packs four 32-bit integers 100,000,000 times, collects
 * every packed record in one vector, then prints the first record and the
 * elapsed wall-clock time.
 *
 * Note: the result vector reserves 1.6 GB (100,000,000 records x 16 bytes).
 */
int main()
{
    constexpr std::size_t kIterations = 100000000;
    constexpr std::size_t kRecordSize = 16; // "iiii": four values x four bytes

    // steady_clock is monotonic and has sub-second resolution; time() only
    // counts whole seconds.
    const auto start = std::chrono::steady_clock::now();
    // std::ios_base::sync_with_stdio(false);

    std::vector<unsigned char> myVector{};
    myVector.reserve(kIterations * kRecordSize);

    long long values[4] = {64, 65, 66, 67};
    for (std::size_t i = 0; i < kIterations; i++)
    {
        // Sized to exactly one record so only 16 bytes are cleared per pass.
        unsigned char bytes[kRecordSize] = {};
        pack(bytes, "iiii", values);
        myVector.insert(myVector.end(), bytes, bytes + kRecordSize);
    }

    const auto end = std::chrono::steady_clock::now();
    const std::vector<unsigned char> v2(myVector.begin(), myVector.begin() + kRecordSize);
    debug_print2("result: ", v2);

    const double time_taken = std::chrono::duration<double>(end - start).count();
    // Stream manipulators only affect values inserted after them, so the
    // precision has to be set before the number is written.
    std::cout << "pack time: " << std::fixed << std::setprecision(5)
              << time_taken;
    std::cout << " sec " << std::endl;
    return 0;
}
0
On

Here's a start:

/// Growable byte sequence that the append_* helpers write into.
using byte_buffer = std::vector<uint8_t>;

/**
 * Appends `val` to `buf` as exactly N bytes, most significant byte first.
 *
 * Only the low N bytes of `val` are emitted. When N is larger than
 * `uintmax_t`, the extra leading bytes are written as zero.
 *
 * @tparam N  Number of bytes to append.
 * @param buf Buffer to append to.
 * @param val Value to encode.
 */
template <std::size_t N>
void append_fixed_width(byte_buffer& buf, uintmax_t val) {
    constexpr std::size_t kValueBytes = sizeof(uintmax_t);
    // Shift the value itself instead of building a `0xff << shift` mask: the
    // mask would be computed as an int, which is undefined once the shift
    // reaches the width of int and yields wrong bytes for 64-bit values.
    for (std::size_t i = N; i-- > 0;) {
        const uintmax_t byte =
            (i < kValueBytes) ? ((val >> (i * 8)) & 0xffu) : uintmax_t(0);
        buf.push_back(static_cast<uint8_t>(byte));
    }
}

/**
 * Appends `val` to `buf` using as many bytes as its type occupies.
 *
 * The value is converted to `uintmax_t` and handed to append_fixed_width
 * with the width fixed to `sizeof(IntType)`.
 */
template <typename IntType>
void append_bytes(byte_buffer& buf, IntType val) {
    constexpr std::size_t kWidth = sizeof(IntType);
    const uintmax_t widened = uintmax_t(val);
    append_fixed_width<kWidth>(buf, widened);
}

int main() { // usage example
     byte_buffer bytes;
     append_bytes(bytes, 1);   // appends sizeof(int) bytes
     append_bytes(bytes, 1ul); // appends sizeof(unsigned long) bytes
     append_bytes(bytes, 'a'); // appends sizeof(int) bytes :p
     append_bytes(bytes, char('a')); // appends 1 byte
     return 0;
}

append_bytes appends a value of any integer type to a byte buffer represented as a std::vector<uint8_t>. Values are packed in big-endian byte order. If you need to change this, then tweak append_fixed_width to traverse the value in a different order.

These functions build a raw byte buffer, so whoever is decoding it is responsible for knowing what is in there. IIRC, this is what struct.pack does as well; in other words, the caller of struct.unpack needs to provide the same format string. You can write a variant of append_fixed_width to pack a TLV instead:

/**
 * Appends a tag/length/value record to `buf`.
 *
 * Three fields are written through append_fixed_width, in this order:
 * the tag, the size of the value type in bytes, and the value itself.
 *
 * NOTE(review): the length field is `sizeof(std::size_t)` bytes wide, so the
 * record layout follows the platform's size_t width - confirm that this is
 * acceptable for the intended consumers.
 */
template <typename TagType, typename ValueType>
void append_tlv(byte_buffer& buf, TagType t, ValueType val) {
    constexpr std::size_t kTagWidth = sizeof(TagType);
    constexpr std::size_t kLengthWidth = sizeof(std::size_t);
    constexpr std::size_t kValueWidth = sizeof(ValueType);

    append_fixed_width<kTagWidth>(buf, uintmax_t(t));              // tag
    append_fixed_width<kLengthWidth>(buf, uintmax_t(kValueWidth)); // length
    append_fixed_width<kValueWidth>(buf, uintmax_t(val));          // value
}

I would take a serious look at Jeremy's suggestion though. I wish that it had existed when I wrote all of the binary packing code that I have now.

0
On

You need to define an exact-width integer type through a typedef; you do that in a platform-specific manner. If you use C99, int16_t is predefined in <stdint.h>. You can then cast to that type, and take the memory representation of a variable:

/* Convert to the exact-width 16-bit type so the size is the same everywhere. */
int16_t val = (int16_t) orig_val;
/* Untyped pointer to val's storage, i.e. to its in-memory bytes. */
void *buf = &val;

Notice that you still need to deal with endianness.

If you don't have C99, you can either use compile-time or run-time size tests. For compile-time tests, consider using autoconf, which already computes the sizes of the various primitive types, so that you can select a good type at compile time. At run-time, just have a series of sizeof tests. Notice that this is somewhat inappropriate for run-time, as the test will always come out with the same result. As an alternative to autoconf, you can also use compiler/system identification macros for a compile-time test.

0
On

The C++ way would be to use stringstream:

// String-backed stream that collects the formatted output.
stringstream ss;
int number=/*your number here*/;
// Insert the number into the stream as text.
ss<<number;

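and to get the buffer you'd use ss.str().c_str(). Note that str() returns a temporary std::string, so that pointer is only valid until the end of the expression it appears in; store the string in a variable first if you need the buffer afterwards.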