I want to produce a fixed-length byte string from a number in C++, just like Python's struct.pack does. I thought of itoa(i, buffer, 2),
but the problem is that the length of its output depends on the platform. Is there any way to make it platform-independent?
python struct.pack equivalent in c++
10.3k Views Asked by Xinus AtThere are 5 best solutions below
I wrote this implementation in C/C++ to compare the execution time of the pack function across Python, PHP, Dart and C++ (see https://github.com/dart-lang/sdk/issues/50708):
#include "time.h"
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <map>
#include <span>
#include <vector>
// Byte-order tags used throughout this file. NOT_SET means "host byte order
// has not been detected yet"; BIG and LITTLE name the two concrete orders.
#define STRUCT_ENDIAN_NOT_SET 0
#define STRUCT_ENDIAN_BIG 1
#define STRUCT_ENDIAN_LITTLE 2
// Cached host byte order. Stays STRUCT_ENDIAN_NOT_SET until struct_init()
// stores the result of struct_get_endian() here.
static int myendian = STRUCT_ENDIAN_NOT_SET;
// Writes `str` to std::cout, followed by every element of `vec` streamed as a
// raw character (unsigned char is printed as a character, not as a number),
// and terminates the line with "\r\n".
//
// `vec` is taken by const reference so that the buffer is not copied on every
// call; existing call sites compile unchanged.
void debug_print2(const char *str, const std::vector<unsigned char> &vec)
{
    std::cout << str;
    for (const unsigned char byte : vec)
    {
        std::cout << byte;
    }
    std::cout << "\r\n";
}
// Detects the host byte order at run time.
//
// An int holding the value 1 is inspected through a char pointer: if the byte
// at the lowest address is non-zero the least-significant byte is stored
// first, so the host is little-endian; otherwise it is reported as big-endian.
//
// Returns STRUCT_ENDIAN_LITTLE or STRUCT_ENDIAN_BIG.
int struct_get_endian(void)
{
    const int probe = 0x00000001;
    const char *lowest_byte = reinterpret_cast<const char *>(&probe);
    return (*lowest_byte != 0) ? STRUCT_ENDIAN_LITTLE : STRUCT_ENDIAN_BIG;
}
// Caches the host byte order reported by struct_get_endian() in the
// file-level variable `myendian`.
static void struct_init(void)
{
myendian = struct_get_endian();
}
// Writes the 2 bytes of `val` at `*bp` and advances `*bp` past them.
//
// bp     - in/out cursor into the destination buffer; must have room for
//          2 more bytes.
// val    - value to serialise.
// endian - requested byte order. When it equals `myendian` the bytes are
//          written in host order, otherwise in the reverse of host order.
//          `myendian` therefore has to hold the detected host order for
//          `endian` to act as an absolute byte order.
//
// The previous implementation always emitted the least-significant byte first
// in the `endian == myendian` branch. That is host order only on a
// little-endian machine, so on a big-endian host both branches produced the
// opposite of the requested order. Copying the actual object representation
// keeps little-endian hosts byte-for-byte identical and fixes big-endian ones.
static void pack_int16_t(unsigned char **bp, uint16_t val, int endian)
{
    // Bytes of val exactly as the host stores them.
    const unsigned char *host_bytes = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
}
// Writes the 4 bytes of `val` at `*bp` and advances `*bp` past them.
//
// bp     - in/out cursor into the destination buffer; must have room for
//          4 more bytes.
// val    - value to serialise.
// endian - requested byte order. When it equals `myendian` the bytes are
//          written in host order, otherwise in the reverse of host order.
//          `myendian` therefore has to hold the detected host order for
//          `endian` to act as an absolute byte order.
//
// The previous implementation always emitted the least-significant byte first
// in the `endian == myendian` branch. That is host order only on a
// little-endian machine, so on a big-endian host both branches produced the
// opposite of the requested order. Copying the actual object representation
// keeps little-endian hosts byte-for-byte identical and fixes big-endian ones.
static void pack_int32_t(unsigned char **bp, uint32_t val, int endian)
{
    // Bytes of val exactly as the host stores them.
    const unsigned char *host_bytes = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
}
// Writes the 8 bytes of `val` at `*bp` and advances `*bp` past them.
//
// bp     - in/out cursor into the destination buffer; must have room for
//          8 more bytes.
// val    - value to serialise.
// endian - requested byte order. When it equals `myendian` the bytes are
//          written in host order, otherwise in the reverse of host order.
//          `myendian` therefore has to hold the detected host order for
//          `endian` to act as an absolute byte order.
//
// The previous implementation always emitted the least-significant byte first
// in the `endian == myendian` branch. That is host order only on a
// little-endian machine, so on a big-endian host both branches produced the
// opposite of the requested order. Copying the actual object representation
// keeps little-endian hosts byte-for-byte identical and fixes big-endian ones.
static void pack_int64_t(unsigned char **bp, uint64_t val, int endian)
{
    // Bytes of val exactly as the host stores them.
    const unsigned char *host_bytes = reinterpret_cast<const unsigned char *>(&val);
    const int width = static_cast<int>(sizeof val);

    if (endian == myendian)
    {
        for (int i = 0; i < width; ++i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
    else
    {
        for (int i = width - 1; i >= 0; --i)
        {
            *((*bp)++) = host_bytes[i];
        }
    }
}
// Serialises integers into a byte buffer according to a format string modelled
// on Python's struct.pack.
//
// b      - destination buffer. The caller must guarantee it is large enough;
//          no bounds checking is performed.
// fmt    - NUL-terminated format string. Byte-order prefixes:
//            '='  host (native) byte order
//            '<'  little-endian
//            '>'  big-endian
//            '!'  network order (big-endian)
//          Data codes, each consuming the next entry of `values`:
//            'b', 'c'  1 byte
//            'h'       2 bytes
//            'i'       4 bytes
//            'q'       8 bytes
//          Any other character is ignored and consumes no value.
// values - one entry per data code in `fmt`; only the low bytes of each entry
//          that fit the code's width are written.
// offset - byte position inside `b` at which writing starts.
//
// Returns the position just past the last byte written, measured from the
// start of `b` (that is, `offset` plus the number of bytes produced).
//
// Fixes relative to the previous version:
//  * The host byte order is detected BEFORE it is used as the default order.
//    Previously the default was read while `myendian` could still be
//    STRUCT_ENDIAN_NOT_SET, so the very first call without a prefix packed
//    big-endian while every later call packed in host order.
//  * Only data codes advance through `values`. Previously every format
//    character did, so a byte-order prefix silently skipped values[0] and the
//    last data code read one element past the end of the array.
//  * Removed two locals that were assigned but never used.
static int pack(void *b, const char *fmt, long long *values, int offset = 0)
{
    if (STRUCT_ENDIAN_NOT_SET == myendian)
    {
        struct_init();
    }

    unsigned char *const buf = static_cast<unsigned char *>(b);
    unsigned char *bp = buf + offset;
    int ep = myendian; // no prefix => host byte order
    int idx = 0;       // index of the next unconsumed entry in `values`

    // Emits the low `width` bytes of `raw` in the currently selected order.
    // Working on the unsigned representation keeps the shifts well defined
    // for negative inputs.
    const auto emit = [&bp, &ep](long long raw, int width) {
        const unsigned long long bits = static_cast<unsigned long long>(raw);
        for (int i = 0; i < width; ++i)
        {
            const int byte_index = (ep == STRUCT_ENDIAN_LITTLE) ? i : (width - 1 - i);
            *bp++ = static_cast<unsigned char>(bits >> (8 * byte_index));
        }
    };

    for (const char *p = fmt; *p != '\0'; ++p)
    {
        switch (*p)
        {
        case '=': // native
            ep = myendian;
            break;
        case '<': // little-endian
            ep = STRUCT_ENDIAN_LITTLE;
            break;
        case '>': // big-endian
        case '!': // network (= big-endian)
            ep = STRUCT_ENDIAN_BIG;
            break;
        case 'b':
        case 'c':
            emit(values[idx++], 1);
            break;
        case 'h':
            emit(values[idx++], 2);
            break;
        case 'i':
            emit(values[idx++], 4);
            break;
        case 'q':
            emit(values[idx++], 8);
            break;
        default: // unsupported character: nothing written, nothing consumed
            break;
        }
    }
    return static_cast<int>(bp - buf);
}
// Benchmark driver: packs four 32-bit integers with pack() a fixed number of
// times, appends every packed record to one large vector, then prints the
// first record and the elapsed wall-clock time.
//
// Changes relative to the previous version:
//  * std::setprecision(5) is now streamed BEFORE the value; it used to come
//    after it and therefore had no effect on the printed number.
//  * Timing uses std::chrono::steady_clock instead of time(), whose one-second
//    resolution made the fractional digits meaningless.
//  * The iteration count and record size are named constants, and the reserve
//    size is computed in std::size_t so it cannot overflow int if the count
//    is raised.
int main()
{
    constexpr int kIterations = 100000000;
    constexpr int kBytesPerRecord = 16; // "iiii" => four 4-byte integers

    const auto start = std::chrono::steady_clock::now();
    // std::ios_base::sync_with_stdio(false);
    std::vector<unsigned char> myVector{};
    myVector.reserve(static_cast<std::size_t>(kIterations) * kBytesPerRecord);
    for (int i = 0; i < kIterations; i++)
    {
        char bytes[BUFSIZ] = {'\0'};
        long long values[4] = {64, 65, 66, 67};
        pack(bytes, "iiii", values);
        for (int j = 0; j < kBytesPerRecord; j++)
        {
            myVector.push_back(bytes[j]);
        }
    }
    const auto end = std::chrono::steady_clock::now();

    auto v2 = std::vector<unsigned char>(myVector.begin(), myVector.begin() + kBytesPerRecord);
    debug_print2("result: ", v2);

    const double time_taken = std::chrono::duration<double>(end - start).count();
    std::cout << "pack time: " << std::fixed << std::setprecision(5)
              << time_taken;
    std::cout << " sec " << std::endl;
    return 0;
}
Here's a start:
// Growable raw byte buffer that the append_* helpers write into.
using byte_buffer = std::vector<uint8_t>;

// Appends the low N bytes of `val` to `buf`, most-significant byte first
// (big-endian).
//
// N   - number of bytes to append. N == 0 appends nothing. If N is larger
//       than sizeof(uintmax_t) the extra leading bytes are written as zero.
// buf - destination buffer; grows by exactly N bytes.
// val - value to serialise.
//
// The previous version built its mask with `0xff << shift`, which is an `int`
// shift: it overflows at shift == 24 and is undefined for shift >= 32, so
// values wider than four bytes were packed incorrectly. Shifting the
// uintmax_t value itself avoids the problem.
template <std::size_t N>
void append_fixed_width(byte_buffer& buf, uintmax_t val) {
    constexpr std::size_t kValueBytes = sizeof(uintmax_t);
    for (std::size_t i = N; i-- > 0;) {
        // Byte i counts from the least-significant end; positions beyond the
        // width of uintmax_t have no bits to take, so they are zero.
        const uintmax_t shifted = (i < kValueBytes) ? (val >> (i * 8)) : uintmax_t{0};
        buf.push_back(static_cast<uint8_t>(shifted & 0xff));
    }
}
// Appends `val` to `buf` using exactly sizeof(IntType) bytes.
//
// The value is first widened to uintmax_t and then handed to
// append_fixed_width, which determines the byte order of the output.
template <typename IntType>
void append_bytes(byte_buffer& buf, IntType val) {
    constexpr std::size_t width = sizeof(IntType);
    const uintmax_t widened = uintmax_t(val);
    append_fixed_width<width>(buf, widened);
}
// Usage example: the number of bytes appended by each call is the size of the
// argument's deduced type.
int main() { // usage example
byte_buffer bytes;
append_bytes(bytes, 1); // appends sizeof(int) bytes
append_bytes(bytes, 1ul); // appends sizeof(unsigned long) bytes
append_bytes(bytes, 'a'); // appends 1 byte: in C++ a character literal has type char (it is int only in C)
append_bytes(bytes, char('a')); // appends 1 byte
return 0;
}
append_bytes will append any integer type into a byte buffer represented using a std::vector<uint8_t>. Values are packed in big-endian byte order. If you need to change this, then tweak append_fixed_width to traverse the value in a different order.
These functions build a raw byte buffer, so whoever is decoding it is responsible for knowing what is in there. IIRC, this is what struct.pack does as well; in other words, the caller of struct.unpack needs to provide the same format string. You can write a variant of append_fixed_width to pack a TLV (tag-length-value) record instead:
// Appends a tag-length-value record to `buf` as three consecutive fields, each
// written through append_fixed_width:
//   1. the tag `t`, in sizeof(TagType) bytes;
//   2. the length of the value, i.e. sizeof(ValueType), stored in
//      sizeof(std::size_t) bytes;
//   3. the value `val`, in sizeof(ValueType) bytes.
template <typename TagType, typename ValueType>
void append_tlv(byte_buffer& buf, TagType t, ValueType val) {
    constexpr std::size_t tag_width = sizeof(TagType);
    constexpr std::size_t length_width = sizeof(std::size_t);
    constexpr std::size_t value_width = sizeof(ValueType);

    append_fixed_width<tag_width>(buf, uintmax_t(t));
    append_fixed_width<length_width>(buf, uintmax_t(value_width));
    append_fixed_width<value_width>(buf, uintmax_t(val));
}
I would take a serious look at Jeremy's suggestion though. I wish that it had existed when I wrote all of the binary packing code that I have now.
You need to define an exact-width integer type through a typedef; you do that in a platform-specific manner. If you use C99, int16_t is predefined in <stdint.h>. You can then cast to that type and take the memory representation of the variable:
/* Convert orig_val to an exactly 16-bit signed integer. */
int16_t val = (int16_t) orig_val;
/* buf points at val's bytes as they are laid out in memory. */
void *buf = &val;
Notice that you still need to deal with endianness.
If you don't have C99, you can either use compile-time or run-time size tests. For compile-time tests, consider using autoconf, which already computes the sizes of the various primitive types, so that you can select a good type at compile time. At run-time, just have a series of sizeof tests. Notice that this is somewhat inappropriate for run-time, as the test will always come out with the same result. As an alternative to autoconf, you can also use compiler/system identification macros for a compile-time test.
If you're looking for a complete solution similar to Python's struct package, you might check out Google's Protocol Buffers Library. Using that will take care of a lot of issues (e.g. endian-ness, language-portability, cross-version compatibility) for you.