I am working on a Linux system, and I believe the standard std::string on Linux
can hold both Unicode and ASCII text. I would therefore like to use std::string
in my code, but I receive strings from another application as std::basic_string<unsigned short int, TRAITS_CLASS>
(a type chosen so that it works on both Windows and Linux). TRAITS_CLASS
is defined as follows:
/**
 * Character-traits class whose code unit is `unsigned short`.
 *
 * It supplies the static operations (assign, eq, lt, compare, length, copy,
 * find, move, fill, and the int_type/eof helpers) that a traits parameter of
 * std::basic_string<unsigned short, TRAITS_CLASS> is expected to provide.
 * All members are stateless static functions.
 */
class TRAITS_CLASS
{
public:
    using char_type = unsigned short;
    using int_type = unsigned short;
    using pos_type = size_t;
    using off_type = size_t;
    using state_type = int;

    /// Stores the single code unit `src` into `dest`.
    static inline void assign(unsigned short &dest, const unsigned short &src)
    {
        dest = src;
    }

    /// Returns true when the two code units are equal.
    static inline bool eq(const unsigned short &left, const unsigned short &right)
    {
        return left == right;
    }

    /// Returns true when `left` orders strictly before `right`.
    static inline bool lt(const unsigned short &left, const unsigned short &right)
    {
        return right > left;
    }

    /// Lexicographically compares the first `count` units of `p1` and `p2`.
    /// Returns -1, 0 or 1; the first differing unit decides the result.
    static int compare(const unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i != count; ++i)
        {
            if (lt(p1[i], p2[i]))
            {
                return -1;
            }
            if (lt(p2[i], p1[i]))
            {
                return 1;
            }
        }
        return 0;
    }

    /// Counts the code units that precede the first zero unit in `p`.
    static size_t length(const unsigned short *p)
    {
        const unsigned short *cursor = p;
        while (*cursor != 0)
        {
            ++cursor;
        }
        return static_cast<size_t>(cursor - p);
    }

    /// Copies `count` units from `p2` to `p1`, front to back, and returns `p1`.
    static unsigned short* copy(unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i != count; ++i)
        {
            assign(p1[i], p2[i]);
        }
        return p1;
    }

    /// Returns a pointer to the first unit equal to `value` among the first
    /// `count` units of `p`, or a null pointer when there is none.
    static const unsigned short* find(const unsigned short *p, size_t count,
                                      const unsigned short &value)
    {
        for (size_t i = 0; i != count; ++i)
        {
            if (eq(p[i], value))
            {
                return p + i;
            }
        }
        return nullptr;
    }

    /// Copies `count` units from `src` to `dest` and returns `dest`.
    /// When `dest` starts inside [src, src + count) the copy runs back to
    /// front so that source units are read before they are overwritten.
    static unsigned short* move(unsigned short *dest, const unsigned short *src, size_t count)
    {
        const bool dest_inside_src = (src < dest) && (dest < src + count);
        if (dest_inside_src)
        {
            for (size_t i = count; i != 0; --i)
            {
                assign(dest[i - 1], src[i - 1]);
            }
        }
        else
        {
            for (size_t i = 0; i != count; ++i)
            {
                assign(dest[i], src[i]);
            }
        }
        return dest;
    }

    /// Fills the first `count` units of `dest` with `value` and returns `dest`.
    static unsigned short* assign(unsigned short *dest, size_t count, unsigned short value)
    {
        for (size_t i = 0; i != count; ++i)
        {
            assign(dest[i], value);
        }
        return dest;
    }

    /// Converts an int_type value to a code unit.
    static inline unsigned short to_char_type(const int_type &arg)
    {
        return static_cast<unsigned short>(arg);
    }

    /// Converts a code unit to int_type.
    static inline int_type to_int_type(const unsigned short &value)
    {
        return static_cast<int_type>(value);
    }

    /// Returns true when the two int_type values are equal.
    static inline bool eq_int_type(const int_type &left, const int_type &right)
    {
        return left == right;
    }

    /// Returns EOF converted to int_type.
    static inline int_type eof()
    {
        return static_cast<int_type>(EOF);
    }

    /// Returns `value` unchanged unless it equals eof(), in which case 1 is returned.
    static inline int_type not_eof(const int_type &value)
    {
        if (value == eof())
        {
            return 1;
        }
        return value;
    }
};
How can I assign a normal std::string to the above std::basic_string
specialization? For example:
basic_string<unsigned short int, TRAITS_CLASS> temp = u"string";
If assignment is not possible, how can I use the above basic_string
type?
std::string (aka std::basic_string<char>) has no concept of Unicode or ASCII; all it knows about is char elements, nothing more. You might be confused by the fact that Linux apps typically use UTF-8 strings, and UTF-8 can be stored in a std::string (or preferably in a std::u8string, aka std::basic_string<char8_t>, in C++20). But it is your code's job to decide, and keep track of, which encoding any given std::string holds.
You cannot directly assign a std::string to/from another std::basic_string<CharT> where CharT is a different character type than char. You would have to use a type-cast to work around that, assuming the data is compatible - which in your example is NOT the case!
char is 1 byte in size, but unsigned short int is 2 bytes. So, your other application's basic_strings are most likely using UCS-2/UTF-16, which you can't store in a std::string (well, not the way you want, anyway), but you can store in a std::u16string (aka std::basic_string<char16_t>), or in a std::wstring (aka std::basic_string<wchar_t>) on Windows, e.g.:
std::u16string str(reinterpret_cast<const char16_t*>(temp.data()), temp.size());
If you absolutely need to use std::string in your code, then you will have to convert between UTF-8 (or whatever other char-compatible charset you want) and the other application's 16-bit format (assuming UCS-2/UTF-16), such as with std::wstring_convert (deprecated since C++17) or a 3rd-party Unicode library like libiconv, ICU, etc.