How to assign a std::string to std::basic_string<unsigned short int, TRAITS_CLASS>(Unicode2String) on Linux

627 Views Asked by At

I am working on a Linux system, and I think standard Linux std::string supports both Unicode and ASCII characters. So, I want to use std::string in my code, but I receive strings from an application in the format of std::basic_string<unsigned short int, TRAITS_CLASS> (to support both Windows and Linux). TRAITS_CLASS is defined as below:

// Character-traits class for strings whose elements are `unsigned short`.
// Every operation is a static member and works on raw `unsigned short`
// values or buffers; the class holds no state.
class TRAITS_CLASS
{
    public:
        // Member types a std::basic_string looks up on its traits parameter.
        typedef unsigned short char_type;
        typedef unsigned short int_type;
        typedef size_t pos_type;
        typedef size_t off_type;
        typedef int state_type;

        // Stores `src` into `dest`.
        static inline void assign(unsigned short &dest, const unsigned short &src)
        {
            dest = src;
        }

        // True when both code units hold the same value.
        static inline bool eq(const unsigned short &left, const unsigned short &right)
        {
            return (left == right);
        }

        // True when `left` is numerically smaller than `right`.
        static inline bool lt(const unsigned short &left, const unsigned short &right)
        {
            return (left < right);
        }

        // Compares the first `count` units of two buffers.
        // Returns 0 when they all match, otherwise -1 or 1 depending on whether
        // the first differing unit of `p1` is smaller or larger than that of `p2`.
        static int compare(const unsigned short *p1, const unsigned short *p2, size_t count)
        {
            size_t idx = 0;
            while (idx < count && eq(p1[idx], p2[idx]))
            {
                ++idx;
            }
            if (idx == count)
            {
                return 0;
            }
            return lt(p1[idx], p2[idx]) ? -1 : 1;
        }

        // Counts the units that precede the first zero unit in `p`.
        static size_t length(const unsigned short *p)
        {
            const unsigned short *cursor = p;
            while (*cursor != 0)
            {
                ++cursor;
            }
            return static_cast<size_t>(cursor - p);
        }

        // Copies `count` units from `p2` to `p1`, front to back, and returns `p1`.
        static unsigned short* copy(unsigned short *p1, const unsigned short *p2, size_t count)
        {
            for (size_t idx = 0; idx < count; ++idx)
            {
                p1[idx] = p2[idx];
            }
            return p1;
        }

        // Returns a pointer to the first of the `count` units at `p` that equals
        // `value`, or a null pointer when none does.
        static const unsigned short* find(const unsigned short *p, size_t count,
                const unsigned short &value)
        {
            const unsigned short *const last = p + count;
            for (const unsigned short *it = p; it != last; ++it)
            {
                if (*it == value)
                {
                    return it;
                }
            }
            return nullptr;
        }

        // Copies `count` units from `src` to `dest` and returns `dest`.
        // When `dest` starts inside [src, src + count) the copy runs back to
        // front so that source units are read before they are overwritten.
        static unsigned short* move(unsigned short *dest, const unsigned short *src, size_t count)
        {
            const bool destInsideSource = (src < dest) && (dest < src + count);
            if (destInsideSource)
            {
                for (size_t remaining = count; remaining > 0; --remaining)
                {
                    dest[remaining - 1] = src[remaining - 1];
                }
            }
            else
            {
                for (size_t idx = 0; idx < count; ++idx)
                {
                    dest[idx] = src[idx];
                }
            }
            return dest;
        }

        // Fills the first `count` units at `dest` with `value` and returns `dest`.
        static unsigned short* assign(unsigned short *dest, size_t count, unsigned short value)
        {
            for (size_t idx = 0; idx < count; ++idx)
            {
                dest[idx] = value;
            }
            return dest;
        }

        // int_type -> char_type conversion (both are unsigned short here).
        static inline unsigned short to_char_type(const int_type &arg)
        {
            return static_cast<char_type>(arg);
        }

        // char_type -> int_type conversion (both are unsigned short here).
        static inline int_type to_int_type(const unsigned short &value)
        {
            return static_cast<int_type>(value);
        }

        // Equality on int_type values.
        static inline bool eq_int_type(const int_type &left, const int_type &right)
        {
            return (left == right);
        }

        // End-of-file marker: the EOF macro narrowed to int_type.
        static inline int_type eof()
        {
            return static_cast<int_type>(EOF);
        }

        // Returns `value` unchanged unless it equals eof(), in which case 1 is
        // returned so the result never equals eof().
        static inline int_type not_eof(const int_type &value)
        {
            if (value == eof())
            {
                return 1;
            }
            return value;
        }
};

How can I assign a normal std::string to the above std::basic_string template? Like:

// Desired usage from the question: initialise the custom string directly from a UTF-16 literal.
basic_string<unsigned short int, TRAITS_CLASS> temp = u"string";

If assignment is not possible, how can I use the above basic_string template?

1

There is 1 best solution below

0
On

I think standard Linux std::string supports both Unicode and ASCII characters

std::string (aka std::basic_string<char>) has no concept of Unicode or ASCII; all it knows about is char elements, nothing more. You might be confused by the fact that Linux apps typically use UTF-8 strings, and UTF-8 can be stored in a std::string (or preferably in std::u8string aka std::basic_string<char8_t> in C++20). But it is your code's job to decide, and keep track of, which encoding any given std::string holds.

How can I assign a normal std::string to the above std::basic_string template?

You cannot directly assign a std::string to/from another std::basic_string<CharT> where CharT is a different character type than char.

You would have to use a type-cast to work around that, assuming the data is compatible - which in your example is NOT the case! char is 1 byte in size, but unsigned short int is 2 bytes. So, your other application's basic_strings are most likely using UCS-2/UTF-16, which you can't store in a std::string (well, not the way you want, anyway), but you can store in a std::u16string (aka std::basic_string<char16_t>), or in a std::wstring (aka std::basic_string<wchar_t>) on Windows, eg:

// The three examples below are independent alternatives, which is why
// `temp` and `str` are declared more than once.
// NOTE(review): every cast here assumes unsigned short has the same width
// as char16_t - confirm for the target platform.
// Example 1: build the custom string from a UTF-16 literal. The pointer-only
// form relies on the literal's terminating zero to find the end.
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
    reinterpret_cast<const unsigned short int*>(u"string");

// or: pass the length explicitly (6 code units in u"string").
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
    reinterpret_cast<const unsigned short int*>(u"string"),
    6);
// Example 2: build the custom string from an existing std::u16string.
std::u16string str = u"string";

std::basic_string<unsigned short int, TRAITS_CLASS> temp =
    reinterpret_cast<const unsigned short int*>(str.c_str());

// or: pass the length explicitly instead of scanning for the zero terminator.
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
    reinterpret_cast<const unsigned short int*>(str.c_str()),
    str.size());
// Example 3: the reverse direction, custom string to std::u16string.
// `...` is a placeholder for however `temp` is obtained.
std::basic_string<unsigned short int, TRAITS_CLASS> temp = ...;

std::u16string str =
    reinterpret_cast<const char16_t*>(temp.c_str());

// or: pass the length explicitly instead of scanning for the zero terminator.
std::u16string str(
    reinterpret_cast<const char16_t*>(temp.c_str()),
    temp.size());

If you absolutely need to use std::string in your code, then you will have to convert between UTF-8 (or whatever other char-compatible charset you want) and the other application's 16-bit format (assuming UCS-2/UTF-16), such as with std::wstring_convert (deprecated since C++17) or a 3rd-party Unicode library like libiconv, ICU, etc.