I am trying to convert cp1251 text to UTF-8. What I am doing here is building a buffer of the hex codes of the given symbols in cp1251, to later convert those bytes to UTF-8. The problem is that sometimes the converted string has some trash symbols at the end.
The output of converting the same string many times (203 ñòåï ÒÖÍÐ.466219.007 Èíòåðàêòèâíûé êîìïëåêñ NextPanel 43/NAUO1):
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1омплекс NextPanelВ 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO14V
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1sгцf¤№
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1п}тУї0bЊ.Z¶ї¬ЁЌ€/ГїаА Om›Ґї
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1sгцf¤№
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1™™™™Щ?
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
char *ConvertWindows1251ToUtf8(string stringToConvert)
{
// string tmpString = stringToConvert.ToCString();
const char *tmpCharArray = stringToConvert.c_str();
vector<char> charBuffer;
char *buffer = new char[char_traits<char>::length(tmpCharArray)];
int latter = 0;
for (int i = 0; i < std::char_traits<char>::length(tmpCharArray); i++)
{
string tmpHexLatter;
int hexLatter = 0xFF & tmpCharArray[i];
stringstream ss;
ss << hex << hexLatter;
tmpHexLatter = ss.str();
if (hexLatter != 0xc3)
{
if (hexLatter != 0x30 && hexLatter != 0x31 && hexLatter != 0x32 && hexLatter != 0x33 && hexLatter != 0x34 && hexLatter != 0x35 && hexLatter != 0x36 && hexLatter != 0x37 && hexLatter != 0x38 && hexLatter != 0x39 && hexLatter != 0x61 && hexLatter != 0x62 && hexLatter != 0x63 && hexLatter != 0x64 && hexLatter != 0x65 && hexLatter != 0x66 && hexLatter != 0x67 && hexLatter != 0x68 && hexLatter != 0x69 && hexLatter != 0x6A && hexLatter != 0x6B && hexLatter != 0x6C && hexLatter != 0x6D && hexLatter != 0x6E && hexLatter != 0x6F && hexLatter != 0x70 && hexLatter != 0x71 && hexLatter != 0x72 && hexLatter != 0x73 && hexLatter != 0x74 && hexLatter != 0x75 && hexLatter != 0x76 && hexLatter != 0x77 && hexLatter != 0x78 && hexLatter != 0x79 && hexLatter != 0x7A && hexLatter != 0x41 && hexLatter != 0x42 && hexLatter != 0x43 && hexLatter != 0x44 && hexLatter != 0x45 && hexLatter != 0x46 && hexLatter != 0x47 && hexLatter != 0x48 && hexLatter != 0x49 && hexLatter != 0x4A && hexLatter != 0x4B && hexLatter != 0x4C && hexLatter != 0x4D && hexLatter != 0x4E && hexLatter != 0x4F && hexLatter != 0x50 && hexLatter != 0x51 && hexLatter != 0x52 && hexLatter != 0x53 && hexLatter != 0x54 && hexLatter != 0x55 && hexLatter != 0x56 && hexLatter != 0x57 && hexLatter != 0x58 && hexLatter != 0x59 && hexLatter != 0x5A && hexLatter != 0x2E)
hexLatter += 64;
if (hexLatter == 0x60)
hexLatter = 0xA0;
if (hexLatter == 0x6F)
hexLatter = 0x2F;
stringstream ss;
ss << hex << hexLatter;
string tmpHex = ss.str();
tmpHexLatter = "0x" + tmpHex;
latter = stoi(tmpHexLatter, {}, 16);
charBuffer.push_back((char)latter);
}
}
for (int i = 0; i < charBuffer.size(); i++)
{
buffer[i] = charBuffer[i];
}
return g_convert(buffer, -1, "utf-8", "Windows-1251", NULL, NULL, NULL);
/*string tmpStr = stringToConvert.ToCString();
std::unique_ptr<gchar, void (*)(gpointer)> p(g_convert(tmpStr.c_str(), -1, "utf-8", "Windows-1251", NULL, NULL, NULL), g_free);
return TCollection_AsciiString(p.get());*/
}
You don't need `buffer` at all; you can pass `charBuffer.data()` or even `stringToConvert.c_str()` to `g_convert()`.

But, more importantly, both `buffer` and `charBuffer` are not null-terminated, and you are not otherwise passing the final length to `g_convert()`, so `g_convert()` will end up either reaching out of bounds or trying to convert uninitialized data, either way leading to undefined behavior, which is why you see garbage on the end of the result.

On a side note, you don't need the `"0x"` prefix when calling `std::stoi()` with `base=16`.

Also, why are you returning a `char*` instead of a `std::string`? Who is responsible for allocating and freeing the memory? You really should let `std::string` handle that.