Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // g++ test.cpp -std=c++14 -Wall
- #include <iostream>
- #include <cstring>
- #include <string>
- #include <vector>
- #include <locale>
- using namespace std;
/// Encoding-inspection helper: renders a byte buffer as a sequence of
/// '0'/'1' characters (eight per input byte, most significant bit first)
/// so the in-memory representation of a string can be printed.
///
/// The bit characters are stored in a std::string, so the class needs no
/// user-written destructor or copy operations and can be copied safely.
class ByteMap
{
private:
    std::string _data;  ///< One '0'/'1' character per input bit.
    size_t _len;        ///< Number of characters in _data (input bytes * 8).
public:
    /// Builds the bit map for the first `len` bytes at `data`.
    ///
    /// @param data  Bytes to inspect. If null, an error line is written to
    ///              std::cerr and the map is left empty.
    /// @param len   Number of bytes readable at `data`.
    ByteMap(const char *data, size_t len) : _data(), _len(0) {
        if (nullptr == data) {
            std::cerr << "Error ByteMap no init\n";
            return;  // Empty map: streaming it prints nothing.
        }
        _data.reserve(len * 8);
        for (size_t i = 0; i < len; ++i) {
            // Go through unsigned char so bytes >= 0x80 are not sign-extended.
            const unsigned char byte = static_cast<unsigned char>(data[i]);
            for (int bit = 7; bit >= 0; --bit)
                _data.push_back(((byte >> bit) & 1u) ? '1' : '0');
        }
        _len = _data.size();
    }

    friend std::wostream & operator << (std::wostream &, const ByteMap &);
};

/// Writes the bit map to `o`, separating each group of eight bits (one
/// input byte) with a single space. Returns `o` to allow chaining.
std::wostream & operator << (std::wostream &o, const ByteMap &b) {
    for (size_t j = 0; j < b._len; ++j) {
        if (j > 0 && j % 8 == 0)
            o << L' ';
        // The narrow '0'/'1' character is widened by the stream inserter.
        o << b._data[j];
    }
    return o;
}
/// Decodes a UTF-8 encoded byte string into a wide string.
///
/// Each well-formed UTF-8 sequence (1 to 4 bytes) is decoded to its Unicode
/// code point. When wchar_t is at least 32 bits wide the code point is stored
/// directly; when it is 16 bits wide, code points above U+FFFF are stored as
/// a UTF-16 surrogate pair.
///
/// Malformed input (stray continuation byte, invalid lead byte, truncated
/// sequence, overlong form, encoded surrogate, or value above U+10FFFF)
/// yields U+FFFD for the offending byte, and decoding resumes at the next
/// byte. The function never reads outside `str`.
///
/// @param str  UTF-8 encoded text.
/// @return     The decoded wide string.
std::wstring _toUtf8(const std::string &str) {
    const char32_t kReplacement = 0xFFFD;

    std::wstring decoded;
    decoded.reserve(str.size());  // Upper bound: one unit per input byte.

    // Appends one code point, splitting it into surrogates if wchar_t is
    // too narrow to hold it.
    const auto append = [&decoded](char32_t cp) {
        if (sizeof(wchar_t) >= 4 || cp < 0x10000) {
            decoded.push_back(static_cast<wchar_t>(cp));
        } else {
            cp -= 0x10000;
            decoded.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
            decoded.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
        }
    };

    const size_t size = str.size();
    size_t pos = 0;
    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(str[pos]);

        size_t seqLen = 0;       // Total bytes in the sequence.
        char32_t cp = 0;         // Code point accumulated so far.
        char32_t minValue = 0;   // Smallest value that needs seqLen bytes.
        if (lead < 0x80) {
            seqLen = 1; cp = lead;                                 minValue = 0;
        } else if ((lead & 0xE0) == 0xC0) {
            seqLen = 2; cp = static_cast<char32_t>(lead & 0x1F);   minValue = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            seqLen = 3; cp = static_cast<char32_t>(lead & 0x0F);   minValue = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            seqLen = 4; cp = static_cast<char32_t>(lead & 0x07);   minValue = 0x10000;
        } else {
            // Continuation byte in lead position, or 0xF8..0xFF.
            append(kReplacement);
            ++pos;
            continue;
        }

        // The whole sequence must lie inside the string.
        bool valid = (seqLen <= size - pos);
        for (size_t k = 1; valid && k < seqLen; ++k) {
            const unsigned char next = static_cast<unsigned char>(str[pos + k]);
            if ((next & 0xC0) != 0x80)
                valid = false;
            else
                cp = (cp << 6) | static_cast<char32_t>(next & 0x3F);
        }
        // Reject overlong encodings, UTF-16 surrogates and out-of-range values.
        if (valid && (cp < minValue || cp > 0x10FFFF ||
                      (cp >= 0xD800 && cp <= 0xDFFF)))
            valid = false;

        if (!valid) {
            append(kReplacement);
            ++pos;  // Resynchronise on the very next byte.
            continue;
        }

        append(cp);
        pos += seqLen;
    }
    return decoded;
}
- int main() {
- locale::global(std::locale("") );
- wcout.imbue(std::locale());
- cout.imbue(std::locale());
- std::string str("добро");
- std::wstring wstr = _toUtf8(str);
- std::wstring wstr2 = L"добро";
- std::wcout << wstr << endl;
- std::wcout << wstr2 << endl;
- std::wcout << L"wstring_construct\n" << ByteMap((char*)(wstr.c_str()), 5*4) << endl << endl;
- std::wcout << L"string_utf8\n"<< ByteMap((char*)(str.c_str()), 5*4) << endl << endl;
- std::wcout << L"wstring_native\n" << ByteMap((char*)(wstr2.c_str()), 5*4) << endl << endl;
- std::cout << str << "\n";
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement