Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <fstream>
- #include <string>
- #include <locale>
- #include <iomanip>
- #include <string>
- #include <codecvt>
// Adapter that lets a locale facet be owned directly by
// std::wstring_convert / std::wbuffer_convert.
//
// It derives from Facet, forwards whatever constructor arguments it receives
// to Facet unchanged, and declares its own public destructor so that code
// outside the facet hierarchy can destroy the object (standard facets such as
// std::codecvt declare their destructor protected).
template <typename Facet>
struct deletable_facet : Facet
{
    // Perfect-forwards every argument to the Facet constructor.
    template <typename... CtorArgs>
    deletable_facet(CtorArgs&&... ctor_args)
        : Facet(std::forward<CtorArgs>(ctor_args)...)
    {
    }

    // Intentionally empty; it exists only to be publicly accessible.
    ~deletable_facet() {}
};
- int main()
- {
- std::cout << "sizeof(char32_t) = " << sizeof(char32_t) << std::endl;
- std::cout << "sizeof(wchar_t) = " << sizeof(wchar_t) << std::endl;
- // UTF-8 narrow multibyte encoding
- std::string data = u8"z\u00df\u6c34\U0001f34c";
- std::ofstream("text.txt") << data;
- // using system-supplied locale's codecvt facet
- std::wifstream fin("text.txt");
- // reading from wifstream will use codecvt<wchar_t, char, mbstate_t>
- fin.imbue(std::locale(""));
- std::cout << "The UTF-8 file contains the following UCS4 code points: \n";
- for (wchar_t c; fin >> c; )
- std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';
- // using standard (locale-independent) codecvt facet
- std::wstring_convert<
- deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32;
- std::u32string str32 = conv32.from_bytes(data);
- std::cout << "The UTF-8 string contains the following UCS4 code points: \n";
- for (char32_t c : str32)
- std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';
- std::locale::global(std::locale(""));
- std::wstring_convert<
- deletable_facet<std::codecvt<wchar_t, char, std::mbstate_t>>> wconv;
- std::wstring wstr = wconv.from_bytes(data);
- std::cout << "The UTF-8 string contains the following UCS4 code points: \n";
- for (wchar_t c : wstr)
- std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';
- }
Advertisement
Add Comment
Please, Sign In to add comment