// Untitled

#include <codecvt>
#include <cstdint>
#include <cwchar>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <locale>
#include <string>
#include <utility>

// Adapter that lets a locale facet be created and destroyed as an ordinary
// object, e.g. as the Codecvt argument of std::wstring_convert or
// std::wbuffer_convert.
//
// Standard facets such as std::codecvt declare their destructor protected, so
// they cannot be destroyed directly; this wrapper derives from Facet and
// supplies a public destructor of its own.
template <typename Facet>
struct deletable_facet : Facet {
    // Perfect-forwards every argument to the matching Facet constructor
    // (including the zero-argument case).
    template <typename... CtorArgs>
    deletable_facet(CtorArgs&&... ctor_args)
        : Facet(std::forward<CtorArgs>(ctor_args)...)
    {
    }

    // Public destructor; it in turn invokes Facet's destructor.
    ~deletable_facet() {}
};

int main()
{
    std::cout << "sizeof(char32_t) = " << sizeof(char32_t) << std::endl;
    std::cout << "sizeof(wchar_t)  = " << sizeof(wchar_t)  << std::endl;

    // UTF-8 narrow multibyte encoding
    std::string data = u8"z\u00df\u6c34\U0001f34c";
    std::ofstream("text.txt") << data;

    // using system-supplied locale's codecvt facet
    std::wifstream fin("text.txt");
    // reading from wifstream will use codecvt<wchar_t, char, mbstate_t>
    fin.imbue(std::locale(""));
    std::cout << "The UTF-8 file contains the following UCS4 code points: \n";
    for (wchar_t c; fin >> c; )
        std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';

    // using standard (locale-independent) codecvt facet
    std::wstring_convert<
        deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32;
    std::u32string str32 = conv32.from_bytes(data);

    std::cout << "The UTF-8 string contains the following UCS4 code points: \n";
    for (char32_t c : str32)
        std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';

    std::locale::global(std::locale(""));
    std::wstring_convert<
        deletable_facet<std::codecvt<wchar_t, char, std::mbstate_t>>> wconv;
    std::wstring wstr = wconv.from_bytes(data);
    std::cout << "The UTF-8 string contains the following UCS4 code points: \n";
    for (wchar_t c : wstr)
        std::cout << "U+" << std::hex << std::setw(4) << std::setfill('0') << c << '\n';
}