Advertisement
alfps

UTF-8 replace example.

May 21st, 2020
1,616
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.37 KB | None | 0 0
  1. // Source encoding: UTF-8. "π" should be a lowercase Greek pi.
  2.  
  3. #include <iterator>         // std::(begin, end)
  4. #include <string>           // std::string
  5. #include <string_view>      // std::string_view
  6. #include <unordered_map>    // std::unordered_map
  7. #include <regex>            // std::regex
  8.    
  namespace text {
      using   std::string, std::string_view,
              std::cmatch, std::regex;

      template< class Key, class Value > using Map_ = std::unordered_map<Key, Value>;

      // Maps each supported non-ASCII character (as its UTF-8 byte sequence, given a
      // UTF-8 encoded source file) to the ASCII text that replaces it.
      // Both keys and values are views of string literals.
      const Map_<string_view, string_view>  ascii_replacements    =
      {
          { "å", "aa" }, { "ä", "ae" }, { "ö", "oe" },
          { "Å", "Aa" }, { "Ä", "Ae" }, { "Ö", "Oe" }
      };

      // Builds a regex alternation ("k1|k2|...") of all keys of `ascii_replacements`.
      //
      // The order of the alternatives follows the iteration order of the unordered map.
      // Keys are inserted verbatim, without escaping of regex metacharacters.
      auto nonascii_chars_regex_spec()
          -> string
      {
          string spec;
          for( const auto& entry : ascii_replacements ) {
              if( not spec.empty() ) { spec += "|"; }
              spec += entry.first;        // The key, i.e. a non-ASCII character.
          }
          return spec;
      }

      // Matches any single key of `ascii_replacements`.
      const auto nonascii_chars_regex = regex( nonascii_chars_regex_spec() );

      // Returns a copy of `s` where every occurrence of a key of `ascii_replacements`
      // is replaced with the corresponding value. All other text, including text
      // without any replaceable character, is copied unchanged.
      auto to_ascii( string_view s )
          -> string
      {
          using Size = string_view::size_type;

          string result;
          cmatch match;
          // Invariant: `s` is the part of the input that has not yet been copied to `result`.
          while( not s.empty()
              and regex_search( s.data(), s.data() + s.length(), match, nonascii_chars_regex )
              ) {
              const auto match_start  = static_cast<Size>( match.position() );
              const auto match_length = static_cast<Size>( match.length() );

              result += s.substr( 0, match_start );       // Text preceding the match.
              result += ascii_replacements.at( s.substr( match_start, match_length ) );
              s.remove_prefix( match_start + match_length );
          }
          // After a failed search the contents of `match` (including `suffix()`) are not
          // specified, so the unprocessed tail is taken from `s` and not from `match`.
          result += s;
          return result;
      }
  }  // namespace text
  52.  
  53. #include <iostream>
  54. #include <iomanip>
  55. using   std::cout, std::endl, std::left, std::setw;
  56.  
  57. auto main()
  58.     -> int
  59. {
  60.      const auto& swedish_text   = "Blåbär til gröten – et naturlig val!";
  61.      const auto w = setw( 20 );
  62.      cout << left;
  63.      cout << w << "Original text:"      << " '" << swedish_text << "'." << endl;
  64.      cout << w << "Known non-ASCII:"    << " '" << text::nonascii_chars_regex_spec() << "'." << endl;
  65.      cout << w << "ASCII text:"         << " '" << text::to_ascii( swedish_text ) << "'." << endl;
  66. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement