Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Source encoding: UTF-8. "π" should be a lowercase Greek pi.
- #include <iterator> // std::(begin, end)
- #include <string> // std::string
- #include <string_view> // std::string_view
- #include <unordered_map> // std::unordered_map
- #include <regex> // std::regex
namespace text {
    using std::string, std::string_view,
          std::cmatch, std::csub_match, std::regex;

    template< class Key, class Value > using Map_ = std::unordered_map<Key, Value>;

    // Replacement table: each key is one non-ASCII letter as it is encoded in
    // this source file (the file header states UTF-8), each value is its plain
    // ASCII transliteration.
    //
    // The keys are inserted verbatim into a regular expression by
    // `nonascii_chars_regex_spec`, so they must not contain regex
    // metacharacters, and the table must not be empty.
    const Map_<string_view, string_view> ascii_replacements =
    {
        { "å", "aa" }, { "ä", "ae" }, { "ö", "oe" },
        { "Å", "Aa" }, { "Ä", "Ae" }, { "Ö", "Oe" }
    };

    // Builds the regex source text that matches any key of `ascii_replacements`:
    // the keys joined with the alternation operator `|`.
    //
    // The order of the alternatives follows the iteration order of the
    // unordered map and is therefore unspecified.
    auto nonascii_chars_regex_spec()
        -> string
    {
        string result;
        for( const auto& key_and_value : ascii_replacements ) {
            if( not result.empty() ) { result += "|"; }
            result += key_and_value.first;      // The key, i.e. a nonascii char.
        }
        return result;
    }

    // Compiled once; declared after `ascii_replacements` in the same namespace
    // so the table is already initialized when the spec is built.
    const auto nonascii_chars_regex = regex( nonascii_chars_regex_spec() );

    // Returns a copy of `s` where every occurrence of a key of
    // `ascii_replacements` is replaced by the corresponding value.
    // All other text, including any non-ASCII characters that are not in the
    // table, is copied through unchanged.
    auto to_ascii( string_view s )
        -> string
    {
        string result;
        result.reserve( s.length() );   // Lower bound: replacements only lengthen the text.

        cmatch match;
        while( std::regex_search( s.data(), s.data() + s.length(), match, nonascii_chars_regex ) ) {
            // Copy the untouched text in front of the match.
            const csub_match preceding_text = match.prefix();
            result += string_view( preceding_text.first, preceding_text.length() );

            // Look up the matched bytes directly; no temporary std::string needed.
            const string_view matched_char( match[0].first, match.length() );
            result += ascii_replacements.at( matched_char );

            // Continue the search right after the match.
            const csub_match the_rest = match.suffix();
            s = string_view( the_rest.first, the_rest.length() );
        }

        // The last search failed, which leaves `match` empty: its `suffix()` is
        // not usable here. `s` already is exactly the unprocessed tail (or the
        // whole input when nothing matched), so append that instead.
        result += s;
        return result;
    }
}  // namespace text
- #include <iostream>
- #include <iomanip>
- using std::cout, std::endl, std::left, std::setw;
- auto main()
- -> int
- {
- const auto& swedish_text = "Blåbär til gröten – et naturlig val!";
- const auto w = setw( 20 );
- cout << left;
- cout << w << "Original text:" << " '" << swedish_text << "'." << endl;
- cout << w << "Known non-ASCII:" << " '" << text::nonascii_chars_regex_spec() << "'." << endl;
- cout << w << "ASCII text:" << " '" << text::to_ascii( swedish_text ) << "'." << endl;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement