Advertisement
ALTracer

txt_filter.cpp (XML toupper)

Mar 29th, 2022
1,507
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.20 KB | None | 0 0
  1. /* txt_filter: Operates on plaintext/xml files,
  2.  * replacing any <tagnames> to uppercase <TAGNAMES>.
  3.  * Equivalent to running
  4. #!/bin/sh
  5. cat input.xml | \
  6. sed 's@\<\([a-z]\+\)\>@\U\1@g' | \
  7. sed 's@\<\([a-z]\+\)\( \+.*\)\>@\U\1\L\2@g' > output.xml
  8.  */
  9.  
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
  13.  
  14. using namespace std;
  15.  
  16. int main()
  17. {
  18.     const string filename = "input.txt";
  19.     const string filename_out = "output.txt";
  20.     ifstream istrm(filename, ios::in);
  21.     if (!istrm.is_open()) {
  22.         cerr << "failed to open " << filename << endl;
  23.         return 1;
  24.     }
  25.     ofstream ostrm(filename_out, ios::out);
  26.  
  27.     string linebuf, tagname;
  28.     string tag, tag_upper;
  29.     int count_replaced = 0;
  30. //  locale loc = locale();
  31.     while (!istrm.eof()) {
  32.         getline(istrm, linebuf, '\n');
  33. //      cerr << linebuf << endl;
  34.  
  35.         // Find an opening XML tag: '<', alphabetics [a-zA-Z], '>'
  36.         const size_t pos1 = linebuf.find('<');
  37.         const size_t pos2 = linebuf.find('>');
  38.         if ((pos1 == -1UL) || (pos2 == -1UL) ||
  39.             ( !isalpha(linebuf[pos1+1]) && !(linebuf[pos1+1] == '/') )
  40.         ) {
  41.             ; // No tag, skip filtering
  42.         } else {
  43.             // Found a line containing a tag, ex.
  44.             // '   <ingredient amount="3" unit="spoon">Flour</ingredient>'
  45.  
  46.             // First pass: find the end of tag name
  47.             const size_t pos_tag_start = pos1 + 1;
  48.             size_t pos_tag_end = pos2;
  49.             tag = linebuf.substr(pos_tag_start, pos_tag_end - pos_tag_start);
  50.             clog << "Parsing tagline: " << tag << endl;
  51.             for (size_t i = 0; i<tag.length(); i++) {
  52.                 if ((tag[i] == ' ') || (tag[i] == '\t') || (tag[i] == '\n')) {
  53.                     pos_tag_end = i + pos_tag_start;
  54.                     break;
  55.                 }
  56.             }
  57. //          tag_upper = toupper(tag, loc);
  58.  
  59.             // Second pass: filter only the name
  60.             tagname = linebuf.substr(pos_tag_start, pos_tag_end - pos_tag_start);
  61.             clog << "Found tag: " << tagname << endl;
  62.             tag_upper = tagname; // allocate by copy, then patch
  63.             for (size_t i = 0; i<tag_upper.length(); i++) {
  64.                 tag_upper[i] = toup1per(tagname[i]);
  65.             }
  66.             linebuf.replace(pos_tag_start, pos_tag_end - pos_tag_start, tag_upper);
  67.             count_replaced++;
  68.         }
  69. //      cout << linebuf << endl;
  70.         ostrm << linebuf << endl;
  71.     }
  72.     cout << "Replaced " << count_replaced << " tags to uppercase." << endl;
  73.     return 0;
  74. }
  75.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement