faiuwle

IStream autotokenizer

Jan 23rd, 2012
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.44 KB | None | 0 0
  1. #include <string>
  2. #include <vector>
  3. #include <istream>
  4. #include <cstdarg>
  5. using namespace std;
  6.  
  7. // [...]
  8.  
  9. vector<string> tokenize (istream &in, bool cont, int num, ...)  {
  10.   vector<string> tokens;
  11.  
  12.   if (!in.good () || num < 1) return tokens;
  13.  
  14.   string line;
  15.   bool foundLine = false;
  16.  
  17.   // read lines until we find something tokenizable
  18.   while (in.good ())  {
  19.     line = "";
  20.     while (line == "" && in.good ()) // skip blank lines
  21.       getline (in, line, '\n');
  22.  
  23.     if (line == "") break;  // eof
  24.  
  25.     // comments are defined as lines where the first
  26.     // non-whitespace character is #
  27.     // we would also like to skip lines containing only whitespace
  28.     int firstChar = 0; // index of first non-whitespace
  29.  
  30.     while (firstChar < line.size ())  {
  31.       if (line[firstChar] == ' ')
  32.         firstChar++;
  33.       else break;
  34.     }
  35.  
  36.     if (firstChar >= line.size ()) // line contains only whitespace
  37.       continue;
  38.  
  39.     if (line[firstChar] == '#')    // comment
  40.       continue;
  41.  
  42.     foundLine = true;
  43.     break;
  44.   }
  45.  
  46.   if (!foundLine)  // eof
  47.     return tokens;
  48.  
  49.   // tokenizing to a vector of size 1 is easy
  50.   if (num == 1)  {  
  51.     tokens.push_back (line);
  52.     return tokens;
  53.   }
  54.  
  55.   // tokenize!
  56.   va_list delimList;
  57.   va_start (delimList, num);
  58.   char current;  // last-used delimiter saved for later
  59.   int lastDelim = -1;  // index of last-used delimiter
  60.  
  61.   for (int x = 0; x < (num-1); x++)  {
  62.     // cstdarg doesn't like casting to char, so we cast to int first
  63.     current = va_arg (delimList, int);
  64.     int nextDelim = line.find (current, lastDelim+1);
  65.     string tok = line.substr (lastDelim+1, nextDelim-lastDelim-1);
  66.  
  67.     tokens.push_back (tok);
  68.  
  69.     lastDelim = nextDelim;
  70.  
  71.     // break if we didn't actually find the last delimiter
  72.     if (lastDelim == string::npos) break;
  73.   }
  74.  
  75.   va_end (delimList);
  76.  
  77.   // don't continue - we want exactly num tokens (or less)
  78.   if (!cont)  {
  79.     // add the final token, unless we've already run out of line
  80.     if (lastDelim != string::npos)
  81.       tokens.push_back (line.substr (lastDelim+1));
  82.     return tokens;
  83.   }
  84.  
  85.   // continue tokenizing using final delimiter until line is exhausted
  86.   while (lastDelim != string::npos)  {
  87.     int nextDelim = line.find (current, lastDelim+1);
  88.     string tok = line.substr (lastDelim+1, nextDelim-lastDelim-1);
  89.  
  90.     if (tok != "") tokens.push_back (tok);
  91.  
  92.     lastDelim = nextDelim;
  93.   }
  94.  
  95.   return tokens;
  96. }
Add Comment
Please, Sign In to add comment