Advertisement
Guest User

General-purpose tokenizer with multi-delimiters in C++

a guest
Oct 12th, 2015
168
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.80 KB | None | 0 0
  1. #include <iostream>
  2. #include <fstream>
  3. #include <algorithm>
  4. #include <vector>
  5. #include <string>
  6.  
  // "Not found" sentinel used when comparing std::string search results.
  constexpr std::string::size_type str_end = std::string::npos;
  8.  
  9. using std::vector;
  10. using std::string;
  11. using std::ostream;
  12. using std::ifstream;
  13. using std::getline;
  14. using std::cout;
  15. using std::endl;
  16. using std::stoi;
  17. using std::stod;
  18.  
  // Writes every element of `vec` to `os`, each followed by one space, so a
  // non-empty vector leaves a trailing space and an empty vector writes
  // nothing. Returns `os` so insertions can be chained.
  template <typename T>
  std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
    for (auto it = vec.cbegin(); it != vec.cend(); ++it) {
      os << *it << ' ';
    }
    return os;
  }
  25.  
  // decode<T>(x): convert the text `x` into a value of type T.
  // Only the declaration is generic; each supported T is provided as an
  // explicit specialization below, so using an unsupported T fails at link
  // time unless another specialization is supplied.
  template <typename T>
  T decode(const std::string& x);

  // T = std::string: the text is returned as-is (copied).
  template <>
  inline std::string decode<std::string>(const std::string& x) {
    return x;
  }

  // T = int: delegates to std::stoi with its default arguments (base 10).
  // std::stoi's own rules apply: leading whitespace is skipped, trailing
  // characters after the number are ignored, and it throws
  // std::invalid_argument / std::out_of_range on failure.
  template <>
  inline int decode<int>(const std::string& x) {
    return std::stoi(x);
  }

  // T = double: delegates to std::stod; same skipping/throwing rules as
  // described for std::stoi above.
  template <>
  inline double decode<double>(const std::string& x) {
    return std::stod(x);
  }
  44.  
  45. // given a string with delimiters inside, parse it into
  46. //  individual tokens stored in a vector<T>
  47. template <typename T>
  48. void tokenize(const string& str, vector<T>& tokens,
  49.               const string& delimiters = " ") {
  50.   auto last_pos = str.find_first_not_of(delimiters, 0);     // first token
  51.   auto curr_pos = str.find_first_of(delimiters, last_pos);  // next delim
  52.  
  53.   while (curr_pos != str_end || last_pos != str_end) {
  54.     // Add the extracted token after converting it to the proper type.
  55.     tokens.emplace_back(decode<T>(str.substr(last_pos, curr_pos - last_pos)));
  56.  
  57.     last_pos = str.find_first_not_of(delimiters, curr_pos);  // next token
  58.     curr_pos = str.find_first_of(delimiters, last_pos);      // next delim
  59.   }
  60. }
  61.  
  62. int main() {
  63.   ifstream fs{"data"};
  64.   string tmp{""};
  65.   const string delims{"[,]"};
  66.   // vector<string> tokens;
  67.   // vector<int> tokens;
  68.   vector<double> tokens;
  69.  
  70.   while (getline(fs, tmp)) tokenize(tmp, tokens, delims);
  71.  
  72.   cout << tokens << endl;
  73. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement