Advertisement
Guest User

Untitled

a guest
Oct 24th, 2014
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.01 KB | None | 0 0
#include "stdafx.h"

#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include "/.../dirent.h"
#include "boostregex.hpp"
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;
  13. int main() {
  14.  
  15. DIR* dir;
  16. dirent* pdir;
  17.  
  18. dir = opendir("D:/.../dataset/"); // open current directory
  19.  
  20. int number_of_words=0;
  21.  
  22. char filename[300];
  23. int i=0;
  24.  
  25. while (pdir = readdir(dir))
  26. {
  27. string fileString;
  28.  
  29. strcpy(filename, "D:/.../dataset/");
  30. strcat(filename, pdir->d_name);
  31. ifstream file(filename);
  32. std::istream_iterator<std::string> beg(file), end;
  33.  
  34. number_of_words = distance(beg,end);
  35. int *wordIndexes = new int[number_of_words +1];
  36.  
  37. int index = 0;
  38. wordIndexes[0] = 0;
  39.  
  40. cout<<"Number of words in file: "<<number_of_words<<endl;
  41.  
  42. ifstream files(filename);
  43. if (file.is_open())
  44. {
  45. string output;
  46. while (!files.eof())
  47. {
  48. //read word by word
  49. files >> output;
  50. fileString += " ";
  51. fileString += output;
  52. }
  53.  
  54. string fileStringTokenized;
  55. tokenizer<> tok(fileString);
  56.  
  57. for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end(); ++beg)
  58. {
  59. string currentWord;
  60. currentWord = *beg;
  61.  
  62. index += currentWord.size();
  63. wordIndexes[i] = index;
  64.  
  65. i++;
  66.  
  67. //cout<<*beg<<"n";
  68. fileStringTokenized += " ";
  69. fileStringTokenized += *beg;
  70. }
  71.  
  72.  
  73. cout<<"Number of characters: "<<fileStringTokenized.size()<<endl;
  74. const char *charString = fileStringTokenized.c_str();
  75. //cout<<charString;
  76. cout<<endl;
  77.  
  78. }
  79. file.close();
  80. delete []wordIndexes;
  81. }
  82. closedir(dir);
  83. return 0;
  84. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement