Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
#include "stdafx.h"

#include <cstring>

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <boost/tokenizer.hpp>

#include "/.../dirent.h"
#include "boostregex.hpp"

using namespace std;
using namespace boost;
- int main() {
- DIR* dir;
- dirent* pdir;
- dir = opendir("D:/.../dataset/"); // open current directory
- int number_of_words=0;
- char filename[300];
- int i=0;
- while (pdir = readdir(dir))
- {
- string fileString;
- strcpy(filename, "D:/.../dataset/");
- strcat(filename, pdir->d_name);
- ifstream file(filename);
- std::istream_iterator<std::string> beg(file), end;
- number_of_words = distance(beg,end);
- int *wordIndexes = new int[number_of_words +1];
- int index = 0;
- wordIndexes[0] = 0;
- cout<<"Number of words in file: "<<number_of_words<<endl;
- ifstream files(filename);
- if (file.is_open())
- {
- string output;
- while (!files.eof())
- {
- //read word by word
- files >> output;
- fileString += " ";
- fileString += output;
- }
- string fileStringTokenized;
- tokenizer<> tok(fileString);
- for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end(); ++beg)
- {
- string currentWord;
- currentWord = *beg;
- index += currentWord.size();
- wordIndexes[i] = index;
- i++;
- //cout<<*beg<<"n";
- fileStringTokenized += " ";
- fileStringTokenized += *beg;
- }
- cout<<"Number of characters: "<<fileStringTokenized.size()<<endl;
- const char *charString = fileStringTokenized.c_str();
- //cout<<charString;
- cout<<endl;
- }
- file.close();
- delete []wordIndexes;
- }
- closedir(dir);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement