Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <bits/stdc++.h>
#include <dirent.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <random>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// Paths read from dir.txt by Task1(), one entry per line of that file.
std::vector<std::string> dir_list;

// Expands to the begin/end iterator pair of container `a`.
// The argument is parenthesized so that expressions such as `*ptr` expand
// to `(*ptr).begin()` rather than `*ptr.begin()`.
#define all(a) (a).begin(), (a).end()

// Sorts `a` and removes duplicate elements, leaving each value exactly once.
// The expansion is wrapped in parentheses so it behaves as one expression,
// and the algorithms are qualified so the macro does not depend on
// argument-dependent lookup or on a using-directive being in effect.
// Note: `a` is evaluated several times; pass a plain lvalue.
#define Unique(a) (std::sort(all(a)), (a).erase(std::unique(all(a)), (a).end()))

// Shorthand for a pair of strings.
#define pSS std::pair<std::string, std::string>
// One record parsed from the input CSV. All three fields are kept as text
// exactly as they were read (after trimming by the caller).
struct MyData {
    std::string Sentence;
    std::string Level;
    std::string Valid;

    // Creates a record with three empty fields.
    MyData() = default;

    // Creates a record from the three field values. The arguments are taken
    // by value and moved into place, so callers passing temporaries pay for
    // no extra copy.
    MyData(std::string a, std::string b, std::string c)
        : Sentence(std::move(a)), Level(std::move(b)), Valid(std::move(c)) {}
};
// Trims leading and trailing blanks (' ') and newlines ('\n') from `s`
// in place. Interior blanks and newlines are kept.
//
// A string made up only of blanks/newlines becomes empty. This includes the
// one-character case (" " or "\n"), which the previous index-based loops
// left untouched.
void Modify(std::string &s) {
    static const char kTrimmed[] = " \n";

    const std::string::size_type first = s.find_first_not_of(kTrimmed);
    if (first == std::string::npos) {
        // Nothing but trimmable characters (or already empty).
        s.clear();
        return;
    }
    const std::string::size_type last = s.find_last_not_of(kTrimmed);
    s = s.substr(first, last - first + 1);
}
- void Writing() {
- vector <MyData> Data;
- int sz;
- srand(time(0));
- map <string, vector <pSS> > Mp;
- // ifstream input("D:\\Input\\original.txt");
- ifstream input("output.csv");
- if(!input.is_open()) {
- cerr << "ERROR : File can't be Opened\n";
- exit(0);
- }
- string Sentence;
- string Level;
- string Valid;
- while(input.good()) {
- getline(input, Sentence, ',');
- getline(input, Level, ',');
- getline(input, Valid, ',');
- string tp;
- Modify(Sentence);
- Modify(Level);
- Modify(Valid);
- // cerr << "Data : " << Sentence << '\n';
- // cerr << "Level : " << Level << '\n';
- // cerr << "Valid : " << Valid << '\n';
- // cerr << "----------------------------\n";
- Mp[Sentence].push_back(make_pair(Level, Valid));
- Data.push_back(MyData(Sentence, Level,Valid));
- }
- double fivePercent = Data.size() / 100.0;
- int First85 = fivePercent * 85;
- int First95 = ceil(fivePercent * 95);
- sz = Data.size();
- vector <MyData> Front, Mid, Last;
- ofstream outFile;
- outFile.open("First85.csv");
- random_shuffle(all(Data));
- for(int i = 0; i < First85; i++) {
- outFile << Data[i].Sentence << "," << Data[i].Level << "," << Data[i].Valid << '\n';
- // cerr << Data[i].Sentence << ' ' << Data[i].Level << " " << Data[i].Valid << "\n";
- }
- outFile.close();
- outFile.open("Mid10.csv");
- vector <string> Temp;
- for(int i = First85; i < First95; i++) {
- Temp.push_back(Data[i].Sentence);
- } Unique(Temp);
- for(int i = 0; i < Temp.size(); i++) {
- vector < pSS > &OtherStuff = Mp[Temp[i]];
- vector <string> str;
- string org = "-1";
- for(int j = 0; j < OtherStuff.size(); j++) {
- // cout << Temp[i] << " : (" << OtherStuff[i].first << ") : (" << OtherStuff[i].second << ")" << "\n";
- if(OtherStuff[j].second == "1") {
- org = OtherStuff[j].first;
- }
- else str.push_back(OtherStuff[j].first);
- }
- // cout << Temp[i] << " " << org << '\n';
- outFile << Temp[i] << "," << org;
- for(int j = 0; j < str.size(); j++) outFile << "," << str[j];
- outFile << "\n";
- }
- outFile.close();
- outFile.open("Last5.csv");
- Temp.clear();
- for(int i = First95; i < Data.size(); i++) {
- Temp.push_back(Data[i].Sentence);
- } Unique(Temp);
- for(int i = 0; i < Temp.size(); i++) {
- vector < pSS > &OtherStuff = Mp[Temp[i]];
- vector <string> str;
- string org;
- for(int j = 0; j < OtherStuff.size(); j++) {
- if(OtherStuff[j].second == "1") org = OtherStuff[j].first;
- else str.push_back(OtherStuff[j].first);
- }
- outFile << Temp[i] << "," << org;
- for(int j = 0; j < str.size(); j++) outFile << "," << str[j];
- outFile << "\n";
- }
- outFile.close();
- }
// Lists the entries of directory `ss` into "dir.txt", one per line, each
// written as `<ss>\<entry name>`.
//
// dir.txt is created (and truncated) before the directory is opened, so a
// failed listing leaves an empty dir.txt rather than a stale one. When the
// directory cannot be opened, the system error is reported via perror.
//
// The "." and ".." pseudo-entries are skipped: they are not files, and
// listing them would give them a position in the file list.
void func(char *ss) {
    std::ofstream myfile("dir.txt");

    DIR *dir = opendir(ss);
    if (dir == nullptr) {
        /* could not open directory */
        perror("");
        return;
    }

    struct dirent *ent = nullptr;
    while ((ent = readdir(dir)) != nullptr) {
        const std::string name = ent->d_name;
        if (name == "." || name == "..") continue;
        // '\n' instead of endl: no need to flush after every entry.
        myfile << ss << "\\" << name << '\n';
    }
    closedir(dir);
}
// One line of text paired with a file number.
struct data {
    std::string line;  // text of the line as read by getline
    int file = 0;      // 1-based position of a file in dir_list
    int k = 0;         // 1 when `file` is the file the line was read from,
                       // 0 when `file` is a randomly chosen different one
};

// `input` holds every line read from the listed files; `output` holds the
// generated samples that are written to output.csv.
// The element type is spelled `::data` because, with `using namespace std`
// in effect, a plain `data` is ambiguous with std::data from C++17 on.
std::vector< ::data> input, output;
- void Task1(){
- ifstream infile;
- ofstream myfile;
- string s;
- data temp,temp2;
- infile.open("dir.txt");
- myfile.open("output.csv");
- if(!infile){
- cout<<"Unable to open dir file"<<endl;
- return ;
- }
- while(getline(infile,s)){
- dir_list.push_back(s);
- }
- infile.close();
- for(int i=0;i<dir_list.size();i++){
- s=dir_list[i];
- infile.open(s);
- if(!infile){
- cout<<"Unable to open file : "<<s<<endl;
- continue;
- }
- while(getline(infile,s)){
- temp.line=s;
- temp.file=i+1;
- temp.k=0;
- input.push_back(temp);
- }
- infile.close();
- }
- int len=dir_list.size();
- srand((unsigned)time(0));
- for(int i=0;i<input.size();i++){
- temp=input[i];
- for(int j=0;j<9;j++){
- int r=temp.file;
- while(r==temp.file){
- r=rand();
- r=(r%len)+1;
- }
- temp2=temp;
- temp2.file=r;
- output.push_back(temp2);
- }
- temp.k=1;
- output.push_back(temp);
- }
- for(int i=0;i<output.size();i++){
- int r=rand();
- r%=(output.size());
- swap(output[i],output[r]);
- }
- for(int i=0;i<output.size();i++){
- if(i>0) myfile<<", ";
- myfile<<output[i].line<<", "<<output[i].file<<", "<<output[i].k;
- }
- myfile.close();
- return ;
- }
- int main(){
- char dir[100];
- gets(dir);
- func(dir);
- Task1();
- Writing();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement