Guest User

Untitled

a guest
Apr 19th, 2019
72
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include "pch.h"
  2. #include <iostream>
  3. #include <fstream>
  4. #include <ctime>
  5. #include <string>
  6. #include <vector>
  7. #include <algorithm>
  8. #include <unordered_map>
  9. #include <chrono>
  10.  
  /**
   * One row of the input CSV, parsed into typed fields.
   *
   * Columns are read strictly by position, in this order: id, instanceId,
   * datumIzvjestavanja, klijentId, oznakaPartije, datumOtvaranja,
   * planiraniDatumZatvaranja, datumZatvaranja, ugovoreniIznos,
   * stanjePrethodniKvartal, stanjeKvartal, valuta, vrstaKlijenta, proizvod,
   * vrstaProizvoda, visinaKamate, tipKamate, starost, prijevremeniRaskid.
   *
   * Date members hold the day count produced by dateToInt(); -1 marks an
   * empty date column. A default-constructed sample has every numeric member
   * set to 0, every date set to -1 and every character member set to '\0'.
   */
  struct sample {
      int id = 0;
      int instanceId = 0;

      int datumIzvjestavanja = -1;
      long long klijentId = 0;
      long long oznakaPartije = 0;
      int datumOtvaranja = -1;
      int planiraniDatumZatvaranja = -1;
      int datumZatvaranja = -1;
      float ugovoreniIznos = 0.0f;
      int valuta = 0;
      float stanjePrethodniKvartal = 0.0f;
      float stanjeKvartal = 0.0f;
      int vrstaKlijenta = 0;
      std::string proizvod;
      char vrstaProizvoda = '\0';
      float visinaKamate = 0.0f;
      char tipKamate = '\0';
      int starost = 0;
      char prijevremeniRaskid = '\0';

      // All members carry default initializers, so no member is ever left
      // indeterminate when a sample is default-constructed.
      sample () = default;

      /**
       * Parses one delimited text row.
       *
       * @param str    the row, without a line terminator
       * @param delim  column separator (',' by default)
       *
       * Columns that are missing because the row is too short are treated as
       * empty. An empty or malformed mandatory numeric column (id,
       * instanceId, klijentId, oznakaPartije, ugovoreniIznos, stanjeKvartal,
       * valuta, vrstaKlijenta, starost) makes the std::sto* conversion throw
       * (std::invalid_argument / std::out_of_range). Empty
       * stanjePrethodniKvartal and visinaKamate become 0, empty single
       * character columns become '\0', and empty dates become -1.
       */
      sample (const std::string& str, const char delim = ',') {
          std::size_t fieldStart = 0;
          bool exhausted = false;

          // Returns the next column. Once the last column has been handed
          // out, keeps returning an empty string instead of wrapping around
          // to the beginning of the row.
          auto nextField = [&] () -> std::string {
              if (exhausted) {
                  return std::string ();
              }
              const std::size_t fieldEnd = str.find (delim, fieldStart);
              if (fieldEnd == std::string::npos) {
                  exhausted = true;
                  return str.substr (fieldStart);
              }
              std::string field = str.substr (fieldStart, fieldEnd - fieldStart);
              fieldStart = fieldEnd + 1;
              return field;
          };
          // Optional numeric column: empty text means 0.
          auto floatOrZero = [] (const std::string& text) -> float {
              return text.empty () ? 0.0f : std::stof (text);
          };
          // Single character column: empty text means '\0'.
          auto firstChar = [] (const std::string& text) -> char {
              return text.empty () ? '\0' : text [0];
          };

          id = std::stoi (nextField ());
          instanceId = std::stoi (nextField ());
          datumIzvjestavanja = dateToInt (nextField ());
          klijentId = std::stoll (nextField ());
          oznakaPartije = std::stoll (nextField ());
          datumOtvaranja = dateToInt (nextField ());
          planiraniDatumZatvaranja = dateToInt (nextField ());
          datumZatvaranja = dateToInt (nextField ());
          ugovoreniIznos = std::stof (nextField ());
          stanjePrethodniKvartal = floatOrZero (nextField ());
          stanjeKvartal = std::stof (nextField ());
          valuta = std::stoi (nextField ());
          vrstaKlijenta = std::stoi (nextField ());
          proizvod = nextField ();
          vrstaProizvoda = firstChar (nextField ());
          visinaKamate = floatOrZero (nextField ());
          tipKamate = firstChar (nextField ());
          starost = std::stoi (nextField ());
          prijevremeniRaskid = firstChar (nextField ());
      }

  private:
      /**
       * Converts a date to a day count.
       *
       * The day is read from characters 0-1, the month from characters 3-4
       * and the year from characters 6-9; the separator characters at
       * positions 2 and 5 are not inspected. The date is handed to
       * std::mktime (which interprets it as local time, midnight) and the
       * resulting time_t is divided by the number of seconds in a day,
       * truncating toward zero.
       *
       * @return the day count, or -1 when `date` is empty.
       * @throws whatever std::stoi / std::string::substr throw for text that
       *         is too short or not numeric.
       */
      static int dateToInt (const std::string& date) {
          if (date.empty ()) {
              return -1;
          }

          // Assign by member name: the order of std::tm's members is not
          // something positional initialisation should rely on.
          std::tm parts = {};
          parts.tm_mday = std::stoi (date.substr (0, 2));
          parts.tm_mon = std::stoi (date.substr (3, 2)) - 1;
          parts.tm_year = std::stoi (date.substr (6, 4)) - 1900;

          const std::time_t t = std::mktime (&parts);
          return static_cast<int> (std::difftime (t, 0) / (60 * 60 * 24));
      }
  };
  132.  
  133. int main () {
  134. /* write you input file name in here */
  135. std::cout << "Opening input file... ";
  136. auto start = std::chrono::steady_clock::now ();
  137. std::ifstream infile ("C:/Users/User/Desktop/MOZGALO/MOZGALO2019/training_dataset_enc.csv");
  138. std::string str (200, ' ');
  139. auto end = std::chrono::steady_clock::now ();
  140. std::cout
  141. << std::chrono::duration_cast<
  142. std::chrono::milliseconds>(end - start).count ()
  143. << "ms"
  144. << std::endl;
  145.  
  146. // count number of samples in file
  147. std::cout << "Counting number of samples... ";
  148. start = std::chrono::steady_clock::now ();
  149. int count =
  150. std::count (
  151. std::istreambuf_iterator<char> (infile),
  152. std::istreambuf_iterator<char> (),
  153. '\n') - 1;
  154. end = std::chrono::steady_clock::now ();
  155. std::cout
  156. << std::chrono::duration_cast<
  157. std::chrono::seconds>(end - start).count ()
  158. << "s"
  159. << std::endl;
  160.  
  161. // first row of file must have number of samples for optimization reasons
  162. std::cout << "Reserving memory for sample vector... ";
  163. start = std::chrono::steady_clock::now ();
  164. std::vector<sample> samples (count);
  165. end = std::chrono::steady_clock::now ();
  166. std::cout
  167. << std::chrono::duration_cast<
  168. std::chrono::milliseconds>(end - start).count ()
  169. << "ms"
  170. << std::endl;
  171.  
  172. // populating samples vector
  173. // we do this 2 dummy getline() calls to skip file header
  174. std::cout << "Populating sample vector... ";
  175. start = std::chrono::steady_clock::now ();
  176. infile.clear ();
  177. infile.seekg (0);
  178. count = 0;
  179. std::getline (infile, str, '\n');
  180. while (infile >> str) {
  181. samples [count++] = sample (str);
  182. }
  183. infile.close ();
  184. end = std::chrono::steady_clock::now ();
  185. std::cout
  186. << std::chrono::duration_cast<
  187. std::chrono::seconds>(end - start).count ()
  188. << "s"
  189. << std::endl;
  190.  
  191. // here we filter by oznaka_partije so that we take the one where
  192. // datum_zatvaranja is lowest and not NaN
  193. std::cout << "Filtering fetched data... ";
  194. start = std::chrono::steady_clock::now ();
  195. std::unordered_map<int, sample*> oznake;
  196. for (auto& s : samples) {
  197. if (oznake.find (s.oznakaPartije) == oznake.end ()) {
  198. oznake [s.oznakaPartije] = &s;
  199. continue;
  200. }
  201. int datumZatvaranja = oznake [s.oznakaPartije]->datumZatvaranja;
  202. if ((datumZatvaranja == -1 && s.datumZatvaranja > -1) ||
  203. (s.datumZatvaranja > -1 && s.datumZatvaranja < datumZatvaranja)) {
  204. oznake [s.oznakaPartije] = &s;
  205. }
  206. }
  207. end = std::chrono::steady_clock::now ();
  208. std::cout
  209. << std::chrono::duration_cast<
  210. std::chrono::milliseconds>(end - start).count ()
  211. << "ms"
  212. << std::endl;
  213.  
  214. // writing into result file
  215. // we use buffer because we don't want to call write method for each row
  216. /* write you result file name in here */
  217. std::cout << "Writing into output file... ";
  218. start = std::chrono::steady_clock::now ();
  219. std::ofstream outfile ("C:/Users/User/Desktop/MOZGALO/MOZGALO2019/training_dataset_filtered.csv");
  220. outfile
  221. << "KLIJENT_ID,OZNAKA_PARTIJE,DATUM_OTVARANJA,PLANIRANI_DATUM_ZATVARANJA,"
  222. << "DATUM_ZATVARANJA,UGOVORENI_IZNOS,VALUTA,VRSTA_KLIJENTA,"
  223. << "PROIZVOD,VRSTA_PROIZVODA,VISINA_KAMATE,TIP_KAMATE,STAROST,"
  224. << "PRIJEVREMENI_RASKID"
  225. << std::endl;
  226. int thr = 64000;
  227. std::string buffer;
  228. buffer.reserve (thr + 1000);
  229. for (const auto& oznaka : oznake) {
  230. if (buffer.length () > thr) {
  231. outfile << buffer;
  232. buffer.resize (0);
  233. }
  234. buffer.append (std::to_string (oznaka.second->klijentId));
  235. buffer.append (",");
  236. buffer.append (std::to_string (oznaka.second->oznakaPartije));
  237. buffer.append (",");
  238. buffer.append (std::to_string (oznaka.second->datumOtvaranja));
  239. buffer.append (",");
  240. buffer.append (std::to_string (oznaka.second->planiraniDatumZatvaranja));
  241. buffer.append (",");
  242. buffer.append (std::to_string (oznaka.second->datumZatvaranja));
  243. buffer.append (",");
  244. buffer.append (std::to_string (oznaka.second->ugovoreniIznos));
  245. buffer.append (",");
  246. buffer.append (std::to_string (oznaka.second->valuta));
  247. buffer.append (",");
  248. buffer.append (std::to_string (oznaka.second->vrstaKlijenta));
  249. buffer.append (",");
  250. buffer.append (oznaka.second->proizvod);
  251. buffer.append (",");
  252. buffer.append (std::string (1, oznaka.second->vrstaProizvoda));
  253. buffer.append (",");
  254. buffer.append (std::to_string (oznaka.second->visinaKamate));
  255. buffer.append (",");
  256. buffer.append (std::string (1, oznaka.second->tipKamate));
  257. buffer.append (",");
  258. buffer.append (std::to_string (oznaka.second->starost));
  259. buffer.append (",");
  260.  
  261. char prijevremeniRaskid =
  262. oznaka.second->planiraniDatumZatvaranja > -1 &&
  263. oznaka.second->datumZatvaranja > -1 &&
  264. oznaka.second->datumZatvaranja + 10 <
  265. oznaka.second->planiraniDatumZatvaranja ?
  266. 'Y' :
  267. 'N';
  268. buffer.append (std::string (1, prijevremeniRaskid));
  269. buffer.append ("\n");
  270. }
  271. outfile << buffer;
  272. outfile.close ();
  273. end = std::chrono::steady_clock::now ();
  274. std::cout
  275. << std::chrono::duration_cast<
  276. std::chrono::seconds>(end - start).count ()
  277. << "s"
  278. << std::endl;
  279.  
  280. std::cout << std::endl << "Done" << std::endl;
  281. std::getchar ();
  282. }
RAW Paste Data