SHARE
TWEET

Untitled

a guest Apr 19th, 2019 71 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include "pch.h"
  2. #include <iostream>
  3. #include <fstream>
  4. #include <ctime>
  5. #include <string>
  6. #include <vector>
  7. #include <algorithm>
  8. #include <unordered_map>
  9. #include <chrono>
  10.  
  // One row of the input CSV, parsed into typed fields.
  //
  // Date fields are stored as a whole number of days since 1970-01-01, with -1
  // meaning "no date in the input". All scalar members carry default values so
  // that a default-constructed sample never exposes indeterminate data.
  struct sample {
      int id = 0;
      int instanceId = 0;

      int datumIzvjestavanja = -1;
      long long klijentId = 0;
      long long oznakaPartije = 0;
      int datumOtvaranja = -1;
      int planiraniDatumZatvaranja = -1;
      int datumZatvaranja = -1;
      float ugovoreniIznos = 0;
      int valuta = 0;
      float stanjePrethodniKvartal = 0;
      float stanjeKvartal = 0;
      int vrstaKlijenta = 0;
      std::string proizvod;
      char vrstaProizvoda = '\0';
      float visinaKamate = 0;
      char tipKamate = '\0';
      int starost = 0;
      char prijevremeniRaskid = '\0';

      sample () {}

      // Parses one delimiter-separated line. Fields are consumed in this order:
      // id, instanceId, datumIzvjestavanja, klijentId, oznakaPartije,
      // datumOtvaranja, planiraniDatumZatvaranja, datumZatvaranja,
      // ugovoreniIznos, stanjePrethodniKvartal, stanjeKvartal, valuta,
      // vrstaKlijenta, proizvod, vrstaProizvoda, visinaKamate, tipKamate,
      // starost, prijevremeniRaskid.
      //
      // Empty date fields become -1; empty stanjePrethodniKvartal and
      // visinaKamate become 0; empty single-character fields become '\0'.
      // Any other numeric field that is empty or malformed makes the
      // std::sto* conversion throw (std::invalid_argument / std::out_of_range).
      // A line with too few fields is treated as if the missing fields were
      // empty, so it fails on the first missing mandatory numeric field instead
      // of silently re-reading the line from the beginning.
      sample (const std::string& str, const char delim = ',') {
          std::size_t pos = 0;
          bool exhausted = false;

          // Returns the next field, or an empty string once the line has run out.
          const auto next = [&] () -> std::string {
              if (exhausted) {
                  return std::string ();
              }
              const std::size_t end = str.find (delim, pos);
              if (end == std::string::npos) {
                  exhausted = true;
                  return str.substr (pos);
              }
              std::string field = str.substr (pos, end - pos);
              pos = end + 1;
              return field;
          };
          const auto floatOrZero = [] (const std::string& field) {
              return field.empty () ? 0.0f : std::stof (field);
          };
          const auto firstChar = [] (const std::string& field) {
              return field.empty () ? '\0' : field [0];
          };

          id = std::stoi (next ());
          instanceId = std::stoi (next ());
          datumIzvjestavanja = dateToInt (next ());
          klijentId = std::stoll (next ());
          oznakaPartije = std::stoll (next ());
          datumOtvaranja = dateToInt (next ());
          planiraniDatumZatvaranja = dateToInt (next ());
          datumZatvaranja = dateToInt (next ());
          ugovoreniIznos = std::stof (next ());
          stanjePrethodniKvartal = floatOrZero (next ());
          stanjeKvartal = std::stof (next ());
          valuta = std::stoi (next ());
          vrstaKlijenta = std::stoi (next ());
          proizvod = next ();
          vrstaProizvoda = firstChar (next ());
          visinaKamate = floatOrZero (next ());
          tipKamate = firstChar (next ());
          starost = std::stoi (next ());
          prijevremeniRaskid = firstChar (next ());
      }

  private:
      // Converts a date laid out as two-digit day, one separator character,
      // two-digit month, one separator character, four-digit year (for example
      // "31.12.2018") into the number of days since 1970-01-01.
      // Returns -1 for an empty string.
      //
      // The count is computed with pure proleptic-Gregorian arithmetic rather
      // than std::mktime, so it does not depend on the machine's time zone and
      // cannot be shifted by a day by the local UTC offset. Day and month
      // values are not range-checked.
      static int dateToInt (const std::string& date) {
          if (date.empty ()) {
              return -1;
          }

          const int day = std::stoi (date.substr (0, 2));
          const int month = std::stoi (date.substr (3, 2));
          int year = std::stoi (date.substr (6, 4));

          // Treat January and February as the last months of the previous year
          // so that the leap day falls at the very end of the counted year.
          if (month <= 2) {
              --year;
          }
          const int era = (year >= 0 ? year : year - 399) / 400;
          const int yearOfEra = year - era * 400;
          const int dayOfYear =
              (153 * (month + (month > 2 ? -3 : 9)) + 2) / 5 + day - 1;
          const int dayOfEra =
              yearOfEra * 365 + yearOfEra / 4 - yearOfEra / 100 + dayOfYear;
          // 719468 is the number of days from 0000-03-01 to 1970-01-01.
          return era * 146097 + dayOfEra - 719468;
      }
  };
  132.  
  133. int main () {
  134.     /* write you input file name in here */
  135.     std::cout << "Opening input file... ";
  136.     auto start = std::chrono::steady_clock::now ();
  137.     std::ifstream infile ("C:/Users/User/Desktop/MOZGALO/MOZGALO2019/training_dataset_enc.csv");
  138.     std::string str (200, ' ');
  139.     auto end = std::chrono::steady_clock::now ();
  140.     std::cout
  141.         << std::chrono::duration_cast<
  142.         std::chrono::milliseconds>(end - start).count ()
  143.         << "ms"
  144.         << std::endl;
  145.  
  146.     // count number of samples in file
  147.     std::cout << "Counting number of samples... ";
  148.     start = std::chrono::steady_clock::now ();
  149.     int count =
  150.         std::count (
  151.             std::istreambuf_iterator<char> (infile),
  152.             std::istreambuf_iterator<char> (),
  153.             '\n') - 1;
  154.     end = std::chrono::steady_clock::now ();
  155.     std::cout
  156.         << std::chrono::duration_cast<
  157.         std::chrono::seconds>(end - start).count ()
  158.         << "s"
  159.         << std::endl;
  160.  
  161.     // first row of file must have number of samples for optimization reasons
  162.     std::cout << "Reserving memory for sample vector... ";
  163.     start = std::chrono::steady_clock::now ();
  164.     std::vector<sample> samples (count);
  165.     end = std::chrono::steady_clock::now ();
  166.     std::cout
  167.         << std::chrono::duration_cast<
  168.         std::chrono::milliseconds>(end - start).count ()
  169.         << "ms"
  170.         << std::endl;
  171.  
  172.     // populating samples vector
  173.     // we do this 2 dummy getline() calls to skip file header
  174.     std::cout << "Populating sample vector... ";
  175.     start = std::chrono::steady_clock::now ();
  176.     infile.clear ();
  177.     infile.seekg (0);
  178.     count = 0;
  179.     std::getline (infile, str, '\n');
  180.     while (infile >> str) {
  181.         samples [count++] = sample (str);
  182.     }
  183.     infile.close ();
  184.     end = std::chrono::steady_clock::now ();
  185.     std::cout
  186.         << std::chrono::duration_cast<
  187.         std::chrono::seconds>(end - start).count ()
  188.         << "s"
  189.         << std::endl;
  190.  
  191.     // here we filter by oznaka_partije so that we take the one where
  192.     // datum_zatvaranja is lowest and not NaN
  193.     std::cout << "Filtering fetched data... ";
  194.     start = std::chrono::steady_clock::now ();
  195.     std::unordered_map<int, sample*> oznake;
  196.     for (auto& s : samples) {
  197.         if (oznake.find (s.oznakaPartije) == oznake.end ()) {
  198.             oznake [s.oznakaPartije] = &s;
  199.             continue;
  200.         }
  201.         int datumZatvaranja = oznake [s.oznakaPartije]->datumZatvaranja;
  202.         if ((datumZatvaranja == -1 && s.datumZatvaranja > -1) ||
  203.             (s.datumZatvaranja > -1 && s.datumZatvaranja < datumZatvaranja)) {
  204.             oznake [s.oznakaPartije] = &s;
  205.         }
  206.     }
  207.     end = std::chrono::steady_clock::now ();
  208.     std::cout
  209.         << std::chrono::duration_cast<
  210.         std::chrono::milliseconds>(end - start).count ()
  211.         << "ms"
  212.         << std::endl;
  213.  
  214.     // writing into result file
  215.     // we use buffer because we don't want to call write method for each row
  216.     /* write you result file name in here */
  217.     std::cout << "Writing into output file... ";
  218.     start = std::chrono::steady_clock::now ();
  219.     std::ofstream outfile ("C:/Users/User/Desktop/MOZGALO/MOZGALO2019/training_dataset_filtered.csv");
  220.     outfile
  221.         << "KLIJENT_ID,OZNAKA_PARTIJE,DATUM_OTVARANJA,PLANIRANI_DATUM_ZATVARANJA,"
  222.         << "DATUM_ZATVARANJA,UGOVORENI_IZNOS,VALUTA,VRSTA_KLIJENTA,"
  223.         << "PROIZVOD,VRSTA_PROIZVODA,VISINA_KAMATE,TIP_KAMATE,STAROST,"
  224.         << "PRIJEVREMENI_RASKID"
  225.         << std::endl;
  226.     int thr = 64000;
  227.     std::string buffer;
  228.     buffer.reserve (thr + 1000);
  229.     for (const auto& oznaka : oznake) {
  230.         if (buffer.length () > thr) {
  231.             outfile << buffer;
  232.             buffer.resize (0);
  233.         }
  234.         buffer.append (std::to_string (oznaka.second->klijentId));
  235.         buffer.append (",");
  236.         buffer.append (std::to_string (oznaka.second->oznakaPartije));
  237.         buffer.append (",");
  238.         buffer.append (std::to_string (oznaka.second->datumOtvaranja));
  239.         buffer.append (",");
  240.         buffer.append (std::to_string (oznaka.second->planiraniDatumZatvaranja));
  241.         buffer.append (",");
  242.         buffer.append (std::to_string (oznaka.second->datumZatvaranja));
  243.         buffer.append (",");
  244.         buffer.append (std::to_string (oznaka.second->ugovoreniIznos));
  245.         buffer.append (",");
  246.         buffer.append (std::to_string (oznaka.second->valuta));
  247.         buffer.append (",");
  248.         buffer.append (std::to_string (oznaka.second->vrstaKlijenta));
  249.         buffer.append (",");
  250.         buffer.append (oznaka.second->proizvod);
  251.         buffer.append (",");
  252.         buffer.append (std::string (1, oznaka.second->vrstaProizvoda));
  253.         buffer.append (",");
  254.         buffer.append (std::to_string (oznaka.second->visinaKamate));
  255.         buffer.append (",");
  256.         buffer.append (std::string (1, oznaka.second->tipKamate));
  257.         buffer.append (",");
  258.         buffer.append (std::to_string (oznaka.second->starost));
  259.         buffer.append (",");
  260.  
  261.         char prijevremeniRaskid =
  262.             oznaka.second->planiraniDatumZatvaranja > -1 &&
  263.             oznaka.second->datumZatvaranja > -1 &&
  264.             oznaka.second->datumZatvaranja + 10 <
  265.             oznaka.second->planiraniDatumZatvaranja ?
  266.             'Y' :
  267.             'N';
  268.         buffer.append (std::string (1, prijevremeniRaskid));
  269.         buffer.append ("\n");
  270.     }
  271.     outfile << buffer;
  272.     outfile.close ();
  273.     end = std::chrono::steady_clock::now ();
  274.     std::cout
  275.         << std::chrono::duration_cast<
  276.         std::chrono::seconds>(end - start).count ()
  277.         << "s"
  278.         << std::endl;
  279.  
  280.     std::cout << std::endl << "Done" << std::endl;
  281.     std::getchar ();
  282. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top