Advertisement
Guest User

Untitled

a guest
May 27th, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 13.85 KB | None | 0 0
  1. /*
  2.  * CRAWLER.CPP
  3.  *
  4.  * Methods for Crawler class - File Crawler
  5.  * ========================================
  6.  *
  7.  * Ondrej Konecny
  8.  *
  9.  * 27 April 2009
  10.  */
  11.  
  12. // NAME SPACING =================================================== //
  13. using namespace std;
  14.  
  15.  
  16. // INCLUDES ========================================================= //
  17.  
  18. #include <stdlib.h>
  19. #include <iostream>
  20.  
  21. #include <dirent.h>
  22.  
  23. #include "crawler.h"
  24.  
  25. // CONSTRUCTOR ========================================================= //
  26. Crawler::Crawler(int argc, char *argv[])
  27. {
  28.     //--- initializing of variables
  29.     par_startable = 0;
  30.     par_finding = 0;
  31.     par_needle = "";
  32.     par_extension = "txt";
  33.     par_path = ".";
  34.  
  35.  
  36.     list_counter = 0;
  37.  
  38.  
  39.     //--- - - - - - -
  40.  
  41.     string argument;
  42.  
  43.     color(30);
  44.     cout << "   FILE CRAWLER 0.9 beta   (c) 2009 Ondrej Konecny, Kamil Havlicek              " << endl;
  45.  
  46.  
  47.     if (argc > 1) {
  48.         argument = string(argv[1]);
  49.         if (argument == "?" || argument.substr(0, 2) == "/?" || argument.substr(0, 2) == "\?") {
  50.            help();
  51.  
  52.         } else {
  53.  
  54.             for (int i = 1; i < argc; i++) {
  55.  
  56.                 argument = string(argv[i]); // because substr is from string module
  57.  
  58.                 if (argument.substr(0, 2) == "F=" || argument.substr(0, 2) == "f=") {
  59.  
  60.                     par_startable = 1;
  61.                     par_finding = 1;
  62.                     par_needle = argument.substr(2, argument.length() - 2);
  63.                     color(9);
  64.                     cout << "   FIND = ";
  65.                     color(15);
  66.                     cout << par_needle << endl;
  67.  
  68.                 } else if (argument.substr(0, 2) == "E=" || argument.substr(0, 2) == "e=") {
  69.  
  70.                     par_startable = 1;
  71.                     par_extension = argument.substr(2, argument.length() - 2);
  72.                     color(9);
  73.                     cout << "   EXT  = ";
  74.                     color(15);
  75.                     cout << par_extension << endl;
  76.  
  77.                 } else if (argument.substr(0, 2) == "P=" || argument.substr(0, 2) == "p=") {
  78.  
  79.                     par_startable = 1;
  80.                     par_path = argument.substr(2, argument.length() - 2);
  81.                     color(9);
  82.                     cout << "   PATH = ";
  83.                     color(15);
  84.                     cout << par_path << endl;
  85.  
  86.                 }
  87.             }
  88.  
  89.             // global variables of parametres was set
  90.             // now we can call main starting function
  91.             if (par_startable == 1) {
  92.  
  93.                 color(8);
  94.                 cout << "\n\n>>";
  95.                 color(10);
  96.                 cout << " Browsing...\n\n" << endl;
  97.  
  98.                 browse(par_path);
  99.                 list_sort();
  100.                 //list_print();
  101.  
  102.  
  103.                 write_files();
  104.             } else {
  105.                 color(15);
  106.                 cout << "Parameter error. Write parameter \"?\" for more information." << endl << endl << endl;
  107.             }
  108.         }
  109.  
  110.     } else {
  111.  
  112.        color(15);
  113.        cout << "No parameter found. Write parameter \"?\" for more information." << endl << endl << endl;
  114.  
  115.     }
  116.  
  117. }
  118.  
  119.  
  120.  
  121.  
  122.  
  123. // DESTRUCTOR  ========================================================= //
  124. Crawler::~Crawler()
  125. {
  126.     color(8); //get back system gray color
  127.     cout << endl; // new line
  128.  
  129.  
  130.     system("PAUSE");
  131. }
  132.  
  133.  
  134.  
  135. // START (constructor calls this method) ================================= //
  136. void Crawler::browse(string browsed_path)
  137. {
  138.     //piece of code for browsing
  139.  
  140.     DIR *p_directory;
  141.     struct dirent *p_element; //file or directory in open directory
  142.  
  143.     p_directory = opendir(browsed_path.c_str());
  144.  
  145.     if (!p_directory) {
  146.         cout << "Open error";
  147.  
  148.     } else {
  149.         //successfully opened
  150.  
  151.         // we can browse its content
  152.  
  153.         string test_path;
  154.  
  155.         int is_file;
  156.  
  157.         while ((p_element = readdir(p_directory))) {
  158.  
  159.             test_path = browsed_path + "/" + p_element->d_name;
  160.  
  161.             if (opendir(test_path.c_str())) {
  162.                 //it's a dir
  163.                 is_file = 0;
  164.  
  165.                 if (string(p_element->d_name) != "." && string(p_element->d_name) != "..") {
  166.                     browse(test_path);
  167.  
  168.                     // adding path to end of array
  169.                     list_add(test_path, is_file);
  170.  
  171.                 }
  172.  
  173.             } else {
  174.                 //it's a file
  175.                 color(10);
  176.  
  177.                 //calling exploring functions
  178.  
  179.                 is_file = 1;
  180.  
  181.                 // adding path to end of array
  182.  
  183.  
  184.                 if (par_extension == "") {
  185.                     list_add(test_path, is_file);
  186.                 } else {
  187.  
  188.                     if (test_path.substr(test_path.rfind(".") + 1 , par_extension.size()) == par_extension && (test_path.size() - test_path.rfind(".") - 1 == par_extension.size())) {
  189.                         list_add(test_path, is_file);
  190.                     }
  191.                 }
  192.             }
  193.         }
  194.         closedir(p_directory);
  195.     }
  196. }
  197.  
  198. // LIST_ADD ================================================================ //
  199.  
  200. void Crawler::list_add(string test_path, int is_file)
  201. {
  202.      //DISC_STRUCTURE *pom = (DISC_STRUCTURE*) malloc(sizeof(DISC_STRUCTURE));
  203.      DISC_STRUCTURE *list_cell = new DISC_STRUCTURE[sizeof(DISC_STRUCTURE)];
  204.  
  205.  
  206.  
  207.      list_cell->is_file = is_file;
  208.      list_cell->path = test_path;
  209.      list_cell->p_next = NULL;
  210.  
  211.      if (++list_counter == 1) {
  212.          list_begin = list_cell;
  213.      } else {
  214.          list_cell_previous->p_next = list_cell;
  215.      }
  216.  
  217.      list_cell_previous = list_cell;
  218.  
  219. }
  220.  
  221.  
  222. // LIST_SORT ================================================================ //
  223. void Crawler::list_sort()
  224. {
  225.  
  226.     DISC_STRUCTURE *p_cell;
  227.  
  228.     DISC_STRUCTURE temp_structure;
  229.  
  230.     p_cell = list_begin;
  231.  
  232.     int changed;
  233.  
  234.     while (p_cell) {
  235.  
  236.           changed = 0;
  237.  
  238.           p_cell = p_cell->p_next;
  239.  
  240.  
  241.           if ( p_cell->p_next == NULL) {
  242.  
  243.              p_cell = list_begin;
  244.  
  245.           } else {
  246.  
  247.               if ( strcmp(((p_cell->p_next)->path).c_str(), (p_cell->path).c_str() ) < 0) {
  248.  
  249.                    temp_structure.path = p_cell->path;
  250.                    temp_structure.is_file = p_cell->is_file;
  251.  
  252.                    p_cell->path = (p_cell->p_next)->path;
  253.                    p_cell->is_file = (p_cell->p_next)->is_file;
  254.  
  255.                    (p_cell->p_next)->path = temp_structure.path;
  256.                    (p_cell->p_next)->is_file = temp_structure.is_file;
  257.  
  258.                    changed = 1;
  259.  
  260.               }
  261.           }
  262.  
  263.           if (changed == 0) {
  264.               break;
  265.           }
  266.     }
  267. }
  268.  
  269.  
  270. // LIST_PRINT ================================================================ //
  271. void Crawler::list_print() {
  272.  
  273.     DISC_STRUCTURE *p_cell;
  274.     p_cell = list_begin;
  275.  
  276.     while (p_cell) {
  277.         cout << p_cell->path << " - " << p_cell->is_file;
  278.         p_cell = p_cell->p_next;
  279.         cout << endl;
  280.     }
  281.  
  282. }
  283.  
  284.  
  285. // WRITE_FILES ================================================================ //
  286. void Crawler::write_files() {
  287.  
  288.     //summary counts
  289.     COUNT_LINES sum_count_lines;
  290.     sum_count_lines.for_mac = 0;
  291.     sum_count_lines.for_win = 0;
  292.     sum_count_lines.for_unix = 0;
  293.  
  294.     int sum_count_needles = 0;
  295.     long long int sum_count_bytes = 0;
  296.     int count_directories = 0;
  297.     int count_files = 0;
  298.  
  299.  
  300.     DISC_STRUCTURE *p_cell;
  301.     p_cell = list_begin;
  302.  
  303.     FILE *file;
  304.  
  305.     while (p_cell) {
  306.  
  307.         if (p_cell->is_file == 1) {
  308.             count_files++;
  309.  
  310.             color(8);
  311.             cout << ">> ";
  312.  
  313.             color(14);
  314.             cout << p_cell->path;
  315.  
  316.             cout << endl;
  317.  
  318.             file = fopen( (p_cell->path).c_str() , "rb");
  319.  
  320.             if (file == NULL) {
  321.                color(12);
  322.                cout << "\tAccess denied" << endl;
  323.                color(14);
  324.             } else {
  325.                filescanner(file);
  326.                fclose(file);
  327.  
  328.  
  329.                sum_count_lines.for_mac += count_lines.for_mac;
  330.                sum_count_lines.for_unix += count_lines.for_unix;
  331.                sum_count_lines.for_win += count_lines.for_win;
  332.  
  333.                sum_count_bytes += count_bytes;
  334.                sum_count_needles += count_needles;
  335.  
  336.             }
  337.  
  338.         } else {
  339.             count_directories++;
  340.         }
  341.  
  342.         p_cell = p_cell->p_next; //next cell
  343.  
  344.     }
  345.  
  346.  
  347.     //write on screen summary
  348.  
  349.     cout << endl;
  350.     color(7); cout << " +----------------------=====:::| ";
  351.     color(9); cout << "Informations ";
  352.     color(7); cout << "|:::=====----------------------+";
  353.     cout << endl;
  354.     cout << endl;
  355.     color (15);
  356.  
  357.     cout << "    " << "MAC Lines"; color(8); cout << " ............... "; color(15);
  358.     cout << sum_count_lines.for_mac << endl;
  359.  
  360.     cout << "    " << "WIN Lines"; color(8); cout << " ............... "; color(15);
  361.     cout << sum_count_lines.for_win <<  endl;
  362.  
  363.     cout << "    " << "UNIX Lines"; color(8); cout << " .............. "; color(15);
  364.     cout << sum_count_lines.for_unix << endl;
  365.  
  366.     cout << "    " << "Total bytes"; color(8); cout << " ............. "; color(15);
  367.     cout << sum_count_bytes << endl;
  368.  
  369.     cout << "    " << "Total needles"; color(8); cout << " ........... "; color(15);
  370.     cout << sum_count_needles << endl;
  371.  
  372.     cout << "    " << "Total files"; color(8); cout << " ............. "; color(15);
  373.     cout << count_files << endl;
  374.  
  375.     cout << "    " << "Total directories"; color(8); cout << " ....... "; color(15);
  376.     cout << count_directories << endl;
  377.  
  378.     cout << endl;
  379.     color(7); cout << " +----------------------------------------------------------------------------+";
  380.     cout << endl;
  381.     color (15);
  382.  
  383.  
  384.  
  385.  
  386. }
  387.  
  388.  
  389.  
  390.  
  391. // ISTHERELINE ================================================================ //
  392.  
  393. void Crawler::isthereline(int current_char, int past_char, COUNT_LINES *count_lines)
  394. {
  395.  
  396.     if (current_char == 10 ) {
  397.         //unix (only 10)
  398.         count_lines->for_unix++;
  399.  
  400.         if (past_char == 13) {
  401.             //windows (13 then 10)
  402.             count_lines->for_win++;
  403.         }
  404.  
  405.     } else if (current_char == 13) {
  406.         //macinotsh (only 13)
  407.         count_lines->for_mac++;
  408.     }
  409.  
  410. }
  411.  
  412.  
  413.  
  414. // FILESCANNER ================================================================ //
  415. void Crawler::filescanner(FILE *file)
  416. {
  417.  
  418.  
  419.     //finding needle (and counting of needles), line-counting, byte-counting
  420.     int character;
  421.     int past_character;
  422.  
  423.  
  424.  
  425.     count_bytes = -1;
  426.  
  427.     count_lines.for_mac = 0;
  428.     count_lines.for_win = 0;
  429.     count_lines.for_unix = 0;
  430.  
  431.     color(12);
  432.  
  433.     count_needles = 0;
  434.  
  435.     if (par_finding == 0) {
  436.  
  437.         do {
  438.             character = fgetc(file);
  439.             count_bytes++;
  440.             isthereline(character, past_character, &count_lines);
  441.             past_character = character;
  442.         } while (character != EOF);
  443.  
  444.     } else {
  445.  
  446.         // finding enabled
  447.         int *queue_past_characters;
  448.         queue_past_characters = new int[par_needle.length() + 1];
  449.  
  450.         int needle_found;
  451.  
  452.         count_needles = 0;
  453.  
  454.         do {
  455.             character = fgetc(file);
  456.             count_bytes++;
  457.             isthereline(character, past_character, &count_lines);
  458.  
  459.             past_character = character;
  460.  
  461.  
  462.             //--- begin of finding needle
  463.             for (unsigned int i = 1; i <= par_needle.length(); i++) {
  464.                 queue_past_characters[i-1] = queue_past_characters[i];
  465.             }
  466.             queue_past_characters[par_needle.length()] = character;
  467.  
  468.  
  469.  
  470.             needle_found = 1;
  471.  
  472.             char one_character;
  473.             for (unsigned int i=0; i < par_needle.length(); i++) {
  474.  
  475.                 par_needle.copy(&one_character, 1, i);
  476.  
  477.                 if (queue_past_characters[i] !=  (int)one_character ) {
  478.                     needle_found = 0;
  479.                     break;
  480.                 }
  481.             }
  482.  
  483.             if (needle_found == 1) {
  484.                 count_needles++;
  485.             }
  486.  
  487.             //--- end of findings
  488.  
  489.             color(10);
  490.  
  491.         } while (character != EOF);
  492.  
  493.     }
  494.  
  495.  
  496.     if (count_bytes != 0) {
  497.         count_lines.for_mac++;
  498.         count_lines.for_win++;
  499.         count_lines.for_unix++;
  500.     }
  501.     color(8);
  502.     cout << "\t";
  503.     cout << count_bytes << " bytes" << endl;
  504.  
  505.     cout << "\t";
  506.     cout << count_lines.for_mac << " lines MAC" << endl;
  507.  
  508.     cout << "\t";
  509.     cout << count_lines.for_win << " lines WIN" << endl;
  510.  
  511.     cout << "\t";
  512.     cout << count_lines.for_unix << " lines UNIX" << endl;
  513.  
  514.     if (par_needle != "") {
  515.         cout << "\t";
  516.         cout << count_needles << " x needles " << endl;
  517.     }
  518.  
  519. }
  520.  
  521.  
  522. // HELP (called by parameter /?)  ========================================== //
  523. void Crawler::help()
  524. {
  525.     cout << endl;
  526.     color(7); cout << " +--------------------------=====:::| ";
  527.     color(9); cout << "Help ";
  528.     color(7); cout << "|:::=====--------------------------+";
  529.     cout << endl;
  530.     cout << endl;
  531.     color (15);
  532.  
  533.     cout << "    " << "?  "; color(8); cout << " ............... "; color(15);
  534.     cout << "Print help." << endl;
  535.  
  536.     cout << "    " << "P= "; color(8); cout << " ............... "; color(15);
  537.     cout << "Set path of file" <<  endl;
  538.  
  539.     cout << "    " << "F= "; color(8); cout << " ............... "; color(15);
  540.     cout << "Searching words" << endl;
  541.  
  542.     cout << "    " << "E= "; color(8); cout << " ............... "; color(15);
  543.     cout << "Filtr of extention" << endl;
  544.     cout << endl;
  545.     color(7); cout << " +----------------------------------------------------------------------------+";
  546.     cout << endl;
  547.     color (15);
  548.  
  549. }
  550.  
  551. // COLOR (sets color of command line)  ======================================= //
  552. void Crawler::color (int number_of_color)
  553. {
  554.    //DOS color
  555.    HANDLE dos_color;
  556.    dos_color = GetStdHandle(STD_OUTPUT_HANDLE);
  557.    SetConsoleTextAttribute(dos_color, number_of_color);
  558. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement