Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * CRAWLER.CPP
- *
- * Methods for Crawler class - File Crawler
- * ========================================
- *
- * Ondrej Konecny
- *
- * 27 April 2009
- */
- // NAME SPACING =================================================== //
- using namespace std;
- // INCLUDES ========================================================= //
#include <stdlib.h>
#include <iostream>
#include <dirent.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "crawler.h"
- // CONSTRUCTOR ========================================================= //
- Crawler::Crawler(int argc, char *argv[])
- {
- //--- initializing of variables
- par_startable = 0;
- par_finding = 0;
- par_needle = "";
- par_extension = "txt";
- par_path = ".";
- list_counter = 0;
- //--- - - - - - -
- string argument;
- color(30);
- cout << " FILE CRAWLER 0.9 beta (c) 2009 Ondrej Konecny, Kamil Havlicek " << endl;
- if (argc > 1) {
- argument = string(argv[1]);
- if (argument == "?" || argument.substr(0, 2) == "/?" || argument.substr(0, 2) == "\?") {
- help();
- } else {
- for (int i = 1; i < argc; i++) {
- argument = string(argv[i]); // because substr is from string module
- if (argument.substr(0, 2) == "F=" || argument.substr(0, 2) == "f=") {
- par_startable = 1;
- par_finding = 1;
- par_needle = argument.substr(2, argument.length() - 2);
- color(9);
- cout << " FIND = ";
- color(15);
- cout << par_needle << endl;
- } else if (argument.substr(0, 2) == "E=" || argument.substr(0, 2) == "e=") {
- par_startable = 1;
- par_extension = argument.substr(2, argument.length() - 2);
- color(9);
- cout << " EXT = ";
- color(15);
- cout << par_extension << endl;
- } else if (argument.substr(0, 2) == "P=" || argument.substr(0, 2) == "p=") {
- par_startable = 1;
- par_path = argument.substr(2, argument.length() - 2);
- color(9);
- cout << " PATH = ";
- color(15);
- cout << par_path << endl;
- }
- }
- // global variables of parametres was set
- // now we can call main starting function
- if (par_startable == 1) {
- color(8);
- cout << "\n\n>>";
- color(10);
- cout << " Browsing...\n\n" << endl;
- browse(par_path);
- list_sort();
- //list_print();
- write_files();
- } else {
- color(15);
- cout << "Parameter error. Write parameter \"?\" for more information." << endl << endl << endl;
- }
- }
- } else {
- color(15);
- cout << "No parameter found. Write parameter \"?\" for more information." << endl << endl << endl;
- }
- }
- // DESTRUCTOR ========================================================= //
- Crawler::~Crawler()
- {
- color(8); //get back system gray color
- cout << endl; // new line
- system("PAUSE");
- }
// BROWSE (the constructor calls this method) ============================== //
- void Crawler::browse(string browsed_path)
- {
- //piece of code for browsing
- DIR *p_directory;
- struct dirent *p_element; //file or directory in open directory
- p_directory = opendir(browsed_path.c_str());
- if (!p_directory) {
- cout << "Open error";
- } else {
- //successfully opened
- // we can browse its content
- string test_path;
- int is_file;
- while ((p_element = readdir(p_directory))) {
- test_path = browsed_path + "/" + p_element->d_name;
- if (opendir(test_path.c_str())) {
- //it's a dir
- is_file = 0;
- if (string(p_element->d_name) != "." && string(p_element->d_name) != "..") {
- browse(test_path);
- // adding path to end of array
- list_add(test_path, is_file);
- }
- } else {
- //it's a file
- color(10);
- //calling exploring functions
- is_file = 1;
- // adding path to end of array
- if (par_extension == "") {
- list_add(test_path, is_file);
- } else {
- if (test_path.substr(test_path.rfind(".") + 1 , par_extension.size()) == par_extension && (test_path.size() - test_path.rfind(".") - 1 == par_extension.size())) {
- list_add(test_path, is_file);
- }
- }
- }
- }
- closedir(p_directory);
- }
- }
- // LIST_ADD ================================================================ //
- void Crawler::list_add(string test_path, int is_file)
- {
- //DISC_STRUCTURE *pom = (DISC_STRUCTURE*) malloc(sizeof(DISC_STRUCTURE));
- DISC_STRUCTURE *list_cell = new DISC_STRUCTURE[sizeof(DISC_STRUCTURE)];
- list_cell->is_file = is_file;
- list_cell->path = test_path;
- list_cell->p_next = NULL;
- if (++list_counter == 1) {
- list_begin = list_cell;
- } else {
- list_cell_previous->p_next = list_cell;
- }
- list_cell_previous = list_cell;
- }
- // LIST_SORT ================================================================ //
- void Crawler::list_sort()
- {
- DISC_STRUCTURE *p_cell;
- DISC_STRUCTURE temp_structure;
- p_cell = list_begin;
- int changed;
- while (p_cell) {
- changed = 0;
- p_cell = p_cell->p_next;
- if ( p_cell->p_next == NULL) {
- p_cell = list_begin;
- } else {
- if ( strcmp(((p_cell->p_next)->path).c_str(), (p_cell->path).c_str() ) < 0) {
- temp_structure.path = p_cell->path;
- temp_structure.is_file = p_cell->is_file;
- p_cell->path = (p_cell->p_next)->path;
- p_cell->is_file = (p_cell->p_next)->is_file;
- (p_cell->p_next)->path = temp_structure.path;
- (p_cell->p_next)->is_file = temp_structure.is_file;
- changed = 1;
- }
- }
- if (changed == 0) {
- break;
- }
- }
- }
- // LIST_PRINT ================================================================ //
- void Crawler::list_print() {
- DISC_STRUCTURE *p_cell;
- p_cell = list_begin;
- while (p_cell) {
- cout << p_cell->path << " - " << p_cell->is_file;
- p_cell = p_cell->p_next;
- cout << endl;
- }
- }
- // WRITE_FILES ================================================================ //
- void Crawler::write_files() {
- //summary counts
- COUNT_LINES sum_count_lines;
- sum_count_lines.for_mac = 0;
- sum_count_lines.for_win = 0;
- sum_count_lines.for_unix = 0;
- int sum_count_needles = 0;
- long long int sum_count_bytes = 0;
- int count_directories = 0;
- int count_files = 0;
- DISC_STRUCTURE *p_cell;
- p_cell = list_begin;
- FILE *file;
- while (p_cell) {
- if (p_cell->is_file == 1) {
- count_files++;
- color(8);
- cout << ">> ";
- color(14);
- cout << p_cell->path;
- cout << endl;
- file = fopen( (p_cell->path).c_str() , "rb");
- if (file == NULL) {
- color(12);
- cout << "\tAccess denied" << endl;
- color(14);
- } else {
- filescanner(file);
- fclose(file);
- sum_count_lines.for_mac += count_lines.for_mac;
- sum_count_lines.for_unix += count_lines.for_unix;
- sum_count_lines.for_win += count_lines.for_win;
- sum_count_bytes += count_bytes;
- sum_count_needles += count_needles;
- }
- } else {
- count_directories++;
- }
- p_cell = p_cell->p_next; //next cell
- }
- //write on screen summary
- cout << endl;
- color(7); cout << " +----------------------=====:::| ";
- color(9); cout << "Informations ";
- color(7); cout << "|:::=====----------------------+";
- cout << endl;
- cout << endl;
- color (15);
- cout << " " << "MAC Lines"; color(8); cout << " ............... "; color(15);
- cout << sum_count_lines.for_mac << endl;
- cout << " " << "WIN Lines"; color(8); cout << " ............... "; color(15);
- cout << sum_count_lines.for_win << endl;
- cout << " " << "UNIX Lines"; color(8); cout << " .............. "; color(15);
- cout << sum_count_lines.for_unix << endl;
- cout << " " << "Total bytes"; color(8); cout << " ............. "; color(15);
- cout << sum_count_bytes << endl;
- cout << " " << "Total needles"; color(8); cout << " ........... "; color(15);
- cout << sum_count_needles << endl;
- cout << " " << "Total files"; color(8); cout << " ............. "; color(15);
- cout << count_files << endl;
- cout << " " << "Total directories"; color(8); cout << " ....... "; color(15);
- cout << count_directories << endl;
- cout << endl;
- color(7); cout << " +----------------------------------------------------------------------------+";
- cout << endl;
- color (15);
- }
- // ISTHERELINE ================================================================ //
- void Crawler::isthereline(int current_char, int past_char, COUNT_LINES *count_lines)
- {
- if (current_char == 10 ) {
- //unix (only 10)
- count_lines->for_unix++;
- if (past_char == 13) {
- //windows (13 then 10)
- count_lines->for_win++;
- }
- } else if (current_char == 13) {
- //macinotsh (only 13)
- count_lines->for_mac++;
- }
- }
- // FILESCANNER ================================================================ //
- void Crawler::filescanner(FILE *file)
- {
- //finding needle (and counting of needles), line-counting, byte-counting
- int character;
- int past_character;
- count_bytes = -1;
- count_lines.for_mac = 0;
- count_lines.for_win = 0;
- count_lines.for_unix = 0;
- color(12);
- count_needles = 0;
- if (par_finding == 0) {
- do {
- character = fgetc(file);
- count_bytes++;
- isthereline(character, past_character, &count_lines);
- past_character = character;
- } while (character != EOF);
- } else {
- // finding enabled
- int *queue_past_characters;
- queue_past_characters = new int[par_needle.length() + 1];
- int needle_found;
- count_needles = 0;
- do {
- character = fgetc(file);
- count_bytes++;
- isthereline(character, past_character, &count_lines);
- past_character = character;
- //--- begin of finding needle
- for (unsigned int i = 1; i <= par_needle.length(); i++) {
- queue_past_characters[i-1] = queue_past_characters[i];
- }
- queue_past_characters[par_needle.length()] = character;
- needle_found = 1;
- char one_character;
- for (unsigned int i=0; i < par_needle.length(); i++) {
- par_needle.copy(&one_character, 1, i);
- if (queue_past_characters[i] != (int)one_character ) {
- needle_found = 0;
- break;
- }
- }
- if (needle_found == 1) {
- count_needles++;
- }
- //--- end of findings
- color(10);
- } while (character != EOF);
- }
- if (count_bytes != 0) {
- count_lines.for_mac++;
- count_lines.for_win++;
- count_lines.for_unix++;
- }
- color(8);
- cout << "\t";
- cout << count_bytes << " bytes" << endl;
- cout << "\t";
- cout << count_lines.for_mac << " lines MAC" << endl;
- cout << "\t";
- cout << count_lines.for_win << " lines WIN" << endl;
- cout << "\t";
- cout << count_lines.for_unix << " lines UNIX" << endl;
- if (par_needle != "") {
- cout << "\t";
- cout << count_needles << " x needles " << endl;
- }
- }
- // HELP (called by parameter /?) ========================================== //
- void Crawler::help()
- {
- cout << endl;
- color(7); cout << " +--------------------------=====:::| ";
- color(9); cout << "Help ";
- color(7); cout << "|:::=====--------------------------+";
- cout << endl;
- cout << endl;
- color (15);
- cout << " " << "? "; color(8); cout << " ............... "; color(15);
- cout << "Print help." << endl;
- cout << " " << "P= "; color(8); cout << " ............... "; color(15);
- cout << "Set path of file" << endl;
- cout << " " << "F= "; color(8); cout << " ............... "; color(15);
- cout << "Searching words" << endl;
- cout << " " << "E= "; color(8); cout << " ............... "; color(15);
- cout << "Filtr of extention" << endl;
- cout << endl;
- color(7); cout << " +----------------------------------------------------------------------------+";
- cout << endl;
- color (15);
- }
- // COLOR (sets color of command line) ======================================= //
- void Crawler::color (int number_of_color)
- {
- //DOS color
- HANDLE dos_color;
- dos_color = GetStdHandle(STD_OUTPUT_HANDLE);
- SetConsoleTextAttribute(dos_color, number_of_color);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement