Advertisement
kwasinski

data_retriever.php

Feb 28th, 2015
321
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 3.68 KB | None | 0 0
  1. <?php //Author:: vkwasinski@gmail.com
  2. /*
  3.     Scrapes a page and retrieves the following information {
  4.         Which state has the most winners,
  5.         What the awards average is for each state,
  6.         An ordered list of the tens,
  7.     }
  8. */
  9.  
  10. include 'class/include.php';
  11.  
  12.  
  // Entry point: the URL to scrape is taken from the first command-line argument.
  $url = isset($argv[1]) ? $argv[1] : null;
  if (!$url) {
      // Report the problem on stderr and exit non-zero so a calling shell can
      // detect the failure (exit() with a string message exits with status 0).
      fwrite(STDERR, 'URL missing' . "\n");
      exit(1);
  }

  $html_parser = new simple_html_dom();
  $page_scrapper = new MyScrapper($url);

  print 'scraping from ' . $url . "\n";
  $scraped_page = $page_scrapper->curl_scrap();

  if (!$scraped_page) {
      fwrite(STDERR, 'Something went wrong while scraping: ' . $url . "\n");
      exit(1);
  }

  print 'Page Scrapped, checking and save if necessary' . "\n";
  // NOTE(review): the return value is stored but never inspected; its meaning is
  // defined by MyScrapper::check_n_save_page() — confirm whether a falsy value
  // should abort the run.
  $scrap_isOk = $page_scrapper->check_n_save_page($scraped_page['scraped_page'], $scraped_page['filetime']);

  print 'Retrieving data from file...' . "\n";
  // NOTE(review): presumably 'megasena.html' is the file written by
  // check_n_save_page() above — verify against MyScrapper.
  $html_parser->load_file('megasena.html');

  // Every table row of the loaded document; the blocks below read cells by column index.
  $tr = $html_parser->find('tr');
  // Aggregate winner counts per state.
  //
  // Input : $tr — the table rows found in the parsed document.
  // Output: $all_states, keyed by the two-character code read from column 11, e.g.
  //           $all_states['SP'] = array(
  //               'sena_winners'   => int,
  //               'total_winners'  => int,  // sum of the three counters below/above
  //               'quina_winners'  => int,
  //               'quadra_winners' => int,
  //           )
  // Winner counts are read from columns 9 (sena), 13 (quina) and 15 (quadra).
  $winner_columns = array('sena_winners' => 9, 'quina_winners' => 13, 'quadra_winners' => 15);

  $all_states = array();
  foreach ($tr as $element) {
      // Rows whose column 11 does not start with "<td>" followed by two word
      // characters carry no state and are skipped.
      if (!preg_match('/\<td\>(\w\w)/', (string) $element->children(11), $matches)) {
          continue;
      }
      $state = $matches[1];

      if (!isset($all_states[$state])) {
          // Seed every counter up front so that no branch can later reset
          // 'total_winners' while a game key is being created.
          $all_states[$state] = array(
              'sena_winners'   => 0,
              'total_winners'  => 0,
              'quina_winners'  => 0,
              'quadra_winners' => 0,
          );
      }

      foreach ($winner_columns as $game => $column_number) {
          // A cell without digits right after a '>' counts as zero winners
          // instead of leaking the DOM node into the arithmetic.
          $winners_per_game = preg_match('/\>(\d+)/', (string) $element->children($column_number), $matches)
              ? (int) $matches[1]
              : 0;

          $all_states[$state][$game] += $winners_per_game;
          $all_states[$state]['total_winners'] += $winners_per_game;
      }
  }
  // Collect the drawn tens per draw position.
  //
  // For every row in $tr, columns 2..7 are scanned; when a cell contains digits
  // right after a '>', that digit string is appended to the list of its position.
  // Values are kept as the matched strings, in row order.
  // NOTE(review): despite the variable name, nothing here sorts the values —
  // confirm whether an explicit sort is expected.
  $tens_columns = array(
      'first_tens' => 2,
      'seccond_tens' => 3,
      'third_tens' => 4,
      'fourth_tens' => 5,
      'fifth_tens' => 6,
      'sixth_tens' => 7,
  );

  $array_ordered_tens = array();
  foreach ($tr as $row) {
      foreach ($tens_columns as $position => $column_index) {
          $cell = $row->children($column_index);

          // Only cells that yield a number contribute; everything else is ignored.
          if (preg_match("/\>(\d+)/", $cell, $found)) {
              $array_ordered_tens[$position][] = $found[1];
          }
      }
  }
  // Derive the two statistics from $all_states in a single pass:
  //   - $state_with_more_winners: key of the state with the highest 'total_winners'
  //     (first one wins on a tie; empty string when there are no states);
  //   - $game_statistics[state]['average_winners']: mean of the sena, quina and
  //     quadra winner counts of that state.
  //
  // The total is compared explicitly because max() on an array of arrays compares
  // the inner arrays element by element in key order, which does not rank states
  // by 'total_winners'.
  $state_with_more_winners = '';
  $highest_total = null;
  $game_statistics = array();

  foreach ($all_states as $state => $properties) {
      if ($highest_total === null || $properties['total_winners'] > $highest_total) {
          $highest_total = $properties['total_winners'];
          $state_with_more_winners = $state;
      }

      // Average between the games quina, sena and quadra.
      $game_statistics[$state]['average_winners'] =
          ($properties['quina_winners'] + $properties['sena_winners'] + $properties['quadra_winners']) / 3;
  }
  // Write the report to 'result.txt' (the file is created or truncated).
  //
  // The two array results are rendered with var_dump(); since var_dump() prints
  // instead of returning, its output is captured through an output buffer.
  ob_start();
  var_dump($game_statistics);
  $statistics_dump = ob_get_clean();

  ob_start();
  var_dump($array_ordered_tens);
  $tens_dump = ob_get_clean();

  $report = 'Candidate: Vinícius Kwasinski' . "\n\n"
      . 'State with more winners is: ' . $state_with_more_winners . "\n\n"
      . 'The list with the awards average for which state : ' . "\n" . $statistics_dump . "\n\n"
      . 'An ordered list this tens case: ' . $tens_dump;

  // A single checked write: a failure is reported on stderr with a non-zero exit
  // status instead of being followed by the success message.
  if (file_put_contents('result.txt', $report) === false) {
      fwrite(STDERR, 'Could not write result.txt' . "\n");
      exit(1);
  }

  print 'Sucessful, please open \'result.txt\'  and find the answers.' . "\n";

  // Thank you for the challenge.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement