Advertisement
tpaper

statgen 0.5

Nov 26th, 2015
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 10.87 KB | None | 0 0
  1. <?php
  2.  
  3. //V0.5 26 Nov 2015
  4.  
  5. /*
  6.     This script was made by Enrico `tpaper' Ronconi in Nov 2015
  7.     <ronconi.enrico@yahoo.it>
  8.    
  9.     The original script was originally written by me in Nov 2014, but since
  10.     every line of that code was like:
  11.    
  12.     "$pizza_and_ravioli == shalabalaba_func($random_shitty_name,$lel_xd)"
  13.    
  14.     I decided to re-write from zero it and I tried to keep that code human
  15.     readable this time.
  16.    
  17.     --
  18.    
  19.     You can do everything you want with this, I don't ask you money, but
  20.     PLEASE, CREDIT ME.
  21.     Thank You.
  22.     And sorry for my bad English, but you know, I'm Italian, so...
  23. */
  24.  
  25. /*
  26.     This program is free software: you can redistribute it and/or modify it
  27.     under the terms of the GNU General Public License as published by the
  28.     Free Software Foundation, either version 3 of the License, or (at your
  29.     option) any later version.
  30.  
  31.     This program is distributed in the hope that it will be useful, but
  32.     WITHOUT ANY WARRANTY; without even the implied warranty of
  33.     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  34.     General Public License for more details.
  35.  
  36.     You should have received a copy of the GNU General Public License along
  37.     with this program. If not, see <http://www.gnu.org/licenses/>.
  38. */
  39.  
  40. /*
  41.     What it does:
  42.     This script dumps information about the posts of a specific given group.
  43.     It saves that data into a CSV file and (if required) it imports that data
  44.     into a MySQL table automatically.
  45.    
  46.     How it works:
  47.     First, it sends an HTTP request to the Facebook Graph API, then the response
  48.     (given in JSON format) is parsed, and the data is stored into a CSV file.
  49.     The response contains also a pointer to the next "page" of posts, so
  50.     the script continues to parse the other page and so on until the end of
  51.     the feed.
  52.    
  53.     The information stored are listed below:
  54.    
  55.     -- V 0.5
  56.         | - Post unique ID
  57.         | - Post author (unique ID and name)
  58.         | - Post type (e.g. status, photo, link ...)
  59.         | - Post creation date
  60.         | - Number of likes to the post
  61.         | - Number of comments to the post
  62.         | - Author (unique ID and name) of the first post comment (if any)
  63.        
  64.     This script is designed to be run on a Unix-like system; anyway, since
  65.     PHP and MySQL can run under multiple OSes, it may also work under
  66.     Windows or other OSes.
  67.    
  68.     This script was tested on a Raspberry Pi running Raspbian, with PHP 5.4.
  69.    
  70.     Since the script writes every line almost in real time, you can control
  71.     how it's going on by watching the output file.
  72.    
  73. */
  74.  
  75. //-------------------------------------
  76. //  GENERAL SETTINGS
  77. //-------------------------------------
  78.  
  79. $verbose = true;                            //If true, every log message is also echoed to the output
  80. $log_filename = 'get_data.log';             //Log file name. IF IT ALREADY EXISTS, ITS CONTENT IS OVERWRITTEN
  81. $out_filename = 'out.csv';                  //Output CSV file name. If it already exists, its content is overwritten
  82. $mysql_auto = false;                        //If true, the script imports the CSV file into a MySQL table automatically
  83. $post_limit = false;                        //Max number of posts stored. If set to false, it will continue until the feed ends
  84.  
  85. //You must provide a valid Facebook access token of a user that has access to the group
  86. $fb_access_token = '';
  87. $fb_group_id = '';                          //Target group id
  88.  
  89. //-------------------------------------
  90. //  MySQL OPTIONS
  91. //-------------------------------------
  92. //  Necessary only if $mysql_auto is set to true.
  93.  
  94. $MYSQL_host = 'localhost';                  //Where the MySQL server is hosted
  95. $MYSQL_port = '3306';                       //MySQL server port
  96. $MYSQL_username = 'php';                    //MySQL username. That user must have permission to insert data into $MYSQL_table
  97. $MYSQL_password = '';                       //Password of the MySQL user above
  98. $MYSQL_db = '';                             //MySQL database name
  99. $MYSQL_table = '';                          //MySQL table name
  100.  
  101. $MYSQL_fields_list = ['ID','uid','uname','type','date','ln','cn','fc_uid','fc_uname']; //Column names in the MySQL table. This order MUST be the same as the order of the values written by the parse_and_write function.
  102. /*
  103.     Table recommended format:
  104.    
  105.     +----------+-------------+------+-----+---------+-------+
  106.     | Field    | Type        | Null | Key | Default | Extra |
  107.     +----------+-------------+------+-----+---------+-------+
  108.     | ID       | varchar(33) | NO   | PRI | NULL    |       |
  109.     | uid      | varchar(17) | NO   |     | NULL    |       |
  110.     | uname    | tinytext    | NO   |     | NULL    |       |
  111.     | type     | varchar(10) | NO   |     | NULL    |       |
  112.     | date     | datetime    | NO   |     | NULL    |       |
  113.     | ln       | smallint(6) | NO   |     | NULL    |       |
  114.     | cn       | smallint(6) | NO   |     | NULL    |       |
  115.     | fc_uid   | varchar(17) | YES  |     | NULL    |       |
  116.     | fc_uname | tinytext    | YES  |     | NULL    |       |
  117.     +----------+-------------+------+-----+---------+-------+
  118. */ 
  119.  
  120. //-------------------------------------
  121. //  ADVANCED SETTINGS
  122. //-------------------------------------
  123. //  If not sure, just don't touch this. In most cases you don't have to
  124. //  modify this in order to run the script.
  125.  
  126. /*
  127. List of the fields that the script requests from Facebook. Note that ADDING
  128. a field here DOES NOT MAKE the script save that field value. If you want to
  129. store another field you have to modify this parameter and the functions
  130. parse_and_write and mysql_import.
  131. */
  132. $fb_fileds = ['id','from','created_time','type','likes.summary(true).limit(0)','comments.summary(true).limit(1)'];
  133. //default: $fb_fileds = ['id','from','created_time','type','likes.summary(true).limit(0)','comments.summary(true).limit(1)'];
  134.  
  135. /*
  136. Number of posts requested per HTTP request. Facebook limits the amount of
  137. data that can be retrieved from its servers in one request, so if this
  138. value is too big Facebook will answer the HTTP request with an error
  139. instead of the wanted data, and nothing will work.
  140. */
  141. $fb_limit = 700;
  142. //default: $fb_limit = 700;
  143.  
  144. $http_tries = 10;                           //Number of HTTP tries before giving up
  145. $http_delay = 100;                          //Delay between HTTP request tries (milliseconds)
  146.  
  147. $table_record_sep = ',';                    //Field separator in the csv file
  148. $table_row_sep = "\n";                      //Row separator in the csv file
  149.  
  150. //Escaped versions of the previous separators, as they are written inside the MySQL LOAD DATA query:
  151. $table_record_sep_esc = ',';
  152. $table_row_sep_esc = "\\n";
  153.  
  154. //-------------------------------------
  155. //  END OF SETTINGS
  156. //-------------------------------------
  157.  
  158. function logthis($message){
  159.     /*
  160.     Log the message with timestamp into the file handled by $log_hande.
  161.     If $log_handle is null, no log is written.
  162.     If $verbose is set to true, it also displays the message.
  163.     */
  164.    
  165.     global $verbose,$log_handle;
  166.    
  167.     $micros = microtime();
  168.     $micros = explode(" ",$micros);
  169.     $micros = explode('.',$micros[0]);
  170.     $micros = $micros[1];
  171.     $timestamp = date("d M Y - H:i:s");
  172.     $timestamp = $timestamp.".$micros";
  173.    
  174.     $message = $timestamp." --- B0SS SAYS:: ".$message."\n";
  175.     if($verbose) echo($message);
  176.     if(is_null($log_handle)) return;
  177.     fwrite($log_handle,$message);
  178.     return;
  179. }
  180.  
  181. function build_url($token,$fields,$limit,$group_id){
  182.     /*
  183.     Given an access token, an array with fields list, an integer limit and
  184.     a group id this function builds an URL to query the graph API.
  185.     */
  186.    
  187.     return("https://graph.facebook.com/$group_id/feed?fields=".implode(',',$fields)."&limit=$limit&access_token=$token");
  188. }
  189.  
  190. function http_request($URL,$max_tries,$delay = 100){
  191.     /*
  192.     Send HTTP request and retrieve the response. If get an http error, it
  193.     tries again $max_tries times every $delay milliseconds
  194.     */
  195.     $tries = 0;
  196.     $out = false;
  197.     while(($tries < $max_tries) && ($out == false)){
  198.         $tries++;
  199.         logthis("tryng to open $URL (try $tries of $max_tries)...");
  200.         $out = @file_get_contents($URL);
  201.         if($out == true) {
  202.             logthis("Success!");
  203.             return $out;
  204.         } else logthis("Failed to open URL.");
  205.         usleep($delay * 1000);
  206.     }
  207.    
  208.     return $out;   
  209. }
  210.  
  211. function writeout($record,$out_handle){
  212.     /*
  213.     Write out on a file a record;
  214.     */
  215.     global $table_record_sep,$table_row_sep;
  216.     fwrite($out_handle,implode($table_record_sep,$record).$table_row_sep);
  217. }
  218.  
  219. function parse_and_write($json_response,$out_handle){
  220.         /*
  221.         This function parse the json code and writes on file a CVS table
  222.         with the data.
  223.         Returns the URL pointer to the next "page"
  224.         */
  225.         global $stored,$post_limit,$table_record_sep;
  226.        
  227.         if(!$json_response) return false;
  228.         $out = json_decode($json_response);
  229.        
  230.         foreach($out->{'data'} as $post){
  231.             $post_id = $post->{'id'};
  232.             $user_id = $post->{'from'}->{'id'};
  233.             $user_name = str_replace($table_record_sep,' ',$post->{'from'}->{'name'});
  234.             $type = $post->{'type'};
  235.             $created = explode('+',$post->{'created_time'});
  236.             $likes = $post->{'likes'}->{'summary'}->{'total_count'};
  237.             $comments = $post->{'comments'}->{'summary'}->{'total_count'};
  238.             if(isset($post->{'comments'}->{'data'}[0])) {
  239.                 $fc = $post->{'comments'}->{'data'}[0];
  240.                 $fc_user_id = $fc->{'from'}->{'id'};
  241.                 $fc_user_name = $fc->{'from'}->{'name'};
  242.             } else {
  243.                 $fc_user_id = null;
  244.                 $fc_user_name = null;
  245.             }
  246.             writeout([$post_id,$user_id,$user_name,$type,$created[0],$likes,$comments,$fc_user_id,$fc_user_name],$out_handle);
  247.             $stored++;
  248.             if(($stored >= $post_limit) && $post_limit) return false;
  249.         }
  250.        
  251.         if(isset($out->{'paging'}->{'next'})) return $out->{'paging'}->{'next'};
  252.         return false;
  253. }
  254.  
  255. function mysql_import($input_filename){
  256.     global $MYSQL_host, $MYSQL_username, $MYSQL_password, $MYSQL_db, $MYSQL_port, $MYSQL_table, $table_record_sep_esc, $table_row_sep_esc, $MYSQL_fields_list;
  257.    
  258.     logthis("Opening MySql connection...");
  259.     $MyHandle = @new mysqli($MYSQL_host, $MYSQL_username, $MYSQL_password, $MYSQL_db, $MYSQL_port);
  260.     if($MyHandle->connect_error) {
  261.         $mess = 'Error while connection (ErrNo: '.$MyHandle->connect_errno.' - '.$MyHandle->connect_error.")!\nCannot connect to database!";
  262.         logthis($mess);
  263.         return;
  264.     }
  265.     $MyHandle->set_charset("utf8");
  266.    
  267.     logthis("Executing LOAD DATA query...");
  268.    
  269.     $out = $MyHandle->query("LOAD DATA LOCAL INFILE '$input_filename' INTO TABLE $MYSQL_table FIELDS TERMINATED BY '$table_record_sep_esc' LINES TERMINATED BY '$table_row_sep_esc' (".implode(",",$MYSQL_fields_list).")");
  270.    
  271.     if(!$out) {
  272.         logthis("Error while importing data into MySQL table! ".$MyHandle->error);
  273.         return;
  274.     }
  275.    
  276.     logthis("Imported! ".$MyHandle->warning_count." warnings.");
  277.    
  278.     $MyHandle->close();
  279. }
  280.  
  281. $log_handle = fopen($log_filename,"w");                                 //Open log file
  282. $outfile = fopen($out_filename,"w");                                    //Open csv out file
  283.  
  284. logthis("ey b0ss!");                                                    //Welcome message
  285.  
  286. logthis("Collecting data and saving to file...");
  287. $stored = 0;
  288. $next = build_url($fb_access_token,$fb_fileds,$fb_limit,$fb_group_id);  //First URL
  289. while($next) $next = parse_and_write(http_request($next,$http_tries,$http_delay),$outfile); //Loop until feed end
  290. fclose($outfile);                                                       //Close output file
  291. logthis("$stored posts was saved successfully");
  292.  
  293. if($mysql_auto) mysql_import($out_filename);
  294.  
  295. logthis("bye b0ss!");                                                   //Goodbye message
  296.  
  297. fclose($log_handle);                                                    //Close log file
  298. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement