Guest User

Untitled

a guest
Jan 31st, 2018
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.02 KB | None | 0 0
  1. <?php
  2. // variable for the selection of the data source and for the distinction of cases
  3. $year=2012;
  4.  
  5. $dsn="mysql:dbname=wikiwm".$year.";host=127.0.0.1";
  6. $user='';
  7. $password='';
  8.  
  9. if($year == 2012){
  10. // 2012
  11. $queries = array(
  12. 'Articles-discussion-2012-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20120705000000',
  13. 'Discussion-2012-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120705000000',
  14. 'Articles-2012-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120705000000',
  15.  
  16. 'Articles-discussion-2012-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20120719000000',
  17. 'Discussion-2012-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120719000000',
  18. 'Articles-2012-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120719000000',
  19.  
  20. 'Articles-discussion-2012-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20120802000000',
  21. 'Discussion-2012-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120802000000',
  22. 'Articles-2012-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120802000000',
  23.  
  24. 'Articles-discussion-2012-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20120816000000',
  25. 'Discussion-2012-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120816000000',
  26. 'Articles-2012-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20120816000000',
  27.  
  28. 'Articles-discussion-2012-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0',
  29. 'Discussion-2012-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0',
  30. 'Articles-2012-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0');
  31. }else{
  32. // 2011
  33. $queries = array(
  34. 'Articles-discussion-2011-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20110608000000',
  35. 'Discussion-2011-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110608000000',
  36. 'Articles-2011-1.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110608000000',
  37.  
  38. 'Articles-discussion-2011-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20110622000000',
  39. 'Discussion-2011-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110622000000',
  40. 'Articles-2011-2.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110622000000',
  41.  
  42. 'Articles-discussion-2011-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20110629000000',
  43. 'Discussion-2011-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110629000000',
  44. 'Articles-2011-3.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110629000000',
  45.  
  46. 'Articles-discussion-2011-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0 and rev_timestamp < 20110706000000',
  47. 'Discussion-2011-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110706000000',
  48. 'Articles-2011-4.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0 and rev_timestamp < 20110706000000',
  49.  
  50. 'Articles-discussion-2011-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and rev_user > 0',
  51. 'Discussion-2011-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace in (1, 3,5,7,9,11,13,15) and rev_user > 0',
  52. 'Articles-2011-5.net' => 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id and page_namespace not in (1, 3,5,7,9,11,13,15) and rev_user > 0');
  53. }
  54.  
  55. try{
  56. $d = new PDO($dsn,$user,$password);
  57. } catch(PDOException $e) {
  58. echo 'Connection failed: ' . $e->getMessage();
  59. }
  60. // exclude zero, because it marks a system user and cannot be used as a vertices in Pajek/Gephi
  61. $sql = 'select distinct rev_user, rev_user_text from revision where rev_user > 0';
  62. $sth = $d->prepare($sql);
  63. $sth->execute();
  64.  
  65. // preparing data for sna
  66. $vertices = '';
  67. if($year == 2012){
  68. $i = 0;
  69. }else{
  70. $i = 1;
  71. }
  72. foreach ($sth->fetchAll() as $entry) {
  73. $vertices .= $entry['rev_user'] . " \"" . $entry['rev_user_text'] . "\"\n";
  74. $i++;
  75. }
  76.  
  77. $vertices = '*Vertices ' . $i . "\n" . $vertices;
  78.  
  79. foreach ($queries as $key => $sql) {
  80. $edges = "*Edges\n";
  81.  
  82. $sth = $d->prepare($sql);
  83. $sth->execute();
  84.  
  85. $pages = array();
  86.  
  87. foreach ($sth->fetchAll() as $entry) {
  88. $pages[$entry['rev_page']][] = $entry['rev_user'];
  89. }
  90.  
  91. // for all pages
  92. for ($i = 0; $i < count($pages); $i++) {
  93. $focus = current($pages);
  94.  
  95. // connect all users with each other
  96. for ($j = 0; $j < count($focus); $j++) {
  97. for ($k = ($j + 1); $k < (count($focus)); $k++) {
  98. $edges .= $focus[$j] . ' ' . $focus[$k] . "\n";
  99. }
  100. }
  101.  
  102. $pj = next($pages);
  103. }
  104.  
  105. file_put_contents("data/$key",$vertices . $edges);
  106. }
  107. echo "Done!";
  108. ?>
Add Comment
Please, Sign In to add comment