Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
// Exports user co-editing networks from a wiki database into Pajek-style
// ".net" files (one "*Vertices" section followed by one "*Edges" section).
//
// For every snapshot and page category a file "data/<name>.net" is written in
// which two users are connected whenever both have a revision on the same page.
// The table/column names (revision, page, rev_user, ...) look like the
// MediaWiki schema -- NOTE(review): confirm against the actual database.

// Selects the data source (database "wikiwm<year>") and the snapshot set.
$year = 2012;
$dsn = "mysql:dbname=wikiwm" . $year . ";host=127.0.0.1";
$user = '';
$password = '';

// Exclusive upper bound for rev_timestamp per snapshot number.
// null means "no bound", i.e. every revision in the database is used.
$cutoffsByYear = array(
    2012 => array(
        1 => '20120705000000',
        2 => '20120719000000',
        3 => '20120802000000',
        4 => '20120816000000',
        5 => null,
    ),
    2011 => array(
        1 => '20110608000000',
        2 => '20110622000000',
        3 => '20110629000000',
        4 => '20110706000000',
        5 => null,
    ),
);

// Stop early instead of silently running one year's queries against another
// year's database.
if (!isset($cutoffsByYear[$year])) {
    echo 'Unsupported year: ' . $year;
    exit(1);
}

// Odd namespace ids; presumably the discussion (talk) namespaces, as the
// "Discussion" file names suggest -- verify against the wiki configuration.
$discussionNamespaces = '(1, 3,5,7,9,11,13,15)';

// File name prefix => extra WHERE condition restricting the page namespace.
$namespaceFilters = array(
    'Articles-discussion' => '',
    'Discussion'          => ' and page_namespace in ' . $discussionNamespaces,
    'Articles'            => ' and page_namespace not in ' . $discussionNamespaces,
);

// Build "output file name => SQL" for every snapshot/category combination.
// Each query yields the distinct (page, registered user) pairs of the snapshot.
$queries = array();
foreach ($cutoffsByYear[$year] as $snapshot => $cutoff) {
    foreach ($namespaceFilters as $prefix => $filter) {
        $sql = 'SELECT distinct rev_page, rev_user FROM revision,page WHERE rev_page = page_id'
            . $filter
            . ' and rev_user > 0';
        if ($cutoff !== null) {
            $sql .= ' and rev_timestamp < ' . $cutoff;
        }
        $queries[$prefix . '-' . $year . '-' . $snapshot . '.net'] = $sql;
    }
}

try {
    // Exceptions instead of silent failures, so a broken query cannot produce
    // an empty or partial network file unnoticed.
    $d = new PDO($dsn, $user, $password, array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION));
} catch (PDOException $e) {
    echo 'Connection failed: ' . $e->getMessage();
    // Without a connection nothing below can work.
    exit(1);
}

$outputDir = 'data';
if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true)) {
    echo 'Cannot create output directory: ' . $outputDir;
    exit(1);
}

try {
    // Vertex list, shared by all output files.
    // exclude zero, because it marks a system user and cannot be used as a vertex in Pajek/Gephi
    $sth = $d->query('select distinct rev_user, rev_user_text from revision where rev_user > 0');

    // NOTE(review): the counter deliberately keeps the original start values
    // (0 for 2012, 1 otherwise), so for non-2012 data the "*Vertices" header is
    // one higher than the number of listed vertices. The reason is not visible
    // here -- confirm whether this offset is intended.
    $vertexCount = ((int) $year === 2012) ? 0 : 1;
    $vertices = '';
    while (($entry = $sth->fetch(PDO::FETCH_ASSOC)) !== false) {
        $vertices .= $entry['rev_user'] . " \"" . $entry['rev_user_text'] . "\"\n";
        $vertexCount++;
    }
    $vertices = '*Vertices ' . $vertexCount . "\n" . $vertices;

    foreach ($queries as $fileName => $sql) {
        // Group the editing users by page.
        $pages = array();
        $sth = $d->query($sql);
        while (($entry = $sth->fetch(PDO::FETCH_ASSOC)) !== false) {
            $pages[$entry['rev_page']][] = $entry['rev_user'];
        }

        // Connect all users of a page with each other (every unordered pair once).
        $edges = "*Edges\n";
        foreach ($pages as $editors) {
            $editorCount = count($editors);
            for ($j = 0; $j < $editorCount; $j++) {
                for ($k = $j + 1; $k < $editorCount; $k++) {
                    $edges .= $editors[$j] . ' ' . $editors[$k] . "\n";
                }
            }
        }

        if (file_put_contents($outputDir . '/' . $fileName, $vertices . $edges) === false) {
            echo 'Cannot write file: ' . $outputDir . '/' . $fileName;
            exit(1);
        }
    }
} catch (PDOException $e) {
    echo 'Query failed: ' . $e->getMessage();
    exit(1);
}

echo "Done!";
// The closing tag is kept because non-PHP text follows in this file.
?>
Add Comment
Please, Sign In to add comment