Advertisement
jvvg

WikiMonitor Source Code

May 18th, 2014
338
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 17.68 KB | None | 0 0
  1. <?php
  2. function get_page_contents($title) {
  3.     $data = '';
  4.     while ($data == '') {
  5.         $data = curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&titles=' . rawurlencode($title) . '&prop=revisions&rvprop=content&format=xml&salt=' . md5(time()));
  6.     }
  7.     $page_xml = @new SimpleXMLElement($data);
  8.     $contents = (string) ($page_xml->query->pages->page->revisions->rev);
  9.     return $contents;
  10. }
  11.  
  12. function notify_user($user, $type, $info) {
  13.     //function to notify user
  14.     //check if this is on the ignore list
  15.     $page_ignore_list = explode("\n", file_get_contents('conf/ignore.txt')); //check if the page is ignored
  16.     if (in_array($info['page'], $page_ignore_list)) {
  17.         echo $info['page'] . ' is ignored. NOT sending notification.' . "\n";
  18.         return;
  19.     }
  20.     //check if the user's talk page allows bots, and if it does, check if the name is on the nobots override list
  21.     $overridenobots = false;
  22.     $talk = get_page_contents('User_talk:' . $user);
  23.     if (stristr($talk, '{{nobots}}')) { //check for nobots
  24.         preg_match_all('%<nowiki>(.*?)' . preg_quote('{{nobots}}') . '(.*?)</nowiki>%msi', $talk, $nowikimatches);
  25.         preg_match_all('%' . preg_quote('{{nobots}}') . '%msi', $talk, $nobotsmatches);
  26.         if (sizeof($nowikimatches[0]) < sizeof($nobotsmatches[0])) {
  27.             $nobots_override_list = explode("\n", file_get_contents('conf/nobotsoverride.txt')); //check if user is on nobots override list
  28.             if (!in_array($user, $nobots_override_list)) {
  29.                 echo $user . '\'s talk page does not allow bots. Skipping...' . "\n";
  30.                 return;
  31.             } else {
  32.                 $overridenobots = true;
  33.             }
  34.         }
  35.     }
  36.     $dateformat = 'd M Y H:i:s';
  37.     switch ($type) { //generate message
  38.         case 'sign':
  39.             $message = str_replace('($revid)', $info['revid'], str_replace('($page)', $info['page'], UNSIGNED_MESSAGE_BODY));
  40.             echo date($dateformat, time()) . ' ' . $user . ' did not sign post (' . $info['page'] . ' revision ' . $info['revid'] . '), notifying...' . "\n";
  41.             $summary = UNSIGNED_MESSAGE_SUBJECT;
  42.             if (strstr($talk, 'revision ' . $info['revid'] . ']')) {
  43.                 echo 'Already notified. Skipping...' . "\n";
  44.                 return;
  45.             }
  46.             break;
  47.         case 'war':
  48.             $message = 'It appears that you have been edit warring on the page ' . $info['page'] . '. Please stop immediately.';
  49.             $summary = 'Please stop edit warring';
  50.             break;
  51.         case 'excessive':
  52.             $message = str_replace('($count)', $info['count'], str_replace('($page)', $info['page'], RAPID_MESSAGE_BODY));
  53.             echo date($dateformat, time()) . ' Too many edits (' . $info['count'] . ') from ' . $user . ' on page ' . $info['page'] . ', notifying...' . "\n";
  54.             if (stristr($info['page'], 'talk')) {
  55.                 echo 'Ignoring talk pages, skipping...' . "\n";
  56.                 return;
  57.             }
  58.             $summary = RAPID_MESSAGE_SUBJECT;
  59.             if (strstr($talk, 'in quick succession to the page [[' . $info['page'] . ']] recently.')) {
  60.                 echo 'Already notified. Skipping...' . "\n";
  61.                 return;
  62.             }
  63.             break;
  64.         case 'uncat':
  65.             $message = str_replace('($page)', $info['page'], NOCAT_MESSAGE_BODY);
  66.             echo date($dateformat, time()) . ' Uncategorized page: ' . $info['page'] . ' by ' . $user . ', notifying...' . "\n";
  67.             $summary = NOCAT_MESSAGE_SUBJECT;
  68.             if (strstr($talk, 'making the page [[' . $info['page'] . ']], you')) {
  69.                 echo 'Already notified. Skipping...' . "\n";
  70.                 return;
  71.             }
  72.             break;
  73.     }
  74.     $message .= '<br />' . MESSAGE_SUFFIX; //add that it's a bot
  75.     if ($overridenobots) { //mention if user was nobots overridden
  76.         $message .= '<br /><b>Important:</b> although your talk page has the <nowiki>{{NoBots}}</nowiki> template on it, an exception was added for your talk page to override it. See [[User:WikiMonitor#NoBots_override]] for details.';
  77.     }
  78.     $message .= '~~~~';
  79.    
  80.     //submit the edit
  81.     $tokenxml = new SimpleXMLElement(curl_post('http://wiki.scratch.mit.edu/w/api.php?action=query&prop=info|revisions&intoken=edit&titles=User_talk:' . $user . '&format=xml', '', true)); //get token
  82.     $edittoken = (string)$tokenxml->query->pages->page->attributes()->edittoken;
  83.    
  84.     $return = curl_post('http://wiki.scratch.mit.edu/w/api.php', 'action=edit&title=User_talk:' . $user . '&section=new&summary=' . $summary . '&text=' . rawurlencode($message) . '&format=xml&bot=true&token=' . rawurlencode($edittoken)); //submit the edit
  85. }
  86.  
  87. function curl_post($url, $postfields, $refuseblank = false) {
  88.     $ch = curl_init ();
  89.     curl_setopt ( $ch, CURLOPT_URL, $url);
  90.     curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 );
  91.     curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
  92.     curl_setopt ( $ch, CURLOPT_POST, 1 );
  93.     curl_setopt ( $ch, CURLOPT_POSTFIELDS, $postfields);
  94.     curl_setopt ( $ch, CURLOPT_ENCODING, "" );
  95.     curl_setopt ( $ch, CURLOPT_COOKIEFILE, getcwd () . '/cookies.txt' );
  96.     curl_setopt ( $ch, CURLOPT_COOKIEJAR, getcwd () . '/cookies.txt' );
  97.     curl_setopt ( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6 (.NET CLR 3.5.30729)" );
  98.     $out = '';
  99.     if ($refuseblank) {
  100.         while ($out == '') {
  101.             $out = curl_exec ($ch);
  102.         }
  103.     } else {
  104.         $out = curl_exec($ch);
  105.     }
  106.     curl_close($ch);
  107.     return $out;
  108. }
  109.  
  110. function curl_get($url, $refuseblank = false) {
  111.     $ch = curl_init ();
  112.     curl_setopt ( $ch, CURLOPT_URL, $url);
  113.     curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 );
  114.     curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
  115.     curl_setopt ( $ch, CURLOPT_ENCODING, "" );
  116.     curl_setopt ( $ch, CURLOPT_COOKIEFILE, getcwd () . '/cookies.txt' );
  117.     curl_setopt ( $ch, CURLOPT_COOKIEJAR, getcwd () . '/cookies.txt' );
  118.     curl_setopt ( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6 (.NET CLR 3.5.30729)" );
  119.     $out = '';
  120.     if ($refuseblank) {
  121.         while ($out == '') {
  122.             $out = curl_exec ($ch);
  123.         }
  124.     } else {
  125.         $out = curl_exec($ch);
  126.     }
  127.     curl_close($ch);
  128.     return $out;
  129. }
  130.  
  131. function submit_edit($title, $contents, $summary, $minor = false) {
  132.     $tokenxml = new SimpleXMLElement(curl_post('http://wiki.scratch.mit.edu/w/api.php?action=query&prop=info|revisions&intoken=edit&titles=' . rawurlencode($title) . '&format=xml', '')); //get token
  133.     $edittoken = (string)$tokenxml->query->pages->page->attributes()->edittoken;
  134.     $return = curl_post('http://wiki.scratch.mit.edu/w/api.php', 'action=edit&title=' . rawurlencode($title) . '&summary=' . $summary . '&text=' . rawurlencode($contents) . '&format=xml&bot=true' . ($minor = true ? '&minor=true' : '') . '&token=' . rawurlencode($edittoken)); //submit the edit
  135. }
  136.  
  137. define('anononly', 1); //uncomment to disable logging in
  138. //log in
  139. $alreadyseen = array();
  140. $already_notified = array();
  141. $logincount = 0;
  142. //get online configuration
  143. preg_match('%<code><nowiki>\{(.*?)\}</nowiki></code>%', get_page_contents('User:WikiMonitor/Configuration/CategoryTemplates'), $matches); //templates that contain a category
  144. $category_templates = explode(',', $matches[1]);
  145. foreach ($category_templates as &$val) {
  146.     $val = '{{' . $val;
  147. }
  148.  
  149. preg_match('%<code><nowiki>\{(.*?)\}</nowiki></code>%', get_page_contents('User:WikiMonitor/Configuration/MessageSuffix'), $matches); //suffix to add to messages
  150. define('MESSAGE_SUFFIX', $matches[1]);
  151.  
  152. preg_match('%<code><nowiki>\{(.*?)\}</nowiki></code>%', get_page_contents('User:WikiMonitor/Configuration/SandboxTimeout'), $matches); //time to wait before clearing sandbox
  153. define('SANDBOX_TIMEOUT', $matches[1]);
  154. preg_match('%<code><pre><nowiki>(.*?)</nowiki></pre></code>%ms', get_page_contents('User:WikiMonitor/Configuration/DefaultSandbox'), $matches); //default sandbox text
  155. define('DEFAULT_SANDBOX_TEXT', $matches[1]);
  156.  
  157. preg_match('%<code><nowiki>\{Subj:(.*?)\}.*?\{Msg:(.*?)\}</nowiki></code>%ms', get_page_contents('User:WikiMonitor/Configuration/UnsignedMessage'), $matches); //unsigned post message
  158. define('UNSIGNED_MESSAGE_SUBJECT', $matches[1]);
  159. define('UNSIGNED_MESSAGE_BODY', $matches[2]);
  160.  
  161. preg_match('%<code><nowiki>\{Subj:(.*?)\}.*?\{Msg:(.*?)\}</nowiki></code>%ms', get_page_contents('User:WikiMonitor/Configuration/NoCategoryMessage'), $matches); //no category message
  162. define('NOCAT_MESSAGE_SUBJECT', $matches[1]);
  163. define('NOCAT_MESSAGE_BODY', $matches[2]);
  164.  
  165. preg_match('%<code><nowiki>\{Subj:(.*?)\}.*?\{Msg:(.*?)\}</nowiki></code>%ms', get_page_contents('User:WikiMonitor/Configuration/RapidEditMessage'), $matches); //rapid editing message
  166. define('RAPID_MESSAGE_SUBJECT', $matches[1]);
  167. define('RAPID_MESSAGE_BODY', $matches[2]);
  168.  
  169. preg_match('%<code><nowiki>\{(.*?)\}</nowiki></code>%', get_page_contents('User:WikiMonitor/Configuration/TooManyEdits'), $matches); //too many edits
  170. $parts = explode(',', $matches[1]);
  171. define('TOO_MANY_EDITS_COUNT', $parts[0]);
  172. define('TOO_MANY_EDITS_TIME', $parts[1]);
  173.  
  174. $clear_sandbox_time = 0; //start out with no clearing sandbox
  175. while (true) {
  176.     if (!defined('anononly')) {
  177.         //log in
  178.         $out = curl_post('http://wiki.scratch.mit.edu/w/api.php', 'action=login&lgname=WikiMonitor&lgpassword=[PASSWORD REMOVED]&format=xml', true);
  179.         $login_xml = new SimpleXMLElement($out);
  180.         $token = (string)$login_xml->login->attributes()->token;
  181.        
  182.         $login_xml = new SimpleXMLElement(curl_post('http://wiki.scratch.mit.edu/w/api.php',  'action=login&lgname=WikiMonitor&lgpassword=[PASSWORD REMOVED]&lgtoken=' . $token . '&format=xml'));
  183.         if ((string)$login_xml->login->attributes()->result != 'Success') {
  184.             echo 'Login failed!'. "\n";
  185.             switch ((string)$login_xml->login->attributes()->result) {
  186.                 case 'Throttled':
  187.                     echo 'Too many recent logins. Please wait ' . (int)$login_xml->login->attributes()->wait . ' seconds.' . "\n"; break;
  188.                 default:
  189.                     print_r($login_xml);
  190.             }
  191.             die;
  192.         }
  193.         $logincount++;
  194.         if ($logincount == 1) {
  195.             echo 'Login success!' . "\n";
  196.         }
  197.     }
  198.    
  199.     for ($cyclecount = 1; $cyclecount <= 30; $cyclecount++) { //every 30 cycles, log back in
  200.         $shutoffpage = get_page_contents('User:WikiMonitor/Disable'); //check for automatic shutoff
  201.         if (!strstr($shutoffpage, '<div id="botenabled" style="font-weight:bold">true</div>')) {
  202.             preg_match('%\(This page was last edited by (.*?)\)%', curl_get('http://wiki.scratch.mit.edu/wiki/User:WikiMonitor/Disable'), $matches);
  203.             echo 'This bot has been disabled by ' . $matches[1] . "\n"; die;
  204.         }  
  205.            
  206.         $recentchangesfullxml = new SimpleXMLElement(curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&list=recentchanges&rcprop=title|ids|sizes|flags|user|timestamp&rclimit=150&format=xml&salt=' . time(), true)); //get recent changes list
  207.         $recentchangesxml = $recentchangesfullxml->query->recentchanges->rc;
  208.         $editcounts = array();
  209.         $lastedits = array();
  210.         $pages = array();
  211.         foreach ($recentchangesxml as $val) {
  212.             //check for excessive edits, defined as 5+ in 30 minutes
  213.             if (strtotime((string)$val->attributes()->timestamp) > time() - (60 * TOO_MANY_EDITS_TIME)) {
  214.                 $pageid = (int)$val->attributes()->pageid;
  215.                 if (isset($editcounts[$pageid])) {
  216.                     $editcounts[$pageid]++;
  217.                 } else {
  218.                     $editcounts[$pageid] = 1;
  219.                     $pages[$pageid] = (string)$val->attributes()->title;
  220.                 }
  221.             }
  222.         }
  223.         asort($editcounts);
  224.         foreach ($editcounts as $pageid => $val) {
  225.             if ($val >= TOO_MANY_EDITS_COUNT) {
  226.                 $edited_users = array();
  227.                 //we've had over the required amount of edits, let's see who caused them
  228.                 foreach ($recentchangesxml as $change) {
  229.                     if ((int)$change->attributes()->pageid == $pageid) {
  230.                         $user = (string)$change->attributes()->user;
  231.                         if (isset($edited_users[$user])) {
  232.                             $edited_users[$user]++;
  233.                         } else {
  234.                             $edited_users[$user] = 1;
  235.                         }
  236.                     }
  237.                 }
  238.                 foreach ($edited_users as $user => $count) {
  239.                     if ($count >= TOO_MANY_EDITS_COUNT) {
  240.                         //too many edits - notify the user
  241.                         if (!in_array($user, $already_notified)) {
  242.                             notify_user($user, 'excessive', array('count' => $count, 'page' => $pages[$pageid]));
  243.                             $already_notified[] = $user;
  244.                         }
  245.                     }
  246.                 }
  247.             }
  248.         }
  249.        
  250.         $seensandbox = false;
  251.         foreach ($recentchangesxml as $val) {
  252.             $id = (string)$val->attributes()->revid;
  253.             if (!in_array($id, $alreadyseen)) {
  254.                 $alreadyseen[] = $id;
  255.                 $title = (string)$val->attributes()->title;
  256.                
  257.                 //check if it's the sandbox
  258.                 if ((string)$val->attributes()->title == 'Scratch Wiki:Sandbox' && !$seensandbox) {
  259.                     $seensandbox = true;
  260.                     if ($val->attributes()->user != 'WikiMonitor') {
  261.                         $clear_sandbox_time = time() + (SANDBOX_TIMEOUT * 60);
  262.                         echo 'Clearing sandbox at ' . date('d M Y H:i:s', $clear_sandbox_time) . "\n";
  263.                     }
  264.                 }
  265.                
  266.                 //check if user signed post
  267.                 //ignore minor edits
  268.                 if (!isset($val->attributes()->minor) && ((int)$val->attributes()->newlen - (int)$val->attributes()->oldlen) > 70) {
  269.                     if (stristr($title, 'talk:')) {
  270.                         //it's a talk page! did the user sign their post?
  271.                         $page_contents = get_page_contents($title); //make sure that other people have signed posts on it
  272.                         if (stristr($page_contents, '<scratchsig>')) {
  273.                             $rev_xml = new SimpleXMLElement(curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&prop=revisions&titles=' . rawurlencode($title) . '&rvlimit=1&rvprop=timestamp|user|comment&rvstartid=' . $id . '&rvdiffto=prev&rvlimit=1&format=xml', true));
  274.                             //print_r($rev_xml->query); die;
  275.                             $diff = (string)$rev_xml->query->pages->page->revisions->rev->diff;
  276.                             $comment = (string)$rev_xml->query->pages->page->revisions->rev->attributes()->comment;
  277.                             preg_match_all('%<td class=\'diff-addedline\'><div>(.*?)</div></td>%', $diff, $matches);
  278.                             $ok = true;
  279.                             foreach ($matches[1] as $diffline) {
  280.                                 if (stristr($diffline, '<ins class="diffchange">:') || strstr($comment, 'new section')) {
  281.                                     $ok = false;
  282.                                 }
  283.                                 if (stristr($diffline, '(UTC)')) {
  284.                                     $ok = true;
  285.                                     break;
  286.                                 }
  287.                             }
  288.                             if (!$ok) {
  289.                                 //no signature, check if the user fixed it
  290.                                 $ignore = false;
  291.                                 $origedittime = strtotime((string)$val->attributes()->timestamp);
  292.                                 foreach ($recentchangesxml as $change) {
  293.                                     $edittime = strtotime((string)$change->attributes()->timestamp);
  294.                                     if ($edittime <= $origedittime) {
  295.                                         //already passed the edit in question, so it wasn't fixed
  296.                                         break;
  297.                                     }
  298.                                     if ((string)$change->attributes()->title == $title && (string)$change->attributes()->user == (string)$val->attributes()->user) {
  299.                                         //the user fixed it in a subsequent edit, so don't notify them
  300.                                         $ignore = true;
  301.                                         echo 'Ignoring because it was fixed later (' . $title . ', ' . $val->attributes()->user . ')' . "\n";
  302.                                         break;
  303.                                     }
  304.                                 }
  305.                                 if (!$ignore) { //user did not fix it
  306.                                     notify_user($val->attributes()->user, 'sign', array('revid' => $id, 'page' => $title));
  307.                                 }
  308.                             }
  309.                         }
  310.                     }
  311.                 }
  312.                
  313.                 //check for uncategorized new pages
  314.                 if ($val->attributes()->type == 'new' && !stristr($title, 'talk:') && !stristr($title, 'user:') && !stristr($title, 'file:')) {
  315.                     $contents = get_page_contents($title);
  316.                     if (!stristr($contents, '[[Category:') && $contents != '') {
  317.                         $ok = false;
  318.                         foreach ($category_templates as $template) {
  319.                             if (stristr($contents, '{{' . $template)) {
  320.                                 $ok = true;
  321.                                 break;
  322.                             }
  323.                         }
  324.                         if (!$ok) {
  325.                             if (stristr($title, 'category:')) {
  326.                                 $title = ':' . $title;
  327.                             }
  328.                             //uncategorized
  329.                             notify_user($val->attributes()->user, 'uncat', array('page' => $title));
  330.                         }
  331.                     }
  332.                 }
  333.             }
  334.         }
  335.        
  336.         //should we clear the sandbox?
  337.         if ($clear_sandbox_time != 0 && time() > $clear_sandbox_time) {
  338.             if (trim(get_page_contents('Scratch Wiki:Sandbox')) != trim(DEFAULT_SANDBOX_TEXT)) {
  339.                 echo 'Clearing sandbox!' . "\n";
  340.                 submit_edit('Scratch Wiki:Sandbox', DEFAULT_SANDBOX_TEXT, 'Cleared the sandbox', false);
  341.             }
  342.             $clear_sandbox_time = 0;
  343.         }
  344.        
  345.         //check for uncategorized new files
  346.         $uploadlogxml = new SimpleXMLElement(curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&list=logevents&letype=upload&lelimit=10&format=xml', true)); //check upload log
  347.         $deletelogxml = new SimpleXMLElement(curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&list=logevents&letype=delete&lelimit=500&format=xml', true)); //let's also get the delete log
  348.         $movelogxml = new SimpleXMLElement(curl_get('http://wiki.scratch.mit.edu/w/api.php?action=query&list=logevents&letype=move&lelimit=50&format=xml', true)); //don't forget the move log
  349.         foreach ($uploadlogxml->query->logevents->item as $item) {
  350.             $id = (string)$item->attributes()->logid;
  351.             if (!in_array($id, $alreadyseen)) {
  352.                 $alreadyseen[] = $id;
  353.                 if ((string)$item->attributes()->action == 'upload') { //it's a new file
  354.                     $time = strtotime((string)$item->attributes()->timestamp);
  355.                     if ($time > time() - 180) {
  356.                         echo 'Sleeping ' . (180 - (time() - $time)) . ' seconds...' . "\n";
  357.                         sleep(180 - (time() - $time));
  358.                     }
  359.                     $contents = get_page_contents((string)$item->attributes()->title) . "\n";
  360.                     if (!strstr($contents, '[[Category:')) {
  361.                         //uncategorized
  362.                         $notify = true;
  363.                         //check for category templates
  364.                         foreach ($category_templates as $template) {
  365.                             if (stristr($contents, '{{' . $template)) {
  366.                                 $notify = false;
  367.                                 break;
  368.                             }
  369.                         }
  370.                         //check the delete log
  371.                         foreach ($deletelogxml->query->logevents->item as $delete_item_xml) {
  372.                             if ((string)$item->attributes()->title == (string)$delete_item_xml->attributes()->title) {
  373.                                 $notify = false;
  374.                                 break;
  375.                             }
  376.                         }
  377.                         foreach ($movelogxml->query->logevents->item as $move_item_xml) {
  378.                             if ((string)$item->attributes()->title == (string)$move_item_xml->attributes()->title) {
  379.                                 $notify = false;
  380.                                 break;
  381.                             }
  382.                         }
  383.                         if ($notify) {
  384.                             notify_user((string)$item->attributes()->user, 'uncat', array('page' => ':' . (string)$item->attributes()->title));
  385.                         }
  386.                     }
  387.                 }
  388.             }
  389.         }
  390.        
  391.         sleep(90);
  392.     }
  393. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement