daily pastebin goal
77%
SHARE
TWEET

Untitled

DatGuy1 Jun 15th, 2018 (edited) 17 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import re
  2. import json
  3. import sys
  4. from wikitools import *
  5. import userpass
  6. import mwparserfromhell
  7.  
  8. site = wiki.Wiki()
  9. site.login(userpass.username, userpass.password)
  10. etitles = ['Template:Infobox journal', 'Template:Infobox magazine']
  11. journaltitles = ['Template:Infobox journal']
  12. journalprojects = ['Template:WikiProject Academic Journals']
  13. magazineprojects = ['Template:WikiProject Magazines']
  14. bannershells = ['Template:WikiProject banner shell']
  15.  
  16. def updatelists():
  17.     global bannershells
  18.     global magazineprojects
  19.     global journalprojects
  20.     global journaltitles
  21.    
  22.     params = {'action':'query',
  23.               'list':'backlinks',
  24.               'bltitle':magazineprojects[0],
  25.               'bllimit':'5000',
  26.               'blfilterredir':'redirects',
  27.               'blnamespace':'10'
  28.               }
  29.     for pageelement in api.APIRequest(site, params).query(False)['query']['backlinks']:
  30.         magazineprojects.append(pageelement['title'])
  31.  
  32.     params = {'action':'query',
  33.               'list':'backlinks',
  34.               'bltitle':journalprojects[0],
  35.               'bllimit':'5000',
  36.               'blfilterredir':'redirects',
  37.               'blnamespace':'10'
  38.               }
  39.     for pageelement in api.APIRequest(site, params).query(False)['query']['backlinks']:
  40.         journalprojects.append(pageelement['title'])
  41.  
  42.     params = {'action':'query',
  43.               'list':'backlinks',
  44.               'bltitle':journaltitles[0],
  45.               'bllimit':'5000',
  46.               'blfilterredir':'redirects',
  47.               'blnamespace':'10'
  48.               }
  49.     for pageelement in api.APIRequest(site, params).query(False)['query']['backlinks']:
  50.         journaltitles.append(pageelement['title'])
  51.  
  52.     params = {'action':'query',
  53.               'list':'backlinks',
  54.               'bltitle':bannershells[0],
  55.               'bllimit':'5000',
  56.               'blfilterredir':'redirects',
  57.               'blnamespace':'10'
  58.               }
  59.     for pageelement in api.APIRequest(site, params).query(False)['query']['backlinks']:
  60.        bannershells.append(pageelement['title'])
  61.  
  62.     print(magazineprojects)
  63.     print(journalprojects)
  64.     print(bannershells)
  65.     print(journaltitles)
  66.  
  67. epages1 = []
  68. epages2 = []
  69.  
  70. def startAllowed():
  71.     if page.Page(site, 'User:DatBot/run/task9') != 'true':
  72.         return False
  73.     else:
  74.         return True
  75.  
  76. def getembeds():
  77.     global epages
  78.     for pagetitle in etitles:
  79.         params = {'action':'query',
  80.                   'list':'embeddedin',
  81.                   'eititle':pagetitle,
  82.                   'eilimit':'5000',
  83.                   'eifilterredir':'nonredirects',
  84.                   'einamespace':'0'
  85.                   }
  86.         req = api.APIRequest(site, params)
  87.         res = req.query(False)
  88.         length = len(res['query']['embeddedin'])
  89.         while length >= 5000:
  90.             for pagename in res['query']['embeddedin']:
  91.                 if pagetitle in journaltitles:
  92.                     epages1.append(pagename['title'])
  93.                 else:
  94.                     epages2.append(pagename['title'])
  95.             length = len(res['query']['embeddedin'])
  96.             try:
  97.                 params['eicontinue'] = res['continue']['eicontinue']
  98.             except KeyError:
  99.                 pass
  100.             req = api.APIRequest(site, params)
  101.             res = req.query(False)
  102.        
  103. def changepage(pagetitle, isjournal, originalpage):
  104.     global bannershells
  105.    
  106.     if page.Page(site, pagetitle).exists:
  107.         filetext = page.Page(site, pagetitle).getWikiText()
  108.     else:
  109.         filetext = ''
  110.  
  111.     if not allow_bots(filetext):
  112.         return
  113.  
  114.     if isjournal:
  115.         stringadd = "{{WikiProject Academic Journals|class=File}}"
  116.         transcludepage = etitles[0]
  117.     else:
  118.         transcludepage = etitles[1]
  119.         stringadd = "{{WikiProject Magazines|class=File}}"
  120.        
  121.     if any(bannershell in filetext for bannershell[9:] in bannershells):
  122.         re.sub(r'(%s.*\|1=)' % bannershell, '\g<1>\n%s' % stringadd, filetext, re.IGNORECASE)
  123.     else:
  124.         filetext = '%s\n' %  stringadd + filetext
  125.  
  126.     page.Page(site, pagetitle).edit(text = filetext,
  127.                                     bot = True,
  128.                                     summary = "Adding %s because [[%s]] transcludes %s ([[Wikipedia:Bots/Requests for approval/DatBot 9|BOT]])" % (stringadd, originalpage, transcludepage))
  129.    
  130.  
  131. def main():
  132.     global epages1
  133.     global epages2
  134.     global journalprojects
  135.     global magazineprojects
  136.  
  137.     if not startAllowed():
  138.         return
  139.     updatelists()
  140.     getembeds()
  141.     filechecked = open('pageschecked.txt', 'r')
  142.     pageschecked = filechecked.read()
  143.     pageschecked = pageschecked.decode('utf-8')
  144.     filechecked.close()
  145.     openfile = open('pageschecked.txt', 'a')
  146.     for epage in epages1 + list(set(epages2) - set(epages1)):
  147.         if epage in pageschecked:
  148.             continue
  149.         else:
  150.             try:
  151.                 # https://regex101.com/r/BXBwby/1
  152.                 filename = re.findall(r'\|\s*(?:image|cover|image_file|logo)\s*=\s*(?:\[\[)?(?:(?:File|Image)\s*:\s*)?(\w[^\|<\]]*)',
  153.                           page.Page(site, epage).getWikiText(), re.IGNORECASE | re.M)[0]
  154.  
  155.                 filetalk = "File talk:%s" % filename
  156.                 if not page.Page(site, filetalk).exists:
  157.                     filetext = ''
  158.                 else:
  159.                     filetext = page.Page(site, filetalk).getWikiText()
  160.                
  161.                 if epage in epages1:
  162.                     if any(pageelement in filetext.lower() for pageelement[9:] in journalprojects):
  163.                         continue
  164.                    
  165.                     changepage(filetalk, True, epage)
  166.                 if epage in epages2:
  167.                     if any(pageelement in filetext.lower() for pageelement[9:] in magazineprojects):
  168.                         continue
  169.                    
  170.                     changepage(filetalk, False, epage)
  171.                    
  172.             except IndexError:
  173.                 continue
  174.             openfile.write("%s\n" % epage.encode('utf-8')) # all refreshed monthly
  175.     openfile.close()
  176.  
  177. def allow_bots(text):
  178.     user = 'datbot'
  179.     text = mwparserfromhell.parse(text)
  180.     for tl in text.filter_templates():
  181.         if tl.name.matches(['bots', 'nobots']):
  182.             break
  183.     else:
  184.         return True
  185.     for param in tl.params:
  186.         bots = [x.lower().strip() for x in param.value.split(",")]
  187.         if param.name == 'allow':
  188.             if ''.join(bots) == 'none': return False
  189.             for bot in bots:
  190.                 if bot in (user, 'all'):
  191.                     return True
  192.         elif param.name == 'deny':
  193.             if ''.join(bots) == 'none': return True
  194.             for bot in bots:
  195.                 if bot in (user, 'all'):
  196.                     return False
  197.     if (tl.name.matches('nobots') and len(tl.params) == 0):
  198.         return False
  199.     return True
  200.  
  201. if __name__ == "__main__":
  202.     main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top