Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import re
- import json
- import sys
- from wikitools import *
- import userpass
- import mwparserfromhell
# Connect to the wiki and authenticate with credentials stored in userpass.py.
site = wiki.Wiki()
site.login(userpass.username, userpass.password)

# Seed template-title lists. updatelists() later extends each list with the
# titles of all template-namespace redirects pointing at the seed title, so
# substring checks against page text also catch redirect spellings.
etitles = ['Template:Infobox journal', 'Template:Infobox magazine']
journaltitles = ['Template:Infobox journal']
journalprojects = ['Template:WikiProject Academic Journals']
magazineprojects = ['Template:WikiProject Magazines']
bannershells = ['Template:WikiProject banner shell']
def updatelists():
    """Extend each seed template list with its template-namespace redirects.

    For every list (magazine projects, journal projects, journal infoboxes,
    banner shells) the first element is the canonical template title; this
    queries the API for redirects to it (ns 10) and appends their titles,
    mutating the module-level lists in place.  Returns None.

    The original repeated the same query stanza four times verbatim; the
    loop below is behaviorally identical.  The ``global`` declarations were
    dropped because the lists are only mutated via ``append``, never rebound.
    """
    for templatelist in (magazineprojects, journalprojects,
                         journaltitles, bannershells):
        params = {'action': 'query',
                  'list': 'backlinks',
                  'bltitle': templatelist[0],
                  'bllimit': '5000',
                  'blfilterredir': 'redirects',
                  'blnamespace': '10'
                  }
        for pageelement in api.APIRequest(site, params).query(False)['query']['backlinks']:
            templatelist.append(pageelement['title'])
    print(magazineprojects)
    print(journalprojects)
    print(bannershells)
    print(journaltitles)
# Accumulators filled by getembeds(): article titles transcluding
# {{Infobox journal}} (epages1) and {{Infobox magazine}} (epages2).
epages1 = []
epages2 = []
def startAllowed():
    """Kill-switch check: return True only while the on-wiki run page says 'true'.

    Bug fix: the original compared the ``page.Page`` *object* itself to the
    string ``'true'`` — that comparison is never equal, so the bot could never
    start.  We must fetch the page's wikitext and compare that (stripped of
    surrounding whitespace, since run pages commonly end with a newline).
    """
    runtext = page.Page(site, 'User:DatBot/run/task9').getWikiText()
    return runtext.strip() == 'true'
def getembeds():
    """Collect every mainspace article transcluding the watched infoboxes.

    Fills the module-level lists: titles embedding {{Infobox journal}} go to
    ``epages1``, titles embedding {{Infobox magazine}} go to ``epages2``,
    following API continuation until the result set is exhausted.

    Bug fixes vs. the original:
    - results were only processed inside ``while length >= 5000``, so a final
      (or only) batch smaller than 5000 entries was silently dropped;
    - the stale ``global epages`` declaration named a variable that does not
      exist (the lists are mutated in place, so no global is needed).
    """
    for pagetitle in etitles:
        # All results for one source template go to the same list; decide once.
        target = epages1 if pagetitle in journaltitles else epages2
        params = {'action': 'query',
                  'list': 'embeddedin',
                  'eititle': pagetitle,
                  'eilimit': '5000',
                  'eifilterredir': 'nonredirects',
                  'einamespace': '0'
                  }
        while True:
            res = api.APIRequest(site, params).query(False)
            for pagename in res['query']['embeddedin']:
                target.append(pagename['title'])
            try:
                # Absent 'continue' means the last batch was received.
                params['eicontinue'] = res['continue']['eicontinue']
            except KeyError:
                break
def changepage(pagetitle, isjournal, originalpage):
    """Add the appropriate WikiProject banner to a file-talk page.

    pagetitle    -- the "File talk:..." page to edit
    isjournal    -- True for {{WikiProject Academic Journals}}, False for
                    {{WikiProject Magazines}}
    originalpage -- the article whose infobox referenced the file (used in
                    the edit summary)

    Respects {{bots}}/{{nobots}} exclusion via allow_bots().

    Bug fixes vs. the original:
    - ``any(bannershell in filetext for bannershell[9:] in bannershells)`` was
      invalid generator syntax; the intent was to test the shell name with its
      9-character 'Template:' prefix stripped, as it appears when transcluded;
    - ``re.sub``'s return value was discarded, so the substitution never took
      effect;
    - ``re.IGNORECASE`` was passed positionally as the *count* argument
      instead of as ``flags``.
    """
    talkpage = page.Page(site, pagetitle)
    if talkpage.exists:
        filetext = talkpage.getWikiText()
    else:
        filetext = ''
    if not allow_bots(filetext):
        return
    if isjournal:
        stringadd = "{{WikiProject Academic Journals|class=File}}"
        transcludepage = etitles[0]
    else:
        stringadd = "{{WikiProject Magazines|class=File}}"
        transcludepage = etitles[1]
    # Transclusions are written without the 'Template:' prefix (9 chars).
    shell = next((name[9:] for name in bannershells if name[9:] in filetext), None)
    if shell is not None:
        # A banner shell is already present: insert our banner right after
        # its |1= parameter.
        filetext = re.sub(r'(%s.*\|1=)' % re.escape(shell),
                          r'\g<1>\n%s' % stringadd,
                          filetext, flags=re.IGNORECASE)
    else:
        # No shell: prepend the banner to the page.
        filetext = '%s\n' % stringadd + filetext
    page.Page(site, pagetitle).edit(
        text=filetext,
        bot=True,
        summary="Adding %s because [[%s]] transcludes %s ([[Wikipedia:Bots/Requests for approval/DatBot 9|BOT]])" % (stringadd, originalpage, transcludepage))
def main():
    """Run one pass of the bot.

    Checks the kill switch, refreshes the redirect lists, gathers the
    transcluding articles, then for each article not yet recorded in
    pageschecked.txt extracts its infobox image filename and tags the
    corresponding file-talk page.

    Bug fixes vs. the original:
    - ``str.decode('utf-8')`` / ``str.encode('utf-8')`` on a text-mode file
      (Python 2 leftovers) crash on Python 3 / write ``b'...'`` reprs; the
      files are now opened with an explicit encoding instead;
    - membership was tested against the raw file text, so any title that is a
      substring of a longer recorded title was wrongly skipped — titles are
      now matched exactly, one per line;
    - a missing pageschecked.txt no longer aborts the run;
    - both file handles are managed with ``with`` so they are always closed.
    """
    if not startAllowed():
        return
    updatelists()
    getembeds()
    # Titles already handled, one per line (the file is refreshed monthly).
    try:
        with open('pageschecked.txt', 'r', encoding='utf-8') as filechecked:
            pageschecked = set(filechecked.read().splitlines())
    except FileNotFoundError:
        pageschecked = set()
    with open('pageschecked.txt', 'a', encoding='utf-8') as openfile:
        # Journal articles first, then magazine articles not already covered.
        for epage in epages1 + list(set(epages2) - set(epages1)):
            if epage in pageschecked:
                continue
            try:
                # https://regex101.com/r/BXBwby/1
                filename = re.findall(r'\|\s*(?:image|cover|image_file|logo)\s*=\s*(?:\[\[)?(?:(?:File|Image)\s*:\s*)?(\w[^\|<\]]*)',
                                      page.Page(site, epage).getWikiText(), re.IGNORECASE | re.M)[0]
                filetalk = "File talk:%s" % filename
                if not page.Page(site, filetalk).exists:
                    filetext = ''
                else:
                    filetext = page.Page(site, filetalk).getWikiText()
                if epage in epages1:
                    # Skip if an Academic Journals banner is already present.
                    if any(pageelement[9:].lower() in filetext.lower() for pageelement in journalprojects):
                        continue
                    changepage(filetalk, True, epage)
                if epage in epages2:
                    # Skip if a Magazines banner is already present.
                    if any(pageelement[9:].lower() in filetext.lower() for pageelement in magazineprojects):
                        continue
                    changepage(filetalk, False, epage)
            except IndexError:
                # No recognizable image parameter in the infobox; skip.
                continue
            openfile.write("%s\n" % epage)  # all refreshed monthly
def allow_bots(text):
    """Honour {{bots}}/{{nobots}} exclusion templates for user 'datbot'.

    Given a page's wikitext, return True when this bot may edit the page and
    False when an exclusion template forbids it.  A page with no {{bots}} or
    {{nobots}} template allows everything; a bare {{nobots}} denies everything.
    """
    user = 'datbot'
    exclusion = None
    for template in mwparserfromhell.parse(text).filter_templates():
        if template.name.matches(['bots', 'nobots']):
            exclusion = template
            break
    if exclusion is None:
        # No exclusion template on the page: editing is allowed.
        return True
    for param in exclusion.params:
        names = [entry.lower().strip() for entry in param.value.split(",")]
        if param.name == 'allow':
            if ''.join(names) == 'none':
                return False
            for candidate in names:
                if candidate in (user, 'all'):
                    return True
        elif param.name == 'deny':
            if ''.join(names) == 'none':
                return True
            for candidate in names:
                if candidate in (user, 'all'):
                    return False
    # A parameterless {{nobots}} is a blanket denial.
    if exclusion.name.matches('nobots') and len(exclusion.params) == 0:
        return False
    return True
# Entry point guard: run the bot only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement