Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import pywikibot as pwb
- from pywikibot import pagegenerators as pg
- import urllib
- import json
- import re
- import sys
# Search patterns for the word to wikilink.  Every pattern keeps its leading
# space (and trailing space or punctuation mark) because main() looks them up
# with plain substring matching.  Do not modify these lists.
paraules = [u' Arròs ', u' arròs ']
paraules_plural = [' arrossos ']
# Period-terminated singular forms only; comma-terminated forms belong in
# paraules_coma_sing so that a comma is never treated as a full stop.
paraules_punt_sing = [u' Arròs.', u' arròs.']
paraules_coma_sing = [u' arròs,', u' Arròs,']
paraules_punt_pl = [' arrossos.']
def _build_link(pattern, target=None):
    """Return the wikilink text that replaces one search pattern.

    The pattern's surrounding spaces are stripped to obtain the visible word.
    A trailing '.' or ',' is moved outside the link and kept as-is; otherwise
    the link is followed by a single space.  When ``target`` is given, a piped
    link ``[[target|word]]`` is produced instead of ``[[word]]``.
    """
    word = pattern.strip()
    tail = ' '
    if word[-1:] in ('.', ','):
        # Keep the punctuation mark that was matched, but outside the link.
        tail = word[-1]
        word = word[:-1]
    if target is None or target == word:
        link = '[[' + word + ']]'
    else:
        link = '[[' + target + '|' + word + ']]'
    return ' ' + link + tail


def main(pag):
    """Wikilink every occurrence of the configured word on one page.

    ``pag`` is expected to provide ``title()``, a readable/writable ``text``
    attribute and ``save(summary)``.  The page is left untouched when its
    title contains the word or when none of the patterns occurs in its text;
    otherwise the linked text is assigned to ``pag.text`` and saved.
    """
    titol = pag.title()
    # A page about the word itself must not link to itself.
    if any(paraula.strip() in titol for paraula in paraules):
        return

    text = pag.text
    singulars = paraules + paraules_punt_sing + paraules_coma_sing
    plurals = paraules_plural + paraules_punt_pl

    coincidencies = []
    for paraula in singulars + plurals:
        # The lists may overlap, so report each matching pattern only once.
        if paraula in text and paraula not in coincidencies:
            coincidencies.append(paraula)

    print("\033[1m%s\033[0m: \033[1m%s\033[0m coincidències" % (pag, len(coincidencies)))
    print("COINCIDENCIES:" + str(coincidencies))
    if not coincidencies:
        print("Me la salto")
        return

    # Plural forms are piped to the article named by the first singular form.
    desti = paraules[0].strip()

    # Apply every replacement to the running result so that none is lost.
    noutext = text
    for paraula in singulars:
        noutext = noutext.replace(paraula, _build_link(paraula))
    for paraula in plurals:
        noutext = noutext.replace(paraula, _build_link(paraula, desti))

    pag.text = noutext
    pag.save(u'Bot: Afegint enllaços per la paraula %s' % desti)
if __name__ == '__main__':
    # Walk every main-namespace page of the Catalan Wikiquote (redirects
    # included), preloading them in batches, and process each one with main().
    lloc = pwb.Site("ca", "wikiquote")
    totes = pg.AllpagesPageGenerator(
        site=lloc,
        start="!",
        namespace=0,
        includeredirects=True,
    )
    for pagina in pg.PreloadingGenerator(totes, pageNumber=100):
        main(pagina)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement