Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from time import time
- from textwrap import fill
- from pywikibot import Site, Page, warning
- from pywikibot.bot import SingleSiteBot, ExistingPageBot, NoRedirectPageBot
- from pywikibot.editor import TextEditor
class ReferenceFixer(
    SingleSiteBot,
    ExistingPageBot,
    NoRedirectPageBot
):
    """Bot that moves a full stop placed before a ``<ref`` tag to after it.

    Pages are taken from ``self.site.randompages()``; each page's text is
    passed through the module-level ``fix`` function and saved with a fixed
    edit summary.  Pages outside content namespaces are skipped.
    """

    def __init__(self, **kwargs):
        """Initialise the bot and select random pages as its work list.

        Args:
            **kwargs: Options forwarded unchanged to the pywikibot base
                classes.
        """
        # Forward the options: calling the base initialiser without them
        # would silently discard everything the caller configured.
        super().__init__(**kwargs)
        self.generator = self.site.randompages()

    def longpages(self):
        """Yield the first element of each entry of ``site.longpages()``.

        NOTE(review): presumably each entry is a ``(page, length)`` tuple,
        so this yields the pages - confirm against the pywikibot API.
        """
        for entry in self.site.longpages():
            yield entry[0]

    def treat_page(self):
        """Rewrite the current page's text with ``fix`` and save it."""
        page = self.current_page
        # Summary, roughly: "One sentence may contain several terms that need
        # footnotes, so do not put footnotes after punctuation. See
        # [[Help:脚注]]."
        summary = '同一个句子可能出现多个需要添加脚注的词语,因此请不要将脚注置于标点符号后面。请参阅[[Help:脚注]]。'
        self.put_current(fix(page.text), summary=summary)

    def skip_page(self, page):
        """Skip pages outside content namespaces, then defer to the bases.

        Args:
            page: The page about to be treated.

        Returns:
            ``True`` when the page's namespace is not flagged as content,
            otherwise whatever the parent classes decide.
        """
        if not page.namespace().content:
            return True
        return super().skip_page(page)
def fix(text):
    """Move every full stop that precedes a ``<ref`` tag to just after it.

    For each occurrence of ``。<ref`` the full stop is removed and, unless one
    is already there, re-inserted immediately after the nearest following
    ``</ref>`` or ``/>`` (self-closing tag) terminator.  Consecutive
    references are handled one after another, so the full stop ends up
    behind the last reference of a chain.

    Example: ``A。<ref>a</ref>B`` becomes ``A<ref>a</ref>。B``.

    Args:
        text: The wikitext to process.

    Returns:
        The rewritten text.
    """
    cur = Cursor(text)
    while True:
        start = cur.next('。<ref')
        # ``next`` reports "not found" as False; test identity so that a
        # marker at index 0 (which is falsy too) is still processed.
        if start is False:
            break
        cur.to(start)

        # Index of the last character of each terminator found from here on.
        ends = [
            end
            for end in (cur.next('</ref>', True), cur.next('/>', True))
            if end is not False
        ]
        if not ends:
            # No terminator after this marker means none after any later
            # marker either, so there is nothing left to fix.
            break

        # Dropping the full stop shifts everything behind it left by one, so
        # the old index of the terminator's last character is now the slot
        # directly after the tag - exactly where the full stop belongs.
        cur.remove()
        after_tag = min(ends)

        # ``at`` returns False past the end of the text, which also leads to
        # an insert: a reference that ends the text gets its full stop
        # appended.
        if cur.at(after_tag) != '。':
            cur.insert('。', pos=after_tag)

        # A full stop now sits at ``after_tag``; resuming from it lets a
        # directly following ``<ref`` be picked up by the next iteration.
        cur.to(after_tag)
    return str(cur)
class Cursor:
    """A mutable text buffer with a single movable position.

    ``str(cursor)`` returns the current text.  The position is a 0-based
    index; ``to``/``fwd``/``back`` refuse to move it outside
    ``0 <= pos < len(text)``.  Operations that can fail report it by
    returning ``False`` instead of raising.
    """

    def __str__(self):
        """Return the current text of the buffer."""
        return self.text

    def __init__(self, text, pos=0):
        """Wrap *text* with the cursor placed at index *pos* (not validated)."""
        self.text = text
        self._pos = pos

    def _out_of_range(self, num):
        """Return True when *num* is not a valid index into the text."""
        return not 0 <= num < len(self.text)

    def pos(self):
        """Return the current cursor index."""
        return self._pos

    def at(self, pos):
        """Return the character at *pos*, or ``False`` if out of range."""
        if self._out_of_range(pos):
            return False
        return self.text[pos]

    def this(self):
        """Return the character under the cursor.

        Raises:
            IndexError: If the cursor index is past the end of the text
                (for example on an empty buffer).
        """
        return self.text[self._pos]

    def next_nchars(self, count, this=False):
        """Return up to *count* characters following the cursor.

        With ``this=True`` the character under the cursor is prepended.
        Fewer characters are returned when the text ends early.
        """
        prefix = self.this() if this else ''
        return prefix + self.text[self._pos + 1:self._pos + count + 1]

    def to(self, pos):
        """Move the cursor to *pos*.

        Returns:
            ``True`` on success; ``False`` (cursor unchanged) when *pos* is
            not a valid index.
        """
        if self._out_of_range(pos):
            return False
        self._pos = pos
        return True

    def fwd(self, count):
        """Move the cursor *count* characters forward; see ``to``."""
        return self.to(self._pos + count)

    def back(self, count):
        """Move the cursor *count* characters backward; see ``to``."""
        return self.to(self._pos - count)

    def insert(self, char, pos=None):
        """Insert *char* before index *pos* (default: the cursor index).

        The cursor index itself is left unchanged.  *pos* is used as a slice
        bound, so ``len(text)`` appends.
        """
        where = self._pos if pos is None else pos
        self.text = self.text[:where] + char + self.text[where:]

    def remove(self, pos=None):
        """Delete the character at *pos* (default: the cursor index).

        When an explicit *pos* lies before the cursor, the cursor index is
        decremented so it keeps pointing at the same character.

        Returns:
            ``True`` on success; ``False`` when the index is out of range.
        """
        where = self._pos if pos is None else pos
        if self._out_of_range(where):
            return False
        if pos is not None and pos < self._pos:
            self._pos -= 1
        self.text = self.text[:where] + self.text[where + 1:]
        return True

    def next(self, sub, last_char=False):
        """Find the first occurrence of *sub* at or after the cursor.

        Args:
            sub: Substring to search for.
            last_char: Return the index of the match's last character
                instead of its first.

        Returns:
            The index, or ``False`` when there is no match.  Index 0 is a
            valid result and is falsy too, so callers should test the result
            with ``is False`` rather than by truthiness.
        """
        result = self.text.find(sub, self._pos)
        if result == -1:
            return False
        if last_char:
            result += len(sub) - 1
        return result

    def find_next(self, sub, to_last_char=False):
        """Move the cursor to the next occurrence of *sub*.

        Args:
            sub: Substring to search for, starting at the cursor.
            to_last_char: Land on the match's last character instead of its
                first.

        Returns:
            ``True`` when a match was found, ``False`` otherwise (cursor
            unchanged).
        """
        pos = self.next(sub, to_last_char)
        # Identity test, not truthiness: a match at index 0 must count as
        # found.
        if pos is False:
            return False
        self.to(pos)
        return True
def main():
    """Create a ``ReferenceFixer`` with default options and run it."""
    bot = ReferenceFixer()
    bot.run()


if __name__ == "__main__":
    # Start the bot only when executed as a script, not when imported.
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement