Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python3
"""Purge spoiler tags from a TV Tropes document.

``[[spoiler:some text]]`` markup is replaced by its bare contents
(``some text``); every other ``[[...]]`` expression is left untouched.

Usage: paste the article into the ``article`` string below and run the
script.  The result is written to standard output, so on a linux/unix shell
you can capture it with ``python3 purge_spoilers.py >> output.txt``.
"""
import re

# Corner case: pasting will fail if the original document contains
# 3 consecutive double-quote (") characters.
article = """simply paste
the article you want to despoiler
into this multi-line
triple-quoted string"""

# Preferred stand-in character for stashed bracket expressions.  If the
# document already contains it, an unused character is picked instead.
pl1 = "🐰"

# Any [[...]] expression that is not a spoiler tag.  '.' does not match a
# newline, so an expression must open and close on the same line.
_BRACKETED = re.compile(r'\[\[(?!spoiler:).+?\]\]')

# A spoiler tag.  The lazy '.+?' stops at the first ']]', so several spoiler
# tags on one line are handled one by one instead of being swallowed as a
# single span.
_SPOILER = re.compile(r'\[\[spoiler:(.+?)\]\]')


def _unused_placeholder(text: str) -> str:
    """Return a single character that does not occur in *text*."""
    if pl1 not in text:
        return pl1
    # Fall back to the Unicode private-use areas, which carry no assigned
    # characters and are therefore unlikely to appear in an article.
    for first, last in ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD)):
        for code in range(first, last + 1):
            candidate = chr(code)
            if candidate not in text:
                return candidate
    raise ValueError("no unused placeholder character available")


def purge_spoilers(text: str) -> str:
    """Return *text* with every ``[[spoiler:...]]`` wrapper removed.

    The work is done in three phases:

    1. All bracketed expressions other than spoiler tags are moved into a
       list and replaced by a one-character placeholder, so that a ``]]``
       belonging to e.g. a link inside a spoiler cannot be mistaken for the
       end of the spoiler tag.
    2. Spoiler wrappers are stripped, keeping their contents.
    3. The stashed expressions are put back in their original order.

    Raises:
        ValueError: if no placeholder character absent from *text* can be
            found (only possible for pathological input).
    """
    placeholder = _unused_placeholder(text)

    # Phase 1: stash non-spoiler bracketed expressions.
    stashed = []

    def _stash(match):
        stashed.append(match.group())
        return placeholder

    text = _BRACKETED.sub(_stash, text)

    # Phase 2: strip spoiler wrappers.  A nested spoiler needs more than one
    # pass (each pass removes one opening and one closing marker per tag),
    # so repeat until nothing is left to strip.  The replacement is a
    # function, so backslashes in the spoiler text are kept literally
    # rather than being interpreted as regex template escapes.
    while True:
        text, replaced = _SPOILER.subn(lambda match: match.group(1), text)
        if not replaced:
            break

    # Phase 3: re-insert the stashed expressions, first stashed first.
    # Again a function replacement keeps backslashes literal.
    remaining = iter(stashed)
    return re.sub(re.escape(placeholder), lambda match: next(remaining), text)


if __name__ == "__main__":
    # Not bothering to write to a file "properly".  You can just capture the
    # standard output if you need to.
    article = purge_spoilers(article)
    print(article)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement