Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Extract every distinct token of the form "<4 letters>_<6 or 7 letters>" from
# a UTF-8 text file and save the tokens, in first-seen order, as a JSON array.
import json
import re

INPUT_PATH = 'F:\\wikititle\\wiki3.txt'
OUTPUT_PATH = 'F:\\wikititle\\kidsb.txt'

# Four ASCII letters, an underscore, then six or seven ASCII letters.
# The lookarounds require that the token is not glued to another letter or
# digit, but (unlike a consuming character class) they do not eat the
# delimiter, so tokens at the very start/end of the text and tokens separated
# by a single delimiter character are all found.
# Note the quantifier is `{6,7}`: `{6|7}` is not a repetition at all, it is
# parsed as the literal text "{6" alternated with the literal text "7}".
TOKEN_PATTERN = re.compile(
    r"(?<![a-zA-Z0-9])[a-zA-Z]{4}_[a-zA-Z]{6,7}(?![a-zA-Z0-9])"
)


def find_tokens(text):
    """Return the distinct tokens found in *text*, in order of first appearance.

    Args:
        text: The string to scan.

    Returns:
        A list of matching substrings with duplicates removed. Empty when
        nothing matches.
    """
    # re.findall already yields a list of strings, so no further splitting is
    # needed; dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(TOKEN_PATTERN.findall(text)))


def main():
    """Read INPUT_PATH, extract the tokens and write them to OUTPUT_PATH as JSON.

    Raises:
        FileNotFoundError: If INPUT_PATH does not exist.
        FileExistsError: If OUTPUT_PATH already exists (it is opened in
            exclusive-create mode so an earlier result is never overwritten).
    """
    with open(INPUT_PATH, encoding="utf-8") as source:
        text = source.read()

    tokens = find_tokens(text)

    # 'x' = exclusive creation: fail rather than clobber an existing file.
    with open(OUTPUT_PATH, 'x', encoding="utf-8") as out:
        out.write(json.dumps(tokens))

    print("done")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement