Advertisement
Guest User

Untitled

a guest
Jul 17th, 2019
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.47 KB | None | 0 0
  # Scrub link-like and markup noise out of the text held in record[0].
  # The substitutions run in order; each one rewrites record[0] in place.
  # NOTE(review): `record` and `re` come from code outside this snippet;
  # record[0] is presumably a str -- confirm against the caller.

  # Remove markdown links of the form [label](target). The negated character
  # classes stop each part at its own closing delimiter, so two links on one
  # line no longer swallow the ordinary text that sits between them.
  record[0] = re.sub(r"\[[^\]]*\]\([^)]*\)", "", record[0])

  # Remove bare links: optional http(s):// scheme, a dotted host ending in a
  # 2-6 letter suffix, then any trailing path/query characters.
  # NOTE(review): this also matches any "word.word" pair such as a missing
  # space after a full stop -- pattern kept unchanged, verify that is intended.
  record[0] = re.sub(
      r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*",
      "",
      record[0],
  )

  # Remove HTML character references (&amp;, &#39;, &#x27;). Only well-formed
  # named or numeric references match, so a plain "&" followed later by a ";"
  # in normal prose is left untouched.
  record[0] = re.sub(
      r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);", "", record[0]
  )

  # Remove slash-separated tokens (presumably paths or r/name style mentions).
  # Every segment is consumed in full, so "r/python" no longer leaves "ython"
  # behind and "a/b/c" no longer leaves "/c".
  record[0] = re.sub(r"\w+(?:/+\w+)+", "", record[0])

  # Remove usernames (tokens made of word pieces joined by underscores).
  record[0] = re.sub(r"(\w+_)+\w+", "", record[0])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement