Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Strip markup and link noise from the text held in record[0].
# The substitutions run in order: markdown links go first, while their
# brackets and target are still intact for the pattern to recognise.

# Remove inline markdown links of the form [text](target). Negated character
# classes keep each match confined to a single link on a single line; a greedy
# ".*" would also delete any ordinary text sitting between two links.
record[0] = re.sub(r"\[[^\]\n]*\]\([^)\n]*\)", "", record[0])
# Remove bare links. The scheme is optional, so anything shaped like
# "name.tld" (a dot followed by 2-6 letters) is removed as well.
record[0] = re.sub(r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*", "", record[0])
# Remove HTML character references (&amp; &#39; &#x27; ...). Only well-formed
# references are matched, so a literal "&" followed later by an unrelated ";"
# no longer takes the text in between with it.
record[0] = re.sub(r"&(?:#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);", "", record[0])
# Remove slash-joined tokens.
# NOTE(review): this pattern consumes only ONE word character after the
# slash, so "and/or" becomes "r". Left unchanged because the intent is not
# visible here -- confirm whether r"(\w+/+\w+)+" was meant.
record[0] = re.sub(r"(\w+/+\w)+", "", record[0])
# Remove usernames written with underscores. This matches any run of word
# characters joined by "_", so snake_case identifiers are removed too.
record[0] = re.sub(r"(\w+_)+\w+", "", record[0])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement