Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- reg_ex = '[a-zA-Z0-9][^ ^\n]*'
- course_num = '[A-Z]{4}\xa0\d{5}'
- #ls = [x for x in text if x not in course_num]
- #print(ls)
- nums = (re.findall(course_num, text)[0].replace('\xa0', ' ').split(' '))
- #final_text = re.findall(reg_ex, text)
- #nums = (re.findall(course_num, text))
- other = (re.findall("[a-zA-Z0-9]\w+", text))
- #print(other)
- return [x.lower() for x in other if (x not in INDEX_IGNORE) and (x not in nums)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement