Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- from nltk import word_tokenize, pos_tag, ne_chunk
- import nltk
- from nltk.corpus import wordnet
# Fetch the NLTK resources this script depends on.
# - 'punkt': sentence/word tokenizer models used by word_tokenize
# - 'averaged_perceptron_tagger': POS tagger model used by pos_tag
# - 'maxent_ne_chunker': named-entity chunker model used by ne_chunk
#   (the original downloaded 'words' twice and never fetched this,
#   so ne_chunk would fail with a LookupError on a fresh machine)
# - 'words': word list the NE chunker consults
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# Read the whole input corpus, flattening newlines to spaces so the
# tokenizer sees one continuous stream of text.
# NOTE: the pasted original had replace('n', ' ') — a mangled '\n' —
# which would strip every letter "n" from the text and corrupt names.
with open('walking.txt', 'r') as file:
    text = file.read().replace('\n', ' ')

# Accumulator of full names found by get_human_names(); person_names is
# deliberately an alias of the same list, later pruned in place by the
# WordNet filtering loop below.
person_list = []
person_names = person_list
def get_human_names(text):
    """Extract multi-word PERSON names from *text* via NLTK NE chunking.

    Tokenizes, POS-tags, and NE-chunks the text, then collects the leaves
    of every PERSON subtree. Single-token matches are skipped to avoid
    grabbing lone surnames. Each new full name is appended to the
    module-level ``person_list`` (kept for backward compatibility with
    callers that read the global), and the list is also returned so the
    result can be used directly.
    """
    tokens = nltk.tokenize.word_tokenize(text)
    pos = nltk.pos_tag(tokens)
    sentt = nltk.ne_chunk(pos, binary=False)
    for subtree in sentt.subtrees(filter=lambda t: t.label() == 'PERSON'):
        parts = [leaf[0] for leaf in subtree.leaves()]
        if len(parts) > 1:  # avoid grabbing lone surnames
            # join instead of the original quadratic ``name += part + ' '``
            full_name = ' '.join(parts)
            if full_name not in person_list:
                person_list.append(full_name)
    # Original returned None; returning the list is strictly more useful
    # and backward-compatible for callers that ignored the return value.
    return person_list
names = get_human_names(text)

# Drop any "name" containing a token that is an ordinary dictionary word
# (has WordNet synsets) — those are likely false-positive PERSON chunks.
# BUG FIX: the original iterated person_list while calling
# person_names.remove(person) on the SAME underlying list (they are
# aliases), which silently skips the element after each removal.
# Iterating over a snapshot copy makes removal safe. The original's
# ``if (name in person)`` check was a tautology (name came from
# person.split) and is dropped.
for person in list(person_names):
    for name in person.split(" "):
        if wordnet.synsets(name):
            person_names.remove(person)
            break
print(person_names)
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement