Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.21 KB | None | 0 0
  1. from nltk import word_tokenize, pos_tag, ne_chunk
  2. import nltk
  3. from nltk.corpus import wordnet
  4. nltk.download('punkt')
  5. nltk.download('averaged_perceptron_tagger')
  6. nltk.download('words')
  7. nltk.download('words')
  8.  
  9. with open('walking.txt', 'r') as file:
  10. text = file.read().replace('n', ' ')
  11.  
  12. person_list = []
  13. person_names = person_list
  14. def get_human_names(text):
  15. tokens = nltk.tokenize.word_tokenize(text)
  16. pos = nltk.pos_tag(tokens)
  17. sentt = nltk.ne_chunk(pos, binary = False)
  18.  
  19. person = []
  20. name = ""
  21. for subtree in sentt.subtrees(filter=lambda t: t.label() == 'PERSON'):
  22. for leaf in subtree.leaves():
  23. person.append(leaf[0])
  24. if len(person) > 1: #avoid grabbing lone surnames
  25. for part in person:
  26. name += part + ' '
  27. if name[:-1] not in person_list:
  28. person_list.append(name[:-1])
  29. name = ''
  30. person = []
  31. # print (person_list)
  32.  
  33. names = get_human_names(text)
  34. for person in person_list:
  35. person_split = person.split(" ")
  36. for name in person_split:
  37. if wordnet.synsets(name):
  38. if(name in person):
  39. person_names.remove(person)
  40. break
  41.  
  42. print(person_names)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement