Advertisement
Guest User

Untitled

a guest
Mar 31st, 2020
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.22 KB | None | 0 0
  1. import nltk
  2. from nltk.stem import WordNetLemmatizer
  3. wordnet_lemma = WordNetLemmatizer()
  4. from collections import Counter
  5.  
  6.  
  7. credit_scoring['tokenized_words'] = credit_scoring['purpose'].apply(nltk.word_tokenize)
  8.  
  9. def lemmatize_nouns(text):
  10. lemmas = [wordnet_lemma.lemmatize(token, pos='n') for token in text]
  11. return lemmas
  12. def lemmatize_verbs(text):
  13. lemmas = [wordnet_lemma.lemmatize(token, pos='v') for token in text]
  14. return lemmas
  15.  
  16. credit_scoring['lemmas'] = credit_scoring['tokenized_words'].apply(lemmatize_nouns, lemmatize_verbs)
  17.  
  18. #display(credit_scoring['lemmas'].head(20))
  19.  
  20. def to_cut_purpose(lemmas):
  21. if 'house' in lemmas or 'estate' in lemmas or 'property' in lemmas or 'housing' in lemmas:
  22. return 'real estate'
  23. elif 'wedding' in lemmas:
  24. return 'wedding'
  25. elif 'education'in lemmas or 'university' in lemmas or 'educated' in lemmas:
  26. return 'education'
  27. elif 'car' in lemmas:
  28. return'car'
  29. else:
  30. return'unclassified'
  31.  
  32.  
  33. credit_scoring['ultimate_purpose'] = credit_scoring['lemmas'].apply(to_cut_purpose)
  34.  
  35. print(Counter(credit_scoring['ultimate_purpose']))
  36.  
  37. print(credit_scoring[credit_scoring['ultimate_purpose'] == 'unclassified'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement