Guest User

Untitled

a guest
Dec 15th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
  1. wordlist_dict = {
  2. 'class1': ['word_a', 'word_b', 'word_c'],
  3. 'class2': ['word_d', 'word_e'],
  4. 'class3': ['word_f', 'word_g', 'word_h', 'word_i']
  5. }
  6.  
  7. sent_list = [
  8. "I have a sentence with word_g",
  9. "And another sentence with word_d",
  10. "Don't forget word_b",
  11. "no key word here"
  12. ]
  13.  
  14. import re
  15.  
  16. pattern_list = []
  17. pattern_all = ''
  18. wordlist = sorted(wordlist_dict.values())
  19. for v in wordlist:
  20. pattern_list.append('({})+'.format('|'.join(v)))
  21. pattern_all += '|' + '|'.join(v)
  22. pattern_all = '({})+'.format(pattern_all[1:])
  23. print(pattern_list)
  24. # ['(word_a|word_b|word_c)+', '(word_d|word_e)+', '(word_f|word_g|word_h|word_i)+']
  25. print(pattern_all)
  26. # (word_a|word_b|word_c|word_d|word_e|word_f|word_g|word_h|word_i)+
  27.  
  28. new_sent_list = []
  29. for sent in sent_list:
  30. match_obj = re.search(pattern_all, sent)
  31. if match_obj:
  32. match = match_obj.group(0)
  33. for i in range(len(pattern_list)):
  34. if re.search(pattern_list[i], sent):
  35. match_wordlist = wordlist[i]
  36. match_wordlist.remove(match)
  37. for word in match_wordlist:
  38. new_sent_list.append(sent.replace(match, word))
Add Comment
Please, Sign In to add comment