Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Generate sentence variants: for every sentence that contains a keyword,
# emit one copy per *other* word of that keyword's class, with the keyword
# swapped out.
import re

# Keyword classes; words inside one class are treated as interchangeable.
wordlist_dict = {
    'class1': ['word_a', 'word_b', 'word_c'],
    'class2': ['word_d', 'word_e'],
    'class3': ['word_f', 'word_g', 'word_h', 'word_i'],
}

sent_list = [
    "I have a sentence with word_g",
    "And another sentence with word_d",
    "Don't forget word_b",
    "no key word here",
]


def sentence_variants(sentence, classes, pattern):
    """Return copies of *sentence* with its first keyword swapped for classmates.

    Args:
        sentence: Text to scan.
        classes: Iterable of word lists; each list is one keyword class.
        pattern: Regex (string or compiled) matching any keyword.

    Returns:
        A list of new sentences, one per alternative word in every class that
        contains the matched keyword. Empty when no keyword is found or the
        matched text is not a member of any class.
    """
    found = re.search(pattern, sentence)
    if not found:
        return []
    keyword = found.group(0)

    variants = []
    for words in classes:
        # Pick the class by membership of the matched keyword, not by
        # re-searching the sentence: a sentence may also contain words of
        # other classes, which must not take part in the substitution.
        if keyword not in words:
            continue
        # Filter instead of list.remove(): the class lists are shared with
        # wordlist_dict and must stay intact for the following sentences.
        variants.extend(
            sentence.replace(keyword, other)
            for other in words
            if other != keyword
        )
    return variants


wordlist = sorted(wordlist_dict.values())

# One alternation per class plus one covering every keyword. Words are
# escaped so that regex metacharacters in a keyword are matched literally.
pattern_list = [
    '({})+'.format('|'.join(re.escape(word) for word in words))
    for words in wordlist
]
pattern_all = '({})+'.format(
    '|'.join(re.escape(word) for words in wordlist for word in words)
)

print(pattern_list)
# ['(word_a|word_b|word_c)+', '(word_d|word_e)+', '(word_f|word_g|word_h|word_i)+']
print(pattern_all)
# (word_a|word_b|word_c|word_d|word_e|word_f|word_g|word_h|word_i)+

new_sent_list = []
for sent in sent_list:
    new_sent_list.extend(sentence_variants(sent, wordlist, pattern_all))
Add Comment
Please, Sign In to add comment