Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Copy every line of a TSV file that contains a listed word to a new file.

Words are read from ``mylist.txt`` (one per line), lines from ``stuff.tsv``,
and matching lines are written to ``output.tsv``.  Matching is
case-insensitive and only whole words count as a match.
"""
import re


def load_words(path):
    """Return the lower-cased, non-empty words stored one per line in *path*."""
    # use a "with" block to automatically close I/O streams
    with open(path) as word_list:
        stripped = (raw.strip().lower() for raw in word_list)
        # Blank lines are dropped: an empty word would compile to a pattern
        # that matches almost every line.
        return [word for word in stripped if word]


def compile_word_patterns(words):
    """Return a list of ``(word, compiled_pattern)`` pairs for *words*.

    Each pattern matches the word literally, and only where it is not
    directly adjacent to another word character.
    """
    patterns = []
    for word in words:
        if not word:
            continue
        # re.escape makes regex metacharacters in the word match literally.
        # The lookarounds behave like \b for ordinary words, but still work
        # when the word itself starts or ends with punctuation (e.g. "c++").
        source = r'(?<!\w){0}(?!\w)'.format(re.escape(word))
        patterns.append((word, re.compile(source)))
    return patterns


def find_matching_lines(lines, words):
    """Return the lines that contain at least one of *words* as a whole word.

    *lines* is any iterable of strings and *words* an iterable of
    lower-cased words.  Each line is compared in lower case, so matching is
    case-insensitive.  Returned lines always end with a newline so they can
    be passed straight to ``writelines``.
    """
    patterns = compile_word_patterns(words)
    matched = []
    for line in lines:
        # lower-case once per line instead of once per word
        lowered = line.lower()
        for word, pattern in patterns:
            if not pattern.search(lowered):
                continue
            # add the line to the output list, newline-terminated
            if line.endswith('\n'):
                matched.append(line)
            else:
                matched.append('{0}\n'.format(line))
            # write some debug output to the console
            print('Found line {0} that matched word {1}'.format(line, word))
            # the first matching word is enough; move on to the next line
            break
    return matched


def main(word_path='mylist.txt', tsv_path='stuff.tsv',
         output_path='output.tsv'):
    """Filter *tsv_path* by the words in *word_path* into *output_path*."""
    words = load_words(word_path)
    # stream the TSV file line by line rather than copying it into a list
    with open(tsv_path) as tsv:
        output = find_matching_lines(tsv, words)
    # open the output file using a with block with write permissions
    with open(output_path, 'w') as output_file:
        # write the output list to the file
        output_file.writelines(output)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement