Advertisement
Guest User

Untitled

a guest
May 30th, 2015
254
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.45 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import re
  3.  
  4. output = []
  5.  
  6. # use a "with" block to automatically close I/O streams
  7. with open('mylist.txt') as word_list:
  8.  
  9. # read the contents of mylist.txt into the words list using list comprehension
  10. words = [word.strip().lower() for word in word_list]
  11.  
  12. with open('stuff.tsv') as tsv:
  13. # read the contents of stuff.tsv into the line list using list comprehension
  14. lines = [line for line in tsv]
  15.  
  16. # create a dictionary of compiled regular expressions for the word list
  17. regexen = {}
  18.  
  19. for word in words:
  20. regexen[word] = re.compile(r'\b{0}\b'.format(word))
  21.  
  22. # iterate over the lines
  23. for line in lines:
  24.  
  25. # iterate over the word list
  26. for word in words:
  27.  
  28. # create a regular expression using word boundaries around our word
  29. match = regexen[word].search(line.lower())
  30.  
  31. # if we find one of the words in the line, then add it to the output list
  32. if match:
  33.  
  34. # add the line to the output list
  35. if line.endswith('\n'):
  36. output.append(line)
  37. else:
  38. output.append('{0}\n'.format(line))
  39.  
  40. # write some debug output to the console
  41. print('Found line {0} that matched word {1}'.format(line, word))
  42.  
  43. # exit the word while loop
  44. break
  45.  
  46. # open output.tsv using a with block with write permissions
  47. with open('output.tsv', 'w') as output_file:
  48.  
  49. # write the output list to the file
  50. output_file.writelines(output)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement