ismaelvc

pids_manipulator.py

Aug 25th, 2013
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.91 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import re
  4. import sys
  5.  
  6.  
  7. def read_files():
  8.     '''Open and read log file for searching content files'''
  9.     with open(sys.argv[1]) as f:
  10.         content = f.read()
  11.         search1 = re.findall(r'(\w+)(\.)(jpg|pdf|gif)', content)
  12.         search2 = re.findall(r'(\w+)(\/)(index.html)', content)
  13.         return (search1, search2)
  14.  
  15.  
  16. def extract_pids(search1, search2):
  17.     '''Extract PIDs from content file name also merging and removing duplications'''
  18.     ext1 = tuple(pids1[0] for pids1 in search1)
  19.     ext2 = tuple(pids2[0] for pids2 in search2)
  20.     ext = set(ext1 + ext2)
  21.     return ext
  22.  
  23.  
  24. def store_pids(ext):
  25.     '''Store PIDs in file'''
  26.     for out in ext:
  27.         with open('PIDs.log', 'a') as result:
  28.             result.write(out+'\n')
  29.  
  30. def main():
  31.     matches = read_files()
  32.     pids = extract_pids(*matches)
  33.     store_pids(pids)
  34.  
  35.  
  36. if __name__ == '__main__':
  37.     main()
Advertisement
Add Comment
Please, Sign In to add comment