Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import re
- import sys
def read_files(path=None):
    '''Read a log file and find the content-file references it mentions.

    Args:
        path: Path of the log file to scan. When omitted, the first
            command-line argument (sys.argv[1]) is used.

    Returns:
        A (search1, search2) tuple of re.findall results:
        search1 holds (name, '.', extension) tuples for every
        "<name>.jpg", "<name>.pdf" or "<name>.gif" occurrence;
        search2 holds (name, '/', 'index.html') tuples for every
        "<name>/index.html" occurrence.

    Raises:
        IndexError: if path is omitted and no command-line argument exists.
        OSError: if the file cannot be opened or read.
    '''
    if path is None:
        path = sys.argv[1]
    with open(path) as f:
        content = f.read()
    search1 = re.findall(r'(\w+)(\.)(jpg|pdf|gif)', content)
    # The dot is escaped so that only a literal "index.html" matches; an
    # unescaped "." would also accept e.g. "indexXhtml" and yield bogus PIDs.
    search2 = re.findall(r'(\w+)(\/)(index\.html)', content)
    return (search1, search2)
def extract_pids(search1, search2):
    '''Collect the unique PIDs found in both groups of matches.

    Each match is expected to be an indexable item whose first element is
    the PID (the leading capture group of the search patterns). The PIDs of
    both inputs are merged into a single set, which drops duplicates.
    '''
    from_files = {match[0] for match in search1}
    from_indexes = {match[0] for match in search2}
    return from_files | from_indexes
def store_pids(ext, path='PIDs.log'):
    '''Append PIDs to a log file, one per line.

    Args:
        ext: Iterable of PID strings, written in iteration order.
        path: File to append to. Defaults to 'PIDs.log' in the current
            working directory.

    Raises:
        TypeError: if an element of ext cannot be concatenated with a string.
        OSError: if the file cannot be opened or written.
    '''
    lines = [out + '\n' for out in ext]
    if not lines:
        # Nothing to record: leave the file untouched (and uncreated).
        return
    # Open the file once for the whole batch rather than once per PID.
    with open(path, 'a') as result:
        result.writelines(lines)
def main():
    '''Run the pipeline: scan the log file, extract PIDs and store them.

    The log file path is taken from the first command-line argument. When
    it is missing, the script exits with a usage message instead of failing
    with an IndexError traceback.
    '''
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'script'
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('usage: {0} LOGFILE'.format(prog))
    matches = read_files()
    pids = extract_pids(*matches)
    store_pids(pids)
# Run the pipeline only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment