Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import sys
- import os.path
- import glob2
- import re
# NOTE(review): this module-level name shadows the builtin ``list`` for the
# whole module and is not read by any code visible here; kept only so that
# anything importing it keeps working.
list = []
# Accumulates the string form of every matching tag across all processed files.
tags = []
def processFiles(args):
    """Run processFile on every input named by an argv-style list.

    args: sequence laid out like sys.argv -- args[0] is ignored and args[1]
        is either a directory or the first of one or more file names.

    If args[1] is a directory, every "*.log" file matched directly inside it
    is processed; otherwise each entry of args[1:] is treated as a file name.
    Raises IndexError if args has fewer than two entries.
    """
    input_filemask = "log"
    directory = args[1]
    if os.path.isdir(directory):
        print ("processing a directory")
        list_of_files = glob2.glob('%s/*.%s' % (directory, input_filemask))
    else:
        print ("processing a list of files")
        # Honour the argument that was passed in instead of reaching back
        # to sys.argv, so the function behaves the same for any caller.
        list_of_files = args[1:]
    for file_name in list_of_files:
        print (file_name)
        processFile(file_name)
def has_class_but_no_id(tag):
    """Return True for a tag that has a ``class`` attribute and no ``id``.

    Used as a filter function for ``soup.find_all``. The tag name is not
    checked, so any element type can match.
    """
    return tag.has_attr('class') and not tag.has_attr('id')
def processFile(filename):
    """Parse one file as HTML and print its tags mentioning "iot platform".

    filename: path of the file to parse with BeautifulSoup's 'html.parser'.

    Every tag accepted by has_class_but_no_id is stringified and appended to
    the module-level ``tags`` list. Tags from this file whose text contains
    "iot platform" are printed, UTF-8 encoded.
    """
    # The context manager closes the handle once the document is parsed.
    with open(filename) as handle:
        soup = BeautifulSoup(handle, 'html.parser')

    file_tags = [str(tag) for tag in soup.find_all(has_class_but_no_id)]
    tags.extend(file_tags)

    # Scan only this file's tags: iterating the shared ``tags`` list would
    # print matches from previously processed files again on every call.
    for result in file_tags:
        if "iot platform" in result:
            # NOTE(review): under Python 3 this prints the bytes repr
            # (b'...'); confirm whether plain text output is wanted instead.
            print(result.encode("utf-8"))
# Script entry point: needs at least one command-line argument (a directory
# or one or more file names) and hands the full argv list to processFiles.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        processFiles(sys.argv)
    else:
        # TODO: replace this placeholder with real usage text.
        print ("usage message")
Advertisement
Add Comment
Please, Sign In to add comment