Guest User

Untitled

a guest
Mar 22nd, 2018
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.08 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import sys
  3. import os.path
  4. import glob2
  5. import re
  6.  
# NOTE(review): this name shadows the builtin `list`; nothing visible in this
# file reads the module-level value.
list = []
# Module-level accumulator: processFile appends the string form of every tag
# it matches, so this grows across all files handled in one run.
tags = []
  9.  
  10. def processFiles(args):
  11.     input_filemask = "log"
  12.     directory = args[1]
  13.     if os.path.isdir(directory):
  14.         print ("processing a directory")
  15.         list_of_files = glob2.glob('%s/*.%s' % (directory, input_filemask))
  16.     else:
  17.         print ("processing a list of files")
  18.         list_of_files = sys.argv[1:]
  19.     for file_name in list_of_files:
  20.         print (file_name)
  21.         processFile(file_name)
  22.  
  23. def has_class_but_no_id(tag): #method to return all <p></p> tags and contents
  24.     return tag.has_attr('class') and not tag.has_attr('id')
  25.  
  26. def processFile(filename):
  27.     soup = BeautifulSoup(open(filename), 'html.parser')
  28.     list = soup.find_all(has_class_but_no_id)
  29.     for tag in list:
  30.         tags.append(str(tag))
  31.     for result in tags:
  32.         if result.find("iot platform") != -1:
  33.             print(result.encode("utf-8"))
  34.  
  35. if __name__ == '__main__':
  36.     if (len(sys.argv) > 1):
  37.         processFiles(sys.argv)
  38.     else:
  39.         print ("usage message")
Advertisement
Add Comment
Please, Sign In to add comment