Advertisement
Guest User

Untitled

a guest
Dec 13th, 2017
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.92 KB | None | 0 0
  1. '''
  2. author: J.S.
  3.  
  4. program to take a list of IPs from an xlsx file,
  5. process the IPs through a web scraper,
  6. then output the desired datapoints in a new xlsx file
  7.  
  8. '''
  9.  
  10. ## import the necessary python libraries
  11. from openpyxl import Workbook, load_workbook
  12. import requests, lxml.html
  13. from bs4 import BeautifulSoup
  14. from tkinter import *
  15. from tkinter import filedialog
  16. from tkinter import messagebox
  17.  
  18. ## open the data source file
  19.  
  20. messagebox.showinfo("Source File", "Please select your input source file")
  21. inputFile = Tk()
  22. inputFile.filename =  filedialog.askopenfilename(initialdir = "/",title = "Select file",filetypes = (("excel files","*.xlsx"),("all files","*.*")))
  23.  
  24. ## assigns the selected file as the source
  25. wb = Workbook()
  26. wb = load_workbook(inputFile.filename)
  27. source = wb.active
  28.  
  29. ## open the output target file
  30. messagebox.showinfo("Output File", "Please define your output target file")
  31.  
  32. outputFile = Tk()
  33. outputFile.filename = filedialog.askopenfilename(initialdir = "/",title = "Select file",filetypes = (("excel files","*.xlsx"),("all files","*.*")))
  34.  
  35. ## open the data output file
  36. openDest = Workbook()
  37. openDest = load_workbook(outputFile.filename)
  38. destination = openDest.active
  39.  
  40. for col in source.iter_rows(min_row=0, max_row=2, max_col=1): ## loops through the rows in the source file
  41.  
  42.     cell = col[0]## initializes the cell variable as index zero
  43.  
  44.     url = str(cell.value)## defines the url based on the value of the current cell in the source file
  45.  
  46.     s = requests.session()## defines the session object as 's'
  47.    
  48.     login = s.get("http://" + url + "/login.asp")## creates a session begin, and navigates to the login page
  49.    
  50.     login_html = lxml.html.fromstring(login.text)## parses the login page for processing
  51.    
  52.     inputs = login_html.xpath(r'//form//input')## declares the login form field xpath
  53.    
  54.     form = {x.attrib["name"]: x.attrib["value"] for x in inputs}## attribute variables set for form fields
  55.  
  56.     ## defines the username and password for login
  57.     form['username'] = 'root'
  58.     form['password'] = 'crown'
  59.  
  60.     ## posts the username and password to the form
  61.     response = s.post("http://" + url + "/login.asp", data=form)
  62.  
  63.     ## navigates to the system info page to scrape the data
  64.     r = s.get("http://" + url + "/sysinf.asp")
  65.    
  66.     ## defines the soup as the page text, and provides the parser argument
  67.     soup = BeautifulSoup(r.content, "html.parser")
  68.  
  69.     ## strips out all unnecessary string data, and stores the Model, Serial, and Firmware information
  70.     modelInfo = soup.table.text[8:22]
  71.     serial = soup.table.text[105:112]
  72.     firmWare = soup.table.text[133:141]
  73.    
  74.     destination.append(list([url, modelInfo, serial, firmWare]))## appends the data to the output target file
  75.  
  76. print('populating the output file...')
  77.            
  78. openDest.save('digiDataOutput.xlsx')## saves the output targer document
  79.  
  80. print("done!") ## print completion message
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement