Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
'''
author: J.S.
program to take a list of IPs from an xlsx file,
process the IPs through a web scraper,
then output the desired datapoints in a new xlsx file
'''
## import the necessary python libraries
import os

import requests
import lxml.html
from bs4 import BeautifulSoup
from openpyxl import Workbook, load_workbook
from tkinter import Tk
from tkinter import filedialog
from tkinter import messagebox

# Shared file-type filter for both file dialogs.
FILETYPES = (("excel files", "*.xlsx"), ("all files", "*.*"))


def _scrape_device(session, url):
    """Log in to the device at *url* and return (model, serial, firmware).

    Posts the hard-coded credentials to /login.asp, then scrapes the
    /sysinf.asp info table.  Raises requests.RequestException on any
    network failure.
    """
    login = session.get("http://" + url + "/login.asp")
    ## parse the login page and collect every <input> inside the form
    login_html = lxml.html.fromstring(login.text)
    inputs = login_html.xpath(r'//form//input')
    form = {x.attrib["name"]: x.attrib["value"] for x in inputs}
    ## fill in the username and password, then post the form to log in
    form['username'] = 'root'
    form['password'] = 'crown'
    session.post("http://" + url + "/login.asp", data=form)
    ## navigate to the system info page to scrape the data
    r = session.get("http://" + url + "/sysinf.asp")
    soup = BeautifulSoup(r.content, "html.parser")
    text = soup.table.text
    # NOTE(review): fixed character offsets assume the sysinf.asp table
    # layout never changes — fragile; confirm against a live device.
    model_info = text[8:22]
    serial = text[105:112]
    firmware = text[133:141]
    return model_info, serial, firmware


def main():
    """Drive the whole job: pick files, scrape each IP, save the results."""
    # One hidden Tk root for all dialogs (the original created two visible,
    # empty root windows that were never withdrawn or destroyed).
    root = Tk()
    root.withdraw()

    ## open the data source file
    messagebox.showinfo("Source File", "Please select your input source file")
    input_path = filedialog.askopenfilename(
        initialdir="/", title="Select file", filetypes=FILETYPES)

    ## choose the output target file.  asksaveasfilename (not
    ## askopenfilename) lets the user create a file that does not exist yet.
    messagebox.showinfo("Output File", "Please define your output target file")
    output_path = filedialog.asksaveasfilename(
        initialdir="/", title="Select file",
        defaultextension=".xlsx", filetypes=FILETYPES)

    ## load the source sheet (dead `Workbook()` placeholder removed)
    source = load_workbook(input_path).active

    ## append to the output file if it already exists, otherwise start fresh
    if os.path.exists(output_path):
        out_wb = load_workbook(output_path)
    else:
        out_wb = Workbook()
    destination = out_wb.active

    print('populating the output file...')
    # One session reused for every device (the original opened a new,
    # never-closed session per row); closed automatically by the `with`.
    with requests.Session() as session:
        # min_row=1: openpyxl rows are 1-indexed (min_row=0 is invalid).
        # No max_row cap, so the entire IP list is processed, not just
        # the first two rows as the original's max_row=2 allowed.
        for row in source.iter_rows(min_row=1, max_col=1):
            cell = row[0]
            if cell.value is None:
                continue  # skip blank rows rather than scraping "None"
            url = str(cell.value)
            try:
                model_info, serial, firmware = _scrape_device(session, url)
            except requests.RequestException as exc:
                # Keep the batch alive on one bad/unreachable IP; record
                # the failure in the output instead of crashing the run.
                print(f"failed to scrape {url}: {exc}")
                destination.append([url, "ERROR", "", ""])
                continue
            ## append the scraped datapoints to the output sheet
            destination.append([url, model_info, serial, firmware])

    ## save to the file the user actually chose (the original ignored the
    ## chosen path and always wrote a hard-coded 'digiDataOutput.xlsx')
    out_wb.save(output_path)
    print("done!")  ## print completion message


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement