Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from urllib import request
- from bs4 import BeautifulSoup
- import requests
- from urllib.parse import urljoin
- import openpyxl as op
- import datetime
- import time
- def change_window(browser):
- all_handles = set(browser.window_handles)
- switch_to = all_handles - set([browser.current_window_handle])
- assert len(switch_to) == 1
- browser.switch_to.window(*switch_to)
- def main():
- for i in range(1,9):
- wb = op.load_workbook('一般名称.xlsx')
- ws = wb.active
- word = ws['A'+str(i)].value
- driver = webdriver.Chrome(r'C:/chromedriver.exe')
- driver.get("https://www.pmda.go.jp/PmdaSearch/kikiSearch/")
- #id検索
- elem_search_word = driver.find_element_by_id("txtName")
- elem_search_word.send_keys(word)
- #name検索
- elem_search_btn = driver.find_element_by_name('btnA')
- elem_search_btn.click()
- change_window(driver)
- #print(driver.page_source)
- cur_url = driver.current_url
- html = driver.page_source
- soup = BeautifulSoup(html,'html.parser')
- #print(cur_url)
- has_pdf_link = False
- print(word)
- wb = op.load_workbook('URL_DATA.xlsx')
- ws = wb.active
- ws['C'+str(i)].value = word
- for a_tag in soup.find_all('a'):
- link_pdf = (urljoin(cur_url, a_tag.get('href')))
- #link_PDFから文末がpdfと文中にPDFが入っているものを抽出
- #print(word)
- if (not link_pdf.lower().endswith('.pdf')) and ('/ResultDataSetPDF/' not in link_pdf):
- continue
- if ('searchhelp' not in link_pdf):
- has_pdf_link = True
- print(link_pdf)
- ws['B'+str(i)].value = link_pdf
- if not has_pdf_link:
- print('False')
- ws['B'+str(i)].value = has_pdf_link
- time.sleep(2)
- time_data = datetime.datetime.today()
- ws['A'+str(i)].value = time_data
- #wb = op.load_workbook('URL_DATA.xlsx')
- #ws = wb.active
- #時間を記入
- #ws['A'+str(i)].value = time_data
- #URLを記入
- #ws['B'+str(i)].value = link_pdf
- #一般名称を記入
- #ws['C'+str(i)].value = word
- wb.save('URL_DATA.xlsx')
- if __name__ == "__main__":
- main()
Add Comment
Please, Sign In to add comment