Untitled

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from urllib import request
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin
import openpyxl as op
import datetime
import time


def change_window(browser):
    all_handles = set(browser.window_handles)
    switch_to = all_handles - set([browser.current_window_handle])
    assert len(switch_to) == 1
    browser.switch_to.window(*switch_to)


def main():
    for i in range(1,9):
        wb = op.load_workbook('一般名称.xlsx')
        ws = wb.active
        word = ws['A'+str(i)].value

        driver = webdriver.Chrome(r'C:/chromedriver.exe')
        driver.get("https://www.pmda.go.jp/PmdaSearch/kikiSearch/")
        #id検索
        elem_search_word = driver.find_element_by_id("txtName")
        elem_search_word.send_keys(word)
        #name検索
        elem_search_btn = driver.find_element_by_name('btnA')
        elem_search_btn.click()
        change_window(driver)

        #print(driver.page_source)
        cur_url = driver.current_url
        html = driver.page_source
        soup = BeautifulSoup(html,'html.parser')
        #print(cur_url)

        has_pdf_link = False
        print(word)

        wb = op.load_workbook('URL_DATA.xlsx')
        ws = wb.active
        ws['C'+str(i)].value = word

        for a_tag in soup.find_all('a'):
            link_pdf = (urljoin(cur_url, a_tag.get('href')))
            #link_PDFから文末がpdfと文中にPDFが入っているものを抽出
            #print(word)

            if (not link_pdf.lower().endswith('.pdf')) and ('/ResultDataSetPDF/' not in link_pdf):
                continue
            if ('searchhelp' not in link_pdf):
                has_pdf_link = True
                print(link_pdf)
                ws['B'+str(i)].value = link_pdf

        if not has_pdf_link:
            print('False')
            ws['B'+str(i)].value = has_pdf_link

            time.sleep(2)
            time_data = datetime.datetime.today()

            ws['A'+str(i)].value = time_data

        #wb = op.load_workbook('URL_DATA.xlsx')
        #ws = wb.active
            #時間を記入
        #ws['A'+str(i)].value = time_data
        #URLを記入
        #ws['B'+str(i)].value = link_pdf
        #一般名称を記入
        #ws['C'+str(i)].value = word

        wb.save('URL_DATA.xlsx')


if __name__ == "__main__":
    main()