Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Load author/title/source rows from an exported HTML table into MySQL.

The script parses every ``<tr>`` of the HTML file at ``path``, groups the
rows tagged 'AU ' (author), 'TI ' (title) and 'SO ' (source) into records,
and appends those records to the ``app_qcc512x_qcc302x`` table.
"""

import pandas as pd
import sqlalchemy
from bs4 import BeautifulSoup
from sqlalchemy import create_engine

path = 'd:/data/app_qcc512x_qcc302x.html'

# Context manager so the file handle is closed even if parsing fails.
with open(path, 'r', encoding='utf-8') as htmlfile:
    soup = BeautifulSoup(htmlfile.read(), 'lxml')

# Records are collected as plain dicts and turned into a DataFrame once at
# the end.  The previous approach aliased the accumulator (`new = result`),
# so each assignment overwrote every stored row, and it relied on
# DataFrame.append, which was removed in pandas 2.0.
records = []
current = {'author': '', 'title': '', 'source': ''}
for item in soup.find_all('tr'):
    text = item.get_text()
    if 'AU ' in text:
        current['author'] = text
    elif 'TI ' in text:
        current['title'] = text
    elif 'SO ' in text:
        current['source'] = text
        # NOTE(review): assumes the 'SO ' row is the last field of a record
        # (the original indentation was lost) -- confirm against the export.
        records.append(current)
        # Start a fresh record so fields never leak into the next one.
        current = {'author': '', 'title': '', 'source': ''}

count = len(records)
result = pd.DataFrame(records, columns=['author', 'title', 'source'])
print(count)

# The port must be numeric: leaving the literal placeholder "port" in the URL
# makes SQLAlchemy fail with
# "ValueError: invalid literal for int() with base 10: 'port'".
# 3306 is the MySQL default; replace the remaining placeholders with real
# connection settings before running.
connect_info = 'mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8'.format(
    "username", "password", "host", 3306, "qcc")
engine = create_engine(connect_info)

# Write the parsed frame (`result`); no variable named `df` exists here.
result.to_sql(
    name='app_qcc512x_qcc302x',
    con=engine,
    if_exists='append',
    index=False,
    dtype={
        'IterationId': sqlalchemy.types.Integer(),
        'title': sqlalchemy.types.NVARCHAR(length=255),
    },
)
- File "C:\Users\yangyj\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\url.py", line 71, in __init__
- self.port = int(port)
- ValueError: invalid literal for int() with base 10: 'port'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement