Advertisement
Guest User

Untitled

a guest
Aug 20th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.35 KB | None | 0 0
  1. import pandas as pd
  2. from bs4 import BeautifulSoup
  3. import sqlalchemy
  4. from sqlalchemy import create_engine
  5.  
  6.  
  7. path = 'd:/data/app_qcc512x_qcc302x.html'
  8. htmlfile = open(path, 'r', encoding='utf-8')
  9.  
  10. htmlhandle = htmlfile.read()
  11.  
  12.  
  13. soup = BeautifulSoup(htmlhandle, 'lxml')
  14.  
  15.  
  16.  
  17.  
  18. count = 0
  19. result = pd.DataFrame({},index=[0])
  20. result['author'] = ''
  21. result['title'] = ''
  22. result['source'] = ''
  23. new = result
  24. for item in soup.find_all('tr'):
  25. if 'AU ' in item.get_text():
  26. author = item.get_text()
  27. new['author'] = author
  28. elif 'TI ' in item.get_text():
  29. title = item.get_text()
  30. new['title'] = title
  31. elif 'SO ' in item.get_text():
  32. source = item.get_text()
  33. new['source'] = source
  34. count += 1
  35. result = result.append(new,ignore_index=True)
  36. print(count)
  37.  
  38.  
  39. connect_info = 'mysql+pymysql://{}:{}@{}:{}/{}?
  40. charset=utf8'.format("username", "password", "host", "port", "qcc")
  41. engine = create_engine(connect_info)
  42.  
  43. df.to_sql(name='app_qcc512x_qcc302x',
  44. con=engine,
  45. if_exists='append',
  46. index=False,
  47. dtype={'IterationId': sqlalchemy.types.Integer(),
  48. 'title': sqlalchemy.types.NVARCHAR(length=255)
  49. }
  50. )
  51.  
  52. File "C:\Users\yangyj\AppData\Local\Continuum\anaconda3\lib\site-
  53. packages\sqlalchemy\engine\url.py", line 71, in __init__
  54. self.port = int(port)
  55.  
  56. ValueError: invalid literal for int() with base 10: 'port'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement