Guest User

Untitled

a guest
Jan 16th, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.22 KB | None | 0 0
  1. import xml.etree.ElementTree as ET
  2. import pandas as pd
  3. from sqlalchemy import create_engine
  4. from sqlalchemy.orm import sessionmaker, scoped_session
  5.  
  6. # do this if running in jupyter
  7. # pd.set_option('display.max_columns', None)
  8.  
  9. # convert XML to dataframe (assumes only one layer of nesting)
  10. def xml2df(xml_data):
  11. root = ET.XML(xml_data) # element tree
  12. all_records = []
  13. for i, child in enumerate(root):
  14. record = {}
  15. for subchild in child:
  16. record[subchild.tag] = subchild.text
  17. all_records.append(record)
  18. df = pd.DataFrame(all_records)
  19.  
  20. # how to make datetimes from unix epoch ints
  21. df['CreatedTimestamp'] = pd.to_datetime(df['CreatedDate'], unit='s')
  22. df['ModifiedTimestamp'] = pd.to_datetime(df['ModifiedDate'], unit='s')
  23.  
  24. return df
  25.  
  26. # load XML to dataframe (gotta be small)
  27. xml_data = open('example.xml').read()
  28. df = xml2df(xml_data)
  29.  
  30. # export dataframe to sqlite
  31. engine = create_engine('sqlite:///example.db')
  32. name = 'my_table'
  33. df.to_sql(name, engine, if_exists='replace')
  34.  
  35. # see how many records loaded
  36. Session = scoped_session(sessionmaker(bind=engine))
  37. s = Session()
  38. result = s.execute('SELECT COUNT(*) FROM my_table').fetchall()
  39. print("{} records loaded".format(result[0][0]))
Add Comment
Please, Sign In to add comment