Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import xml.etree.ElementTree as ET
- import pandas as pd
- from sqlalchemy import create_engine
- from sqlalchemy.orm import sessionmaker, scoped_session
- # do this if running in jupyter
- # pd.set_option('display.max_columns', None)
- # convert XML to dataframe (assumes only one layer of nesting)
- def xml2df(xml_data):
- root = ET.XML(xml_data) # element tree
- all_records = []
- for i, child in enumerate(root):
- record = {}
- for subchild in child:
- record[subchild.tag] = subchild.text
- all_records.append(record)
- df = pd.DataFrame(all_records)
- # how to make datetimes from unix epoch ints
- df['CreatedTimestamp'] = pd.to_datetime(df['CreatedDate'], unit='s')
- df['ModifiedTimestamp'] = pd.to_datetime(df['ModifiedDate'], unit='s')
- return df
- # load XML to dataframe (gotta be small)
- xml_data = open('example.xml').read()
- df = xml2df(xml_data)
- # export dataframe to sqlite
- engine = create_engine('sqlite:///example.db')
- name = 'my_table'
- df.to_sql(name, engine, if_exists='replace')
- # see how many records loaded
- Session = scoped_session(sessionmaker(bind=engine))
- s = Session()
- result = s.execute('SELECT COUNT(*) FROM my_table').fetchall()
- print("{} records loaded".format(result[0][0]))
Add Comment
Please, Sign In to add comment