Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sqlite3
- import glob
- import sas7bdat
- class SASexport(sas7bdat.SAS7BDAT):
- def head(self, n=5):
- for i, row in enumerate(self.readlines()):
- print row
- if i == n:
- break
- if n > self.header.properties.row_count:
- print 'n exceeds the total number and here shows all rows!'
- def meta(self):
- print self.header
- def to_sqlite(self, sqlitedb):
- if sqlitedb is None or not isinstance(sqlitedb, str):
- sqlitedb = ':memory'
- print 'not valid output name and instead use in-memory database'
- cols = self.columns
- strs = [''] * len(cols)
- for i, n in enumerate(cols):
- if n.type == "number":
- if n.format in self.DATE_FORMAT_STRINGS:
- strs[i] = '{} date'.format(n.name)
- elif n.format in self.DATE_TIME_FORMAT_STRINGS:
- strs[i] = '{} timestamp'.format(n.name)
- else:
- strs[i] = '{} real'.format(n.name)
- else:
- strs[i] = '{} varchar({})'.format(n.name,n.length)
- table = self.header.properties.name
- cmd1 = "CREATE TABLE {} ({})".format(table, ', '.join(strs))
- cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(table,
- ','.join(['?']*len(cols)))
- conn = sqlite3.connect(sqlitedb)
- c = conn.cursor()
- for i, line in enumerate(self.readlines()):
- if i == 0:
- c.execute('DROP TABLE IF EXISTS {}'.format(table))
- c.execute(cmd1)
- else:
- c.execute(cmd2, line)
- conn.commit()
- c.close()
- def to_pandas(self):
- try:
- import pandas
- except ImportError:
- raise ImportError('No pandas moduleis available and this'
- 'method cannot used')
- rst = []
- for i, x in enumerate(self.readlines()):
- if i == 0:
- colnames = x
- else:
- rst.append(x)
- df = pandas.DataFrame(data = rst, columns = colnames)
- return df
- class SASbatchexport:
- def __init__(self, directory):
- if directory is None or not isinstance(directory, str):
- raise ValueError('SAS library path has to be specified')
- self.directory = directory
- def to_sqlitedb(self, dest=None):
- """Export all SAS data sets to a SQLite database"""
- if dest is None or not isinstance(dest, str):
- print 'The output SQLite db will be name as SASOUTPUT.db'
- dest = 'SASOUTPUT.db'
- s = self.directory + '/*.sas7bdat'
- for sasfile in glob.glob(s):
- _data = SASexport(sasfile)
- _data.to_sqlite(dest)
- print 'SAS dataset {} has been successfully exported'.format( \
- _data.header.properties.name)
- if __name__ == '__main__':
- # Test the export feature
- data = SASexport('d:/google/onedrive/class.sas7bdat')
- data.head(30)
- data.meta()
- data.to_sqlite('d:/tmp/test3.db')
- data = SASexport('d:/google/onedrive/prdsal2.sas7bdat')
- data.head()
- df = data.to_pandas()
- print df
- # Test the batch export feature
- lib = SASbatchexport('d:/google/onedrive/')
- lib.to_sqlitedb('d:/tmp/test.db')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement