Advertisement
Guest User

Untitled

a guest
May 27th, 2015
307
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.24 KB | None | 0 0
  1. import sqlite3
  2. import glob
  3. import sas7bdat
  4.  
  5. class SASexport(sas7bdat.SAS7BDAT):
  6. def head(self, n=5):
  7. for i, row in enumerate(self.readlines()):
  8. print row
  9. if i == n:
  10. break
  11. if n > self.header.properties.row_count:
  12. print 'n exceeds the total number and here shows all rows!'
  13.  
  14. def meta(self):
  15. print self.header
  16.  
  17. def to_sqlite(self, sqlitedb):
  18. if sqlitedb is None or not isinstance(sqlitedb, str):
  19. sqlitedb = ':memory'
  20. print 'not valid output name and instead use in-memory database'
  21. cols = self.columns
  22. strs = [''] * len(cols)
  23. for i, n in enumerate(cols):
  24. if n.type == "number":
  25. if n.format in self.DATE_FORMAT_STRINGS:
  26. strs[i] = '{} date'.format(n.name)
  27. elif n.format in self.DATE_TIME_FORMAT_STRINGS:
  28. strs[i] = '{} timestamp'.format(n.name)
  29. else:
  30. strs[i] = '{} real'.format(n.name)
  31. else:
  32. strs[i] = '{} varchar({})'.format(n.name,n.length)
  33. table = self.header.properties.name
  34. cmd1 = "CREATE TABLE {} ({})".format(table, ', '.join(strs))
  35. cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(table,
  36. ','.join(['?']*len(cols)))
  37. conn = sqlite3.connect(sqlitedb)
  38. c = conn.cursor()
  39. for i, line in enumerate(self.readlines()):
  40. if i == 0:
  41. c.execute('DROP TABLE IF EXISTS {}'.format(table))
  42. c.execute(cmd1)
  43. else:
  44. c.execute(cmd2, line)
  45. conn.commit()
  46. c.close()
  47.  
  48. def to_pandas(self):
  49. try:
  50. import pandas
  51. except ImportError:
  52. raise ImportError('No pandas moduleis available and this'
  53. 'method cannot used')
  54. rst = []
  55. for i, x in enumerate(self.readlines()):
  56. if i == 0:
  57. colnames = x
  58. else:
  59. rst.append(x)
  60. df = pandas.DataFrame(data = rst, columns = colnames)
  61. return df
  62.  
  63. class SASbatchexport:
  64. def __init__(self, directory):
  65. if directory is None or not isinstance(directory, str):
  66. raise ValueError('SAS library path has to be specified')
  67. self.directory = directory
  68. def to_sqlitedb(self, dest=None):
  69. """Export all SAS data sets to a SQLite database"""
  70. if dest is None or not isinstance(dest, str):
  71. print 'The output SQLite db will be name as SASOUTPUT.db'
  72. dest = 'SASOUTPUT.db'
  73. s = self.directory + '/*.sas7bdat'
  74. for sasfile in glob.glob(s):
  75. _data = SASexport(sasfile)
  76. _data.to_sqlite(dest)
  77. print 'SAS dataset {} has been successfully exported'.format( \
  78. _data.header.properties.name)
  79.  
  80. if __name__ == '__main__':
  81. # Test the export feature
  82. data = SASexport('d:/google/onedrive/class.sas7bdat')
  83. data.head(30)
  84. data.meta()
  85. data.to_sqlite('d:/tmp/test3.db')
  86. data = SASexport('d:/google/onedrive/prdsal2.sas7bdat')
  87. data.head()
  88. df = data.to_pandas()
  89. print df
  90.  
  91. # Test the batch export feature
  92. lib = SASbatchexport('d:/google/onedrive/')
  93. lib.to_sqlitedb('d:/tmp/test.db')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement