Advertisement
Guest User

Python SPSSIO code fragment

a guest
Mar 16th, 2011
233
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.17 KB | None | 0 0
  1. import os, ctypes, datetime
  2.  
  3. global retcodes
  4. retcodes = {0: "SPSS_OK",
  5.             1: "SPSS_FITAB_FULL",
  6.             2: "SPSS_FILE_OERROR",
  7.             3: "SPSS_NO_MEMORY",
  8.             4: "SPSS_FILE_RERROR",
  9.             5: "SPSS_INVALID_FILE",
  10.             6: "SPSS_NO_TYPE2",
  11.             7: "SPSS_NO_TYPE999"}
  12.  
  13. global printTypes
  14. printTypes =  {1: ('SPSS_FMT_A', 'Alphanumeric'),
  15.                2: ('SPSS_FMT_AHEX', 'Alphanumeric hexadecimal'),
  16.                3: ('SPSS_FMT_COMMA', 'F Format with commas'),
  17.                4: ('SPSS_FMT_DOLLAR', 'Commas and floating dollar sign'),
  18.                5: ('SPSS_FMT_F', 'Default Numeric Format'),
  19.                6: ('SPSS_FMT_IB', 'Integer binary'),
  20.                7: ('SPSS_FMT_PIBHEX', 'Positive integer binary - hex'),
  21.                8: ('SPSS_FMT_P', 'Packed decimal'),
  22.                9: ('SPSS_FMT_PIB', 'Positive integer binary unsigned'),
  23.                10: ('SPSS_FMT_PK', 'Positive integer binary unsigned'),
  24.                11: ('SPSS_FMT_RB', 'Floating point binary'),
  25.                12: ('SPSS_FMT_RBHEX', 'Floating point binary hex'),
  26.                15: ('SPSS_FMT_Z', 'Zoned decimal'),
  27.                16: ('SPSS_FMT_N', 'N Format- unsigned with leading 0s'),
  28.                17: ('SPSS_FMT_E', 'E Format- with explicit power of 10'),
  29.                20: ('SPSS_FMT_DATE', 'Date format dd-mmm-yyyy'),
  30.                21: ('SPSS_FMT_TIME', 'Time format hh:mm:ss.s'),
  31.                22: ('SPSS_FMT_ADATE', 'Date and Time'),
  32.                23: ('SPSS_FMT_JDATE', 'Date format dd-mmm-yyyy'),
  33.                24: ('SPSS_FMT_DTIME', 'Julian date - yyyyddd'),
  34.                25: ('SPSS_FMT_WKDAY', 'Date-time dd hh:mm:ss.s'),
  35.                26: ('SPSS_FMT_MONTH', 'Day of the week'),
  36.                27: ('SPSS_FMT_MOYR', 'Month'),
  37.                28: ('SPSS_FMT_QYR', 'mmm yyyy'),
  38.                29: ('SPSS_FMT_WKYR', 'q Q yyyy'),
  39.                30: ('SPSS_FMT_PCT', 'ww WK yyyy'),
  40.                31: ('SPSS_FMT_DOT', 'Percent - F followed by %'),
  41.                32: ('SPSS_FMT_CCA', 'Like COMMA, switching dot for comma'),
  42.                33: ('SPSS_FMT_CCB', 'User Programmable currency format'),
  43.                34: ('SPSS_FMT_CCC', 'User Programmable currency format'),
  44.                35: ('SPSS_FMT_CCD', 'User Programmable currency format'),
  45.                36: ('SPSS_FMT_CCE', 'User Programmable currency format'),
  46.                37: ('SPSS_FMT_EDATE', 'User Programmable currency format'),
  47.                38: ('SPSS_FMT_SDATE', 'Date in dd/mm/yyyy style')}
  48.  
  49. def loadSavFile(fn):
  50.     os.environ["PATH"] += ";" + os.path.abspath(os.curdir)
  51.     ctypes.cdll.LoadLibrary("spssio32.dll")
  52.     spssio = ctypes.windll.spssio32
  53.     libc = ctypes.cdll.msvcrt
  54.  
  55.     if os.path.exists(fn):
  56.         fh = libc._fdopen(fn, "rb")
  57.         fhPtr = ctypes.pointer(ctypes.c_int(fh))
  58.         retcode = spssio.spssOpenRead(ctypes.c_char_p(fn), fhPtr)
  59.         return retcode, spssio, fh
  60.     else:
  61.         raise Exception, "File '%s' does not exist!" % fn
  62.  
  63.  
  64. def getNumberofVariables(fh, spssio):
  65.     numVarsPtr = ctypes.pointer(ctypes.c_int())
  66.     retcode = spssio.spssGetNumberofVariables(fh, numVarsPtr)
  67.     numVars = numVarsPtr[0]
  68.     #print "Numvars: ", numVars
  69.     return retcode, numVars
  70.  
  71. def getVarNamesAndTypes(fh, spssio):
  72.     numVarsPtr = ctypes.pointer(ctypes.c_int())
  73.     spssio.spssGetNumberofVariables(fh, numVarsPtr)
  74.     numVars = numVarsPtr[0]
  75.    
  76.     varNamesArray = (ctypes.c_char_p * numVars)() ## this might not be initialized correctly
  77.     varNamesPtr = ctypes.pointer(varNamesArray)
  78.  
  79.     varTypesArray = (ctypes.c_int * numVars)()
  80.     varTypesPtr = ctypes.pointer(varTypesArray)
  81.    
  82.     retcode = spssio.spssGetVarNames(fh, numVarsPtr, varNamesPtr, varTypesPtr)
  83.  
  84.     varNames = [varNamesPtr[0][i] for i in range(numVars)]
  85.     varTypes = [varTypesPtr[0][i] for i in range(numVars)]
  86.        
  87.     #print "Varnames: ", retcode, varNames, varTypes
  88.     return retcode, varNames, varTypes
  89.  
  90. def getNumberofCases(fh, spssio):
  91.     numofCasesPtr = ctypes.pointer(ctypes.c_long())
  92.     retcode = spssio.spssGetNumberofCases(fh, numofCasesPtr)
  93.     nCases = numofCasesPtr[0]
  94.     print "nCases:", retcode, nCases
  95.     return retcode, nCases
  96.  
  97. def getVarHandle(fh, spssio, variable):
  98.     varName = ctypes.c_char_p(variable)
  99.     varHandlePtr = ctypes.pointer(ctypes.c_double())
  100.     retcode = spssio.spssGetVarHandle(fh, varName, varHandlePtr)
  101.     varHandle = varHandlePtr[0]
  102.     #print "varHandle", retcode, varHandle
  103.     return retcode, varHandle
  104.  
  105. def getVarAttributes(fh, spssio, variable="educ"):
  106.     varName = ctypes.c_char_p(variable)
  107.     attribNamesPtr = ctypes.pointer(ctypes.c_char())
  108.     attribTextPtr = ctypes.pointer(ctypes.c_char())
  109.     nAttributesPtr = ctypes.pointer(ctypes.c_int())
  110.     retcode = spssio.spssGetVarAttributes(fh, varName, attribNamesPtr, attribTextPtr, nAttributesPtr)
  111.     #print "VarAttributes", retcode, attribNamesPtr[0], attribTextPtr[0], nAttributesPtr[0]
  112.     return retcode, attribNamesPtr[0], attribTextPtr[0], nAttributesPtr[0]
  113.  
  114. def getValueNumeric(fh, spssio, varHandle):
  115.     numValuePtr = ctypes.pointer(ctypes.c_double())
  116.     retcode = spssio.spssGetValueNumeric(fh,
  117.                                ctypes.c_double(varHandle),
  118.                                numValuePtr)
  119.     numValue = numValuePtr[0]
  120.     #print "Numerical value: ", numValue
  121.     return retcode, numValue
  122.  
  123. def getValueChar(fh, spssio, varHandle):
  124.     strSize = 200
  125.     charValuePtr = ctypes.pointer(ctypes.c_char())
  126.     retcode = spssio.spssGetValueChar(fh,
  127.                             ctypes.c_double(varHandle),
  128.                             charValuePtr,
  129.                             ctypes.c_int(strSize))
  130.     charValue = charValuePtr[0]
  131.     #print "Character value: ", retcode, charValue
  132.     return retcode, charValue
  133.  
  134. def getVarPrintFormat(fh, spssio, variable="educ"):
  135.     varName = ctypes.c_char_p(variable)
  136.     printTypePtr = ctypes.pointer(ctypes.c_int())
  137.     printDecPtr = ctypes.pointer(ctypes.c_int())
  138.     printWidPtr = ctypes.pointer(ctypes.c_int())
  139.     retcode = spssio.spssGetVarPrintFormat(fh,
  140.                             varName,
  141.                             printTypePtr,
  142.                             printDecPtr,
  143.                             printWidPtr)
  144.     printType = printTypePtr[0]
  145.     printDec = printDecPtr[0]
  146.     printWid = printWidPtr[0]      
  147.     return retcode, printType, printDec, printWid
  148.  
  149. def spss2IsoDate(spssDateValue, missingDateValue=""):
  150.     gregorian = datetime.datetime(1582, 10, 14, 0, 0, 0)
  151.     try:
  152.         theDate = gregorian + datetime.timedelta(seconds=spssDateValue)
  153.         return datetime.date.isoformat(theDate)
  154.     except OverflowError:
  155.         return missingDateValue
  156.    
  157.  
  158. def savReader(savFileName):
  159.     debug = True
  160.     retcode, spssio, fh = loadSavFile(savFileName)
  161.     if retcodes[retcode] == "SPSS_OK":
  162.         numVars = getNumberofVariables(fh, spssio)[1]
  163.         varNames, varTypes = getVarNamesAndTypes(fh, spssio)[1:]
  164.         nCases = getNumberofCases(fh, spssio)[1]
  165.         varHandle = getVarHandle(fh, spssio, variable = "educ")[1]
  166.         attribNames, attribText, nAttributes = getVarAttributes(fh, spssio, variable="educ")[1:] # untested.
  167.  
  168.         print "*" * 70
  169.         print "File '%s' has %s columns (variables) and %s rows (%s values)" % \
  170.               (savFileName, numVars, nCases, numVars * nCases)
  171.         print "*" * 70
  172.  
  173.         varNames = ['id', 'gender', 'bdate', 'educ', 'jobcat', 'salary',
  174.                     'salbegin', 'jobtime', 'prevexp', 'minority']
  175. ##        varNames =  ['weight', 'mens', 'fast', 'binge', 'vomit', 'purge',
  176. ##                     'hyper', 'fami', 'eman', 'frie', 'school', 'satt',
  177. ##                     'sbeh', 'mood', 'preo', 'body', 'time', 'diag', 'tidi',
  178. ##                     'number', 'diag2', 'time2']
  179.         varNames = varNames[1:] ## first/zeroeth var causes ERROR -- why???
  180.         for i in range(nCases):
  181.             spssio.spssReadCaseRecord(fh)
  182.             record = []
  183.             for varName, varType in zip(varNames, varTypes):
  184.                 varHandle = getVarHandle(fh, spssio, variable=varName)[1]
  185.                 printType = getVarPrintFormat(fh, spssio, variable=varName)[1]
  186.                 if varType == 0:
  187.                     value = getValueNumeric(fh, spssio, varHandle)[1]
  188.                     if printTypes[printType][0] == 'SPSS_FMT_DATE':
  189.                         value = spss2IsoDate(value)
  190.                 else:
  191.                     value = getValueChar(fh, spssio, varHandle)[1]
  192.                 #if i % 100 == 0 and debug:
  193.                 #    print "Value/varName", value, varName
  194.                 record.append(value)
  195.  
  196.             if debug and i % 100 == 0:        
  197.                 print "record", i+1, record
  198.  
  199.             yield record
  200.         spssio.spssCloseRead(fh)
  201.  
  202.     else:
  203.         try:
  204.             print "Error", retcodes[retcode]
  205.         except KeyError:
  206.             print "Unknown error code (%s)" % retcode
  207.         finally:
  208.             raise Exception, "You fail!"
  209.  
  210. if __name__ == "__main__":
  211.     sav = savReader(r"C:\Program Files\SPSS Evaluation\Employee data.sav")
  212.     #sav = savReader(r"C:\Program Files\SPSS Evaluation\anorectic.sav")
  213.     for record in sav:
  214.         pass
  215.         #print record
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement