Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os, ctypes, datetime
# Symbolic names of SPSS I/O return codes, keyed by the numeric code
# (the position in the list below is the code).
# NOTE(review): the numeric values have not been checked against the
# library header (spssdio.h) -- confirm before relying on the names.
retcodes = dict(enumerate([
    "SPSS_OK",
    "SPSS_FITAB_FULL",
    "SPSS_FILE_OERROR",
    "SPSS_NO_MEMORY",
    "SPSS_FILE_RERROR",
    "SPSS_INVALID_FILE",
    "SPSS_NO_TYPE2",
    "SPSS_NO_TYPE999",
]))
# Print/write format codes of the SPSS I/O module:
#     numeric code -> (symbolic name, human-readable description)
#
# Code 22 is SPSS_FMT_DATE_TIME. With that entry in place every later name
# lines up with its own description (e.g. 24 is the Julian date format,
# 26 the day of the week, 31 the percent format), and the table ends with
# SPSS_FMT_SDATE at code 39. Codes 13, 14, 18 and 19 are not listed.
printTypes = {
    1: ('SPSS_FMT_A', 'Alphanumeric'),
    2: ('SPSS_FMT_AHEX', 'Alphanumeric hexadecimal'),
    3: ('SPSS_FMT_COMMA', 'F Format with commas'),
    4: ('SPSS_FMT_DOLLAR', 'Commas and floating dollar sign'),
    5: ('SPSS_FMT_F', 'Default Numeric Format'),
    6: ('SPSS_FMT_IB', 'Integer binary'),
    7: ('SPSS_FMT_PIBHEX', 'Positive integer binary - hex'),
    8: ('SPSS_FMT_P', 'Packed decimal'),
    9: ('SPSS_FMT_PIB', 'Positive integer binary unsigned'),
    10: ('SPSS_FMT_PK', 'Positive integer binary unsigned'),
    11: ('SPSS_FMT_RB', 'Floating point binary'),
    12: ('SPSS_FMT_RBHEX', 'Floating point binary hex'),
    15: ('SPSS_FMT_Z', 'Zoned decimal'),
    16: ('SPSS_FMT_N', 'N Format- unsigned with leading 0s'),
    17: ('SPSS_FMT_E', 'E Format- with explicit power of 10'),
    20: ('SPSS_FMT_DATE', 'Date format dd-mmm-yyyy'),
    21: ('SPSS_FMT_TIME', 'Time format hh:mm:ss.s'),
    22: ('SPSS_FMT_DATE_TIME', 'Date and Time'),
    23: ('SPSS_FMT_ADATE', 'Date format mm/dd/yyyy'),
    24: ('SPSS_FMT_JDATE', 'Julian date - yyyyddd'),
    25: ('SPSS_FMT_DTIME', 'Date-time dd hh:mm:ss.s'),
    26: ('SPSS_FMT_WKDAY', 'Day of the week'),
    27: ('SPSS_FMT_MONTH', 'Month'),
    28: ('SPSS_FMT_MOYR', 'mmm yyyy'),
    29: ('SPSS_FMT_QYR', 'q Q yyyy'),
    30: ('SPSS_FMT_WKYR', 'ww WK yyyy'),
    31: ('SPSS_FMT_PCT', 'Percent - F followed by %'),
    32: ('SPSS_FMT_DOT', 'Like COMMA, switching dot for comma'),
    33: ('SPSS_FMT_CCA', 'User Programmable currency format'),
    34: ('SPSS_FMT_CCB', 'User Programmable currency format'),
    35: ('SPSS_FMT_CCC', 'User Programmable currency format'),
    36: ('SPSS_FMT_CCD', 'User Programmable currency format'),
    37: ('SPSS_FMT_CCE', 'User Programmable currency format'),
    38: ('SPSS_FMT_EDATE', 'Date in dd/mm/yyyy style'),
    39: ('SPSS_FMT_SDATE', 'Date in yyyy/mm/dd style'),
}
def loadSavFile(fn):
    """Open the SPSS system file *fn* for reading through spssio32.dll.

    Parameters:
        fn -- path of the .sav file to open.

    Returns:
        (retcode, spssio, fh) where retcode is the value returned by
        spssOpenRead, spssio is the loaded ctypes library object and fh is
        the integer file handle that spssOpenRead stored in its output
        argument.

    Raises:
        Exception -- when *fn* does not exist. The check runs before the
        DLL is touched, so a bad path is reported even where the library
        cannot be loaded.
    """
    import sys  # local import: only needed to encode text file names

    if not os.path.exists(fn):
        raise Exception("File '%s' does not exist!" % fn)

    # spssio32.dll is looked up via PATH; add the current directory once
    # instead of growing PATH on every call.
    dllDir = os.path.abspath(os.curdir)
    searchPath = os.environ.get("PATH", "")
    if dllDir not in searchPath.split(os.pathsep):
        os.environ["PATH"] = searchPath + os.pathsep + dllDir
    ctypes.cdll.LoadLibrary("spssio32.dll")
    spssio = ctypes.windll.spssio32

    # c_char_p needs a byte string; encode text paths explicitly.
    if isinstance(fn, bytes):
        encodedName = fn
    else:
        encodedName = fn.encode(sys.getfilesystemencoding())

    # spssOpenRead produces the handle itself through the output argument;
    # that value is what has to be handed to the other spss* calls.
    handle = ctypes.c_int()
    retcode = spssio.spssOpenRead(ctypes.c_char_p(encodedName),
                                  ctypes.byref(handle))
    return retcode, spssio, handle.value
def getNumberofVariables(fh, spssio):
    """Query the number of variables in the file behind handle *fh*.

    Calls spssio.spssGetNumberofVariables with an int output argument and
    returns (retcode, numVars).
    """
    count = ctypes.c_int()
    status = spssio.spssGetNumberofVariables(fh, ctypes.pointer(count))
    return status, count.value
def getVarNamesAndTypes(fh, spssio):
    """Fetch the names and types of all variables of the file behind *fh*.

    spssGetVarNames allocates the two result arrays itself and hands them
    back through pointer-to-pointer output arguments (char *** for the
    names, int ** for the types), together with the variable count. The
    outputs are therefore NULL pointers the library fills in, not arrays
    allocated on the Python side.

    Returns:
        (retcode, varNames, varTypes) -- two parallel lists. Both lists are
        empty when the library did not return any arrays.
    """
    numVars = ctypes.c_int()
    namesPtr = ctypes.POINTER(ctypes.c_char_p)()
    typesPtr = ctypes.POINTER(ctypes.c_int)()
    retcode = spssio.spssGetVarNames(fh,
                                     ctypes.pointer(numVars),
                                     ctypes.pointer(namesPtr),
                                     ctypes.pointer(typesPtr))
    # A NULL output means nothing was allocated: nothing to read or free.
    if not namesPtr or not typesPtr:
        return retcode, [], []
    try:
        # Indexing copies each value into a Python object, so the results
        # stay valid after the library memory is released below.
        count = numVars.value
        varNames = [namesPtr[i] for i in range(count)]
        varTypes = [typesPtr[i] for i in range(count)]
    finally:
        # The arrays belong to the library and must be given back to it.
        spssio.spssFreeVarNames(namesPtr, typesPtr, numVars)
    return retcode, varNames, varTypes
def getNumberofCases(fh, spssio):
    """Query the number of cases (rows) in the file behind handle *fh*.

    Calls spssio.spssGetNumberofCases with a long output argument, prints
    the return code and the count, and returns (retcode, nCases).
    """
    caseCount = ctypes.c_long()
    status = spssio.spssGetNumberofCases(fh, ctypes.pointer(caseCount))
    total = caseCount.value
    print("nCases: %s %s" % (status, total))
    return status, total
def getVarHandle(fh, spssio, variable):
    """Look up the handle of the variable named *variable*.

    Calls spssio.spssGetVarHandle with the name as a C string and a double
    output argument; returns (retcode, varHandle) with the handle as a
    Python float.
    """
    handle = ctypes.c_double()
    status = spssio.spssGetVarHandle(fh,
                                     ctypes.c_char_p(variable),
                                     ctypes.pointer(handle))
    return status, handle.value
def getVarAttributes(fh, spssio, variable="educ"):
    """Fetch the custom attributes of the variable named *variable*.

    spssGetVarAttributes returns two library-allocated string arrays through
    char *** output arguments plus the number of attributes. The outputs
    are therefore NULL string-array pointers the library fills in; a
    single-character buffer would be overwritten past its end.

    Returns:
        (retcode, attribNames, attribText, nAttributes) -- two parallel
        lists of attribute names and texts, and the count reported by the
        library. The lists are empty when no arrays were returned.
    """
    varName = ctypes.c_char_p(variable)
    namesPtr = ctypes.POINTER(ctypes.c_char_p)()
    textsPtr = ctypes.POINTER(ctypes.c_char_p)()
    nAttributes = ctypes.c_int()
    retcode = spssio.spssGetVarAttributes(fh,
                                          varName,
                                          ctypes.pointer(namesPtr),
                                          ctypes.pointer(textsPtr),
                                          ctypes.pointer(nAttributes))
    count = nAttributes.value
    attribNames, attribText = [], []
    # NULL outputs mean nothing was allocated: nothing to read or free.
    if namesPtr and textsPtr:
        try:
            # Indexing copies each string, so the lists outlive the free.
            attribNames = [namesPtr[i] for i in range(count)]
            attribText = [textsPtr[i] for i in range(count)]
        finally:
            # The arrays belong to the library and must be given back to it.
            spssio.spssFreeAttributes(namesPtr, textsPtr, nAttributes)
    return retcode, attribNames, attribText, count
def getValueNumeric(fh, spssio, varHandle):
    """Read the numeric value of one variable from the current case.

    Passes *varHandle* to spssio.spssGetValueNumeric as a C double together
    with a double output argument; returns (retcode, numValue).
    """
    cell = ctypes.c_double()
    status = spssio.spssGetValueNumeric(fh,
                                        ctypes.c_double(varHandle),
                                        ctypes.pointer(cell))
    return status, cell.value
def getValueChar(fh, spssio, varHandle, strSize=200):
    """Read the string value of one variable from the current case.

    Parameters:
        fh        -- file handle.
        spssio    -- loaded I/O library object.
        varHandle -- handle of the string variable (passed as a C double).
        strSize   -- size in bytes of the receive buffer; this same size is
                     reported to the library.

    Returns:
        (retcode, charValue) where charValue is the buffer content up to
        the first NUL byte.
    """
    # The buffer really has to be strSize bytes long: the library is told
    # it may write that many bytes into it.
    buf = ctypes.create_string_buffer(strSize)
    retcode = spssio.spssGetValueChar(fh,
                                      ctypes.c_double(varHandle),
                                      buf,
                                      ctypes.c_int(strSize))
    return retcode, buf.value
def getVarPrintFormat(fh, spssio, variable="educ"):
    """Query the print format of the variable named *variable*.

    Calls spssio.spssGetVarPrintFormat with three int output arguments and
    returns (retcode, printType, printDec, printWid).
    """
    fmtType = ctypes.c_int()
    fmtDec = ctypes.c_int()
    fmtWid = ctypes.c_int()
    status = spssio.spssGetVarPrintFormat(fh,
                                          ctypes.c_char_p(variable),
                                          ctypes.pointer(fmtType),
                                          ctypes.pointer(fmtDec),
                                          ctypes.pointer(fmtWid))
    return status, fmtType.value, fmtDec.value, fmtWid.value
def spss2IsoDate(spssDateValue, missingDateValue=""):
    """Convert an SPSS date value to an ISO 8601 date string.

    *spssDateValue* is taken as a number of seconds counted from
    1582-10-14 00:00:00. The result is the calendar date as 'YYYY-MM-DD'
    (any time-of-day part is dropped). When the value is too large or too
    small to be represented, *missingDateValue* is returned instead.
    """
    epoch = datetime.datetime(1582, 10, 14)
    try:
        moment = epoch + datetime.timedelta(seconds=spssDateValue)
    except OverflowError:
        return missingDateValue
    return moment.date().isoformat()
def savReader(savFileName, debug=True):
    """Generate the cases (rows) of an SPSS system file as Python lists.

    Parameters:
        savFileName -- path of the .sav file.
        debug       -- when true, print every 100th record while reading.

    Yields:
        One list per case, holding one value per variable in the order
        reported by getVarNamesAndTypes. Variables of type 0 are read as
        numbers, all others as strings; numeric variables whose print
        format is SPSS_FMT_DATE are converted with spss2IsoDate.

    Raises:
        Exception -- when the file cannot be opened (the return code is
        printed first). The file is closed again when the generator is
        exhausted, closed early, or fails part-way.
    """
    retcode, spssio, fh = loadSavFile(savFileName)

    # Test the numeric code itself: looking it up first would turn an
    # unlisted code into a KeyError before it could be reported here.
    if retcode != 0:
        if retcode in retcodes:
            print("Error %s" % retcodes[retcode])
        else:
            print("Unknown error code (%s)" % retcode)
        raise Exception("You fail!")

    try:
        numVars = getNumberofVariables(fh, spssio)[1]
        varNames, varTypes = getVarNamesAndTypes(fh, spssio)[1:]
        nCases = getNumberofCases(fh, spssio)[1]
        print("*" * 70)
        print("File '%s' has %s columns (variables) and %s rows (%s values)" %
              (savFileName, numVars, nCases, numVars * nCases))
        print("*" * 70)

        # Handle and print format depend only on the variable, so resolve
        # them once instead of once per case. Names and types are paired
        # position by position over the full lists.
        columns = []
        for varName, varType in zip(varNames, varTypes):
            varHandle = getVarHandle(fh, spssio, variable=varName)[1]
            printType = getVarPrintFormat(fh, spssio, variable=varName)[1]
            # .get: a format code missing from the table is "not a date".
            formatName = printTypes.get(printType, ("", ""))[0]
            columns.append((varHandle, varType, formatName == 'SPSS_FMT_DATE'))

        for i in range(nCases):
            spssio.spssReadCaseRecord(fh)
            record = []
            for varHandle, varType, isDate in columns:
                if varType == 0:
                    value = getValueNumeric(fh, spssio, varHandle)[1]
                    if isDate:
                        value = spss2IsoDate(value)
                else:
                    value = getValueChar(fh, spssio, varHandle)[1]
                record.append(value)
            if debug and i % 100 == 0:
                print("record %s %s" % (i + 1, record))
            yield record
    finally:
        # Runs on normal exhaustion, on generator close and on errors.
        spssio.spssCloseRead(fh)
if __name__ == "__main__":
    # Smoke run: read a sample file from the SPSS Evaluation directory to
    # the end and discard the records.
    # Another sample to try: r"C:\Program Files\SPSS Evaluation\anorectic.sav"
    samplePath = r"C:\Program Files\SPSS Evaluation\Employee data.sav"
    for _row in savReader(samplePath):
        pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement