Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# For every zip archive in ``zipList``: extract each ``.txt`` member whose name
# contains ``product_<id>_snapshot``, parse it with pandas, and reduce it to a
# single summary row. The rows of one archive are concatenated into ``df`` and
# printed.
#
# NOTE(review): the pasted original had lost its indentation. The nesting used
# here (a fresh ``df`` per archive, concatenated after the member loop) is
# reconstructed from statement order and the sample output -- confirm.
for zipname in zipList:
    df = []
    # The context manager closes the archive handle even if parsing raises.
    with zipfile.ZipFile(zipname) as archive:
        fileList = archive.namelist()
        for fileName in fileList:
            if not fileName.endswith('.txt'):
                continue

            # The product id is the text between "product_" and "_snapshot".
            # Members that do not follow this naming scheme are skipped rather
            # than crashing on ``None.group`` (and are not extracted at all).
            id_match = re.search(r'product_(.+?)_snapshot', fileName)
            if id_match is None:
                continue
            p_id = int(id_match.group(1))

            # Extracts into the current working directory, which is where
            # read_csv then looks for ``fileName``.
            archive.extract(fileName)

            # Records are terminated by "]" rather than newlines.
            # ``on_bad_lines='warn'`` is the replacement for the removed
            # ``error_bad_lines=False`` (malformed records are skipped with a
            # warning, as before).
            data = pd.read_csv(
                fileName,
                lineterminator=']',
                low_memory=False,
                on_bad_lines='warn',
                header=None,
            )

            # Split column 1 at its first "[": the text before it stays in
            # column 1 (row labels such as 'bids:'), the text after it becomes
            # column 10 (parsed as float further down).
            new = data[1].str.split("[", n=1, expand=True)
            data[1] = new[0]
            # The last parsed record is discarded before column 10 is filled;
            # the assignment aligns on the index, so the dropped row stays out.
            data.drop(data.index[-1], inplace=True)
            data[10] = new[1].str.strip('[').str.strip('"')
            data['id'] = p_id
            data = data.loc[:, [1, 2, 10, 'id']]

            # Empty / whitespace-only labels become NaN so that ffill carries
            # the previous label down. (The original pattern ``^s*$`` lacked
            # the backslash and matched runs of the letter "s" instead.)
            data[1] = data[1].replace(r'^\s*$', np.nan, regex=True)
            data = data.ffill()

            # Discard rows labelled 'bids:' and renumber from 0 so that index
            # labels and positions coincide below. A boolean mask is used so a
            # file without any 'bids:' row does not raise KeyError.
            data = data[data[1] != 'bids:'].reset_index(drop=True)

            # Row whose column-10 value is closest to 105% of the first row's.
            values = data[10].astype('float')
            idx = (values - values.iloc[0] * 1.05).abs().idxmin()

            # Column 2 becomes a running total; the selected row therefore
            # carries the cumulative sum up to and including itself.
            data[2] = data[2].cumsum()

            # One-row frame for the selected record.
            final = data.iloc[idx].to_frame().transpose()
            final['id'] = final['id'].astype(int)
            df.append(final)

    # pd.concat raises on an empty list; fall back to an empty frame when the
    # archive contained no usable snapshot files.
    df = pd.concat(df, axis=0) if df else pd.DataFrame()
    print(df)
- 1 2 10 id
- 1278 asks: 96965.4 2228.31344 31
- 1 2 10 id
- 23 asks: 197716 1.07176 62
- 1 2 10 id
- 70 asks: 1.24412e+06 0.43061 84
- 1 2 10 id
- 176 asks: 184.386 135927785.19000 11
- 1 2 10 id
- 1278 asks: 96965.4 2228.31344 31
- 23 asks: 197716 1.07176 62
- 70 asks: 1.24412e+06 0.43061 84
- 176 asks: 184.386 135927785.19000 11
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement