Advertisement
Guest User

Untitled

a guest
Jun 16th, 2019
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.83 KB | None | 0 0
  # Build one summary row per '.txt' member found in the archives listed in
  # `zipList`, then combine all rows into a single DataFrame `df` and print it.
  #
  # For every matching member the script:
  #   * extracts it to disk and parses it with ']' as the record terminator;
  #   * takes the product id from the 'product_<id>_snapshot' part of its name;
  #   * keeps columns 1, 2, 10 and 'id', forward-fills the label in column 1
  #     and removes the rows labelled 'bids:';
  #   * selects the row whose column-10 value is closest to 1.05x the
  #     column-10 value of the first remaining row, reporting the running
  #     total of column 2 up to that row.
  #   (presumably column 1 is the order-book side, 2 a quantity and 10 a
  #   price -- TODO confirm against the snapshot format)

  # The accumulator lives OUTSIDE the archive loop: re-creating it per archive
  # yields one single-row frame per zip instead of one combined frame.
  frames = []
  product_id_pattern = re.compile(r'product_(.+?)_snapshot')

  for zipname in zipList:
      # Context manager guarantees the archive handle is closed.
      with zipfile.ZipFile(zipname) as archive:
          fileList = archive.namelist()

          for fileName in fileList:
              if not fileName.endswith('.txt'):
                  continue

              # extract() returns the path it actually wrote, which can differ
              # from the raw member name after sanitisation.
              extracted_path = archive.extract(fileName)
              p_id = int(product_id_pattern.search(fileName).group(1))

              # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in
              # favour of `on_bad_lines='skip'`; kept unchanged for the pandas
              # version this script targets -- confirm before upgrading.
              data = pd.read_csv(
                  extracted_path,
                  lineterminator=']',
                  low_memory=False,
                  error_bad_lines=False,
                  header=None,
              )

              # Column 1 holds "<label>[<value>"; split it into label and value.
              new = data[1].str.split("[", n=1, expand=True)
              data[1] = new[0]
              # The final record is dropped before the value column is attached.
              data.drop(data.index[-1], inplace=True)
              data[10] = new[1].str.strip('[').str.strip('"')
              data['id'] = p_id

              data = data.loc[:, [1, 2, 10, 'id']]
              # Blank / whitespace-only labels become NaN so ffill can carry
              # the previous label down (the pattern needs `\s`, not a bare `s`).
              data[1] = data[1].replace(r'^\s*$', np.nan, regex=True)
              data = data.ffill()
              data = data.set_index([1]).drop(index='bids:').reset_index()

              # Row closest to 105% of the first row's column-10 value. After
              # reset_index the labels are 0..n-1, so the label returned by
              # idxmin() is also a valid position for iloc.
              values = data[10].astype('float')
              idx = (values - values.iloc[0] * 1.05).abs().idxmin()
              data[2] = data[2].cumsum()

              final = pd.DataFrame(data.iloc[idx]).transpose()
              final['id'] = final['id'].astype(int)
              frames.append(final)

  # pd.concat raises on an empty list, so fall back to an empty frame with the
  # same columns when no '.txt' member was found.
  if frames:
      df = pd.concat(frames, axis=0)
  else:
      df = pd.DataFrame(columns=[1, 2, 10, 'id'])
  print(df)
  36.  
  37. 1 2 10 id
  38. 1278 asks: 96965.4 2228.31344 31
  39. 1 2 10 id
  40. 23 asks: 197716 1.07176 62
  41. 1 2 10 id
  42. 70 asks: 1.24412e+06 0.43061 84
  43. 1 2 10 id
  44. 176 asks: 184.386 135927785.19000 11
  45.  
  46. 1 2 10 id
  47. 1278 asks: 96965.4 2228.31344 31
  48. 23 asks: 197716 1.07176 62
  49. 70 asks: 1.24412e+06 0.43061 84
  50. 176 asks: 184.386 135927785.19000 11
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement