SHARE
TWEET

Untitled

a guest Jun 16th, 2019 50 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Summarise every "*.txt" member of every archive in zipList: for each
  # member, pick the row whose column-10 value is closest to 105% of the first
  # row's column-10 value, and report it together with the running total of
  # column 2 up to that row. One frame per archive is built and printed.

  # Captures the id from member names shaped like "...product_<id>_snapshot...".
  product_id_pattern = re.compile(r'product_(.+?)_snapshot')

  for zipname in zipList:
      frames = []  # one single-row frame per .txt member of this archive

      # The context manager closes the archive even if parsing a member fails.
      with zipfile.ZipFile(zipname) as archive:
          for fileName in archive.namelist():
              if not fileName.endswith('.txt'):
                  continue

              id_match = product_id_pattern.search(fileName)
              if id_match is None:
                  raise ValueError(
                      'cannot find product id in member name: %r' % (fileName,)
                  )
              p_id = int(id_match.group(1))

              # extract() returns the path it actually wrote (member names are
              # sanitised), so read that path instead of the raw member name.
              extracted_path = archive.extract(fileName)

              # Records are terminated by ']' rather than by newlines.
              # NOTE(review): error_bad_lines was deprecated in pandas 1.3 and
              # removed in 2.0; on newer pandas use on_bad_lines='skip'.
              data = pd.read_csv(
                  extracted_path,
                  lineterminator=']',
                  low_memory=False,
                  error_bad_lines=False,
                  header=None,
              )

              # Column 1 holds "<label>[<value>": the text before the first '['
              # is the section label, the rest becomes column 10.
              new = data[1].str.split('[', n=1, expand=True)
              data[1] = new[0]
              # Drop the last record (presumably the fragment after the final
              # ']' terminator -- confirm against a sample file).
              data.drop(data.index[-1], inplace=True)
              # Assignment aligns on the index, so the dropped row is ignored.
              data[10] = new[1].str.strip('[').str.strip('"')
              data['id'] = p_id

              data = data.loc[:, [1, 2, 10, 'id']].copy()

              # Blank / whitespace-only labels become NaN so that ffill()
              # carries the preceding section label down to every row.
              data[1] = data[1].replace(r'^\s*$', np.nan, regex=True)
              data = data.ffill()

              # Discard every row labelled 'bids:' and renumber from 0 so that
              # index labels and positions coincide below.
              data = data.set_index([1]).drop(index='bids:').reset_index()

              # Row whose column-10 value is nearest to 1.05 x the first one.
              values = data[10].astype('float')
              idx = (values - values[0] * 1.05).abs().idxmin()

              # Replace column 2 with its running total; the selected row then
              # carries the cumulative sum up to and including itself.
              data[2] = data[2].cumsum()

              final = data.iloc[idx].to_frame().T
              final['id'] = final['id'].astype(int)
              frames.append(final)

      if not frames:
          # pd.concat() raises on an empty list; nothing to report here.
          continue

      # NOTE(review): the frame is built and printed once per archive. If a
      # single combined frame across all archives is wanted, collect the
      # frames outside the zip loop and concat/print after it -- confirm.
      df = pd.concat(frames, axis=0)
      print(df)
  36.      
  37. 1        2          10  id
  38. 1278  asks:  96965.4  2228.31344  31
  39.       1       2       10  id
  40. 23  asks:  197716  1.07176  62
  41.       1            2       10  id
  42. 70  asks:  1.24412e+06  0.43061  84
  43.       1        2               10  id
  44. 176  asks:  184.386  135927785.19000  11
  45.      
  46. 1           2               10        id
  47. 1278     asks:     96965.4       2228.31344        31
  48. 23       asks:      197716          1.07176        62
  49. 70       asks: 1.24412e+06          0.43061        84
  50. 176      asks:     184.386  135927785.19000        11
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top