Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 00 : 123,000,444,220
- 01 : 500,000
- 0a : 30,444,555,120
- .
- .
- .
- ff : 45,002,221,222
import pandas

# Location of the input files; '{}' is replaced by a two-digit file number.
DATA_FILE_TEMPLATE = '/home/asus/data/arief_anbiya_{}.csv'
# Number of CSV rows parsed at a time, which bounds peak memory per file.
CHUNK_SIZE = 100000
# The original script iterated i in range(4), j in range(10): files '00'..'39'.
FILE_COUNT = 40


def _balance_by_group(frame):
    """Return the ``account_balance`` total per ``account_id`` two-character prefix.

    ``frame`` must provide a string-valued ``account_id`` column and an
    ``account_balance`` column.  Rows whose ``account_id`` is missing have no
    prefix and are therefore left out of every group.
    """
    groups = frame['account_id'].str[:2].rename('account_group')
    # Only the balance column is summed; the other columns are not needed.
    return frame['account_balance'].groupby(groups).sum()


def _combine_partials(partials):
    """Merge several per-group partial sums into one Series of totals."""
    non_empty = [part for part in partials if not part.empty]
    if not non_empty:
        return pandas.Series(dtype='float64', name='account_balance')
    # Stacking the partial sums and re-grouping on the index adds up groups
    # that appear in several partials and keeps groups that appear in only
    # one, without the int -> float upcast an aligned "+" would introduce.
    return pandas.concat(non_empty).groupby(level=0).sum()


def sum_balances_by_group(chunks):
    """Total ``account_balance`` per account group over an iterable of frames.

    Each chunk is reduced to its (small) per-group sums before the next one is
    pulled from ``chunks``, so a lazily produced chunk never has to be kept in
    memory together with the others.

    Returns a Series indexed by the two-character group; it is empty when
    ``chunks`` yields no rows.
    """
    return _combine_partials(_balance_by_group(chunk) for chunk in chunks)


def main(path_template=DATA_FILE_TEMPLATE, file_count=FILE_COUNT,
         chunksize=CHUNK_SIZE):
    """Aggregate every input CSV and return the per-group balance totals.

    ``path_template`` is formatted with the zero-padded two-digit file number
    for each of the ``file_count`` files, and each file is parsed
    ``chunksize`` rows at a time.
    """
    per_file_totals = []
    for file_number in range(file_count):
        digits = '{:02d}'.format(file_number)
        reader = pandas.read_csv(
            path_template.format(digits),
            chunksize=chunksize,
            usecols=['account_id', 'account_balance'],
            # Force text so ids made only of digits keep their leading zeros
            # instead of being parsed as integers.
            dtype={'account_id': str},
        )
        per_file_totals.append(sum_balances_by_group(reader))
        print('Done adding : arief_anbiya_{}.csv'.format(digits))
    return _combine_partials(per_file_totals)


if __name__ == '__main__':
    # Series of total 'account_balance' per account group across all files.
    sum_of_groupby = main()
Add Comment
Please, Sign In to add comment