Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Split "KEY=VALUE;KEY=VALUE" records into DataFrame columns, fill the gaps
# in two of the columns with sentinel strings, and save the result as CSV.
import pandas as pd

# Raw input: one string per record, fields separated by ';' and each field
# written as KEY=VALUE. Records do not all carry the same keys.
data = [
    'TTMNPMGN=31.28954;NLOW=307.97567;TTMPRCFPS=27.16872',
    'NLOW=307.97567;TTMPRCFPS=27.16872;TTMGROSMGN=49.10444',
]

# Columns written to the CSV, in exactly this order.
OUTPUT_COLUMNS = ['TTMNPMGN', 'NLOW', 'TTMPRCFPS', 'TTMGROSMGN']


def parse_record(record):
    """Return the fields of one ``KEY=VALUE;KEY=VALUE`` record as a dict.

    Keys and values are kept as strings, unstripped. Only the first ``=``
    of a field separates key from value, so values may contain ``=``.
    Empty fields (e.g. produced by a trailing ``;``) are skipped. When a
    key repeats, the last occurrence wins.

    Raises:
        ValueError: if a non-empty field contains no ``=``.
    """
    fields = {}
    for item in record.split(';'):
        if not item:
            continue
        key, sep, value = item.partition('=')
        if not sep:
            raise ValueError(f"expected KEY=VALUE, got {item!r}")
        fields[key] = value
    return fields


print('\n--- original data ---\n')
df1 = pd.DataFrame(data)
print(df1.head())

print('\n--- splitted to columns ---\n')
# Build the frame in one step from a list of dicts: DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame each time.
# Keys missing from a record become NaN in that row.
df2 = pd.DataFrame([parse_record(record) for record in data])
print(df2.head())

print('\n--- filled missing values ---\n')
# A per-column mapping fills each column with its own sentinel and avoids
# chained `df2[col].fillna(..., inplace=True)`, which does not reliably
# modify df2 on recent pandas versions.
df2 = df2.fillna({'TTMNPMGN': '999', 'TTMGROSMGN': '-999'})
print(df2.head())

# Pass "columns" explicitly to fix the order of columns in the file.
df2.to_csv('output.csv', index=False, columns=OUTPUT_COLUMNS)

# Example output (whitespace abbreviated; exact layout and the column order
# of the printed frames depend on the pandas version):
#
# --- original data ---
#                                                    0
# 0  TTMNPMGN=31.28954;NLOW=307.97567;TTMPRCFPS=27....
# 1  NLOW=307.97567;TTMPRCFPS=27.16872;TTMGROSMGN=4...
#
# --- splitted to columns ---
#    TTMNPMGN       NLOW  TTMPRCFPS  TTMGROSMGN
# 0  31.28954  307.97567   27.16872         NaN
# 1       NaN  307.97567   27.16872    49.10444
#
# --- filled missing values ---
#    TTMNPMGN       NLOW  TTMPRCFPS  TTMGROSMGN
# 0  31.28954  307.97567   27.16872        -999
# 1       999  307.97567   27.16872    49.10444
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement