Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {"location":{"town":"Rome","groupe":"Advanced",
- "school":{"SchoolGroupe":"TrowMet", "SchoolName":"VeronM"}},
- "id":"145",
- "Mother":{"MotherName":"Helen","MotherAge":"46"},"NGlobalNote":2,
- "Father":{"FatherName":"Peter","FatherAge":"51"},
- "Teacher":["MrCrock","MrDaniel"],"Field":"Marketing",
- "season":["summer","spring"]}
- Groupe Id MotherName FatherName
- Advanced 56 Laure James
- Middle 11 Ann Nicolas
- Advanced 6 Helen Franc
import json

import numpy as np
import pandas as pd

# Read a JSON-lines file (one JSON object per line) into a DataFrame holding
# the group, id and parents' names of every record.
rows = []
with open('path/to/file') as f:  # placeholder path: replace with the real file
    for chunk in f:
        jfile = json.loads(chunk)
        # A field that is absent from a record becomes NaN.
        rows.append({
            'group': jfile['location'].get('groupe', np.nan),
            'id': jfile.get('id', np.nan),
            'Father': jfile['Father'].get('FatherName', np.nan),
            'Mother': jfile['Mother'].get('MotherName', np.nan),
        })

# Build the frame once: growing it row by row re-copies the whole frame on
# every iteration, and DataFrame.append no longer exists in pandas 2.0+.
df = pd.DataFrame(rows, columns=['group', 'id', 'Father', 'Mother'])
import json
import time

import numpy as np
import pandas as pd
def extract_data(data):
    """Convert one line of JSON text into a record tuple for import.

    Args:
        data: Text of a single JSON object; surrounding whitespace
            (including the trailing newline) is stripped before parsing.

    Returns:
        A 4-tuple ``(groupe, id, MotherName, FatherName)``. Any field that
        is missing is returned as ``np.nan``.

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON.
    """
    jfile = json.loads(data.strip())

    def nested(section, key):
        # A section that is absent or is not a JSON object (e.g. null)
        # counts as missing instead of raising AttributeError.
        value = jfile.get(section)
        return value.get(key, np.nan) if isinstance(value, dict) else np.nan

    return (
        nested('location', 'groupe'),
        jfile.get('id', np.nan),
        nested('Mother', 'MotherName'),
        nested('Father', 'FatherName'),
    )
# Time the map-based loader: every line of the file is parsed by
# extract_data and the DataFrame is built in a single from_records call.
start = time.time()
with open('file.json') as f:  # the with-block closes the file once it is consumed
    df = pd.DataFrame.from_records(
        map(extract_data, f),
        # extract_data returns (groupe, id, MotherName, FatherName), so the
        # mother column has to be labelled before the father column.
        columns=['group', 'id', 'Mother', 'Father'])
print('New algorithm', time.time() - start)
#
# The original way
#
# Baseline for the timing comparison: it deliberately grows the DataFrame
# one row at a time, so it is expected to be slow.
start = time.time()
df = pd.DataFrame(columns=['group', 'id', 'Father', 'Mother'])
with open('file.json') as f:
    for chunk in f:
        jfile = json.loads(chunk)
        # A field that is absent from a record becomes NaN.
        groupe = jfile['location'].get('groupe', np.nan)
        record_id = jfile.get('id', np.nan)
        mother_name = jfile['Mother'].get('MotherName', np.nan)
        father_name = jfile['Father'].get('FatherName', np.nan)
        # DataFrame.append was removed in pandas 2.0; enlarging through .loc
        # keeps the row-at-a-time behaviour. Values follow the column order
        # declared above so no stray columns are created.
        df.loc[len(df)] = [groupe, record_id, father_name, mother_name]
print('original', time.time() - start)
# Collect every column in a plain list and build the DataFrame once at the
# end, instead of growing the frame inside the loop.
d = {'group': [], 'id': [], 'Father': [], 'Mother': []}
with open('path/to/file') as f:  # placeholder path: replace with the real file
    for chunk in f:
        jfile = json.loads(chunk)
        # The keys used here must be the ones the dict was created with;
        # a field that is absent from a record becomes NaN.
        d['group'].append(jfile['location'].get('groupe', np.nan))
        d['id'].append(jfile.get('id', np.nan))
        d['Mother'].append(jfile['Mother'].get('MotherName', np.nan))
        d['Father'].append(jfile['Father'].get('FatherName', np.nan))
df = pd.DataFrame(d)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement