Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.28 KB | None | 0 0
  1. {"location":{"town":"Rome","groupe":"Advanced",
  2. "school":{"SchoolGroupe":"TrowMet", "SchoolName":"VeronM"}},
  3. "id":"145",
  4. "Mother":{"MotherName":"Helen","MotherAge":"46"},"NGlobalNote":2,
  5. "Father":{"FatherName":"Peter","FatherAge":"51"},
  6. "Teacher":["MrCrock","MrDaniel"],"Field":"Marketing",
  7. "season":["summer","spring"]}
  8.  
  9. Groupe Id MotherName FatherName
  10. Advanced 56 Laure James
  11. Middle 11 Ann Nicolas
  12. Advanced 6 Helen Franc
  13.  
  # Build a DataFrame with one row per line of a JSON-lines file.
  #
  # Each line is parsed as a JSON object; the four fields of interest are
  # pulled out, with NaN standing in for a missing 'groupe', 'id',
  # 'MotherName' or 'FatherName'.  A record lacking the 'location', 'Mother'
  # or 'Father' section itself still raises KeyError, as before.
  #
  # NOTE: 'path/to/file' is a placeholder -- point it at the real input file.
  rows = []
  with open('path/to/file') as f:
      for chunk in f:
          if not chunk.strip():
              continue  # tolerate blank lines (e.g. a trailing newline)
          jfile = json.loads(chunk)
          # Keys match the declared columns so no stray columns are created.
          rows.append({
              'group': jfile['location'].get('groupe', np.nan),
              'id': jfile.get('id', np.nan),
              'Father': jfile['Father'].get('FatherName', np.nan),
              'Mother': jfile['Mother'].get('MotherName', np.nan),
          })

  # Build the frame once: growing a DataFrame row by row copies it on every
  # iteration, and DataFrame.append no longer exists in pandas >= 2.0.
  df = pd.DataFrame(rows, columns=['group', 'id', 'Father', 'Mother'])
  36.  
  37. import numpy as np
  38. import pandas as pd
  39. import json
  40. import time
  41.  
  def extract_data(data):
      """Convert one line of JSON text into a record tuple for import.

      Args:
          data: A string holding a single JSON object; surrounding
              whitespace (such as the trailing newline of a file line)
              is stripped before parsing.

      Returns:
          A 4-tuple ``(groupe, id, MotherName, FatherName)``.  Any value
          that is absent -- or whose parent section ('location', 'Mother',
          'Father') is absent or not a JSON object -- is ``np.nan``.

      Raises:
          json.JSONDecodeError: If ``data`` is not valid JSON.
      """
      record = json.loads(data.strip())

      def nested(section, key):
          # A section that is missing or null must not break the lookup.
          value = record.get(section)
          if isinstance(value, dict):
              return value.get(key, np.nan)
          return np.nan

      return (
          nested('location', 'groupe'),
          record.get('id', np.nan),
          nested('Mother', 'MotherName'),
          nested('Father', 'FatherName'),
      )
  51.  
  # Time the map()/from_records approach on 'file.json' (one JSON object
  # per line).
  start = time.time()
  with open('file.json') as f:  # context manager so the handle is closed
      # extract_data yields (groupe, id, MotherName, FatherName), so the
      # labels must follow that order; blank lines are skipped.
      df = pd.DataFrame.from_records(
          map(extract_data, (line for line in f if line.strip())),
          columns=['group', 'id', 'Mother', 'Father'])
  print('New algorithm', time.time()-start)
  # Restore the originally declared column order, outside the timed region.
  df = df[['group', 'id', 'Father', 'Mother']]
  56.  
  57. #
  58. # The original way
  59. #
  60.  
  # Time the row-by-row approach on 'file.json' (one JSON object per line).
  # The frame is still grown one row at a time so the measurement keeps
  # reflecting that strategy's cost.
  start = time.time()
  df = pd.DataFrame(columns=['group', 'id', 'Father', 'Mother'])
  with open('file.json') as f:
      for chunk in f:
          if not chunk.strip():
              continue  # tolerate blank lines (e.g. a trailing newline)
          jfile = json.loads(chunk)
          # NaN stands in for any field missing from the record.
          groupe = jfile['location'].get('groupe', np.nan)
          record_id = jfile.get('id', np.nan)
          mother_name = jfile['Mother'].get('MotherName', np.nan)
          father_name = jfile['Father'].get('FatherName', np.nan)
          # DataFrame.append was removed in pandas 2.0; enlarge via .loc
          # instead.  Values are listed in the declared column order so no
          # stray columns appear.
          df.loc[len(df)] = [groupe, record_id, father_name, mother_name]
  print('original', time.time()-start)
  85.  
  # Collect each column in a plain list, then build the DataFrame once.
  #
  # NOTE: 'path/to/file' is a placeholder -- point it at the real input file
  # (one JSON object per line).
  d = {'group': [], 'id': [], 'Father': [], 'Mother': []}
  with open('path/to/file') as f:
      for chunk in f:
          if not chunk.strip():
              continue  # tolerate blank lines (e.g. a trailing newline)
          jfile = json.loads(chunk)
          # Append to the keys that actually exist in d; NaN marks a
          # missing field.
          d['group'].append(jfile['location'].get('groupe', np.nan))
          d['id'].append(jfile.get('id', np.nan))
          d['Mother'].append(jfile['Mother'].get('MotherName', np.nan))
          d['Father'].append(jfile['Father'].get('FatherName', np.nan))

  df = pd.DataFrame(d)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement