Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import json
- _Lines = []
def addline1(line):
    """Consume the JSON header lines until the first "ActivityId" marker.

    When the marker is seen, subsequent lines are routed to addline2 by
    rebinding the module-global dispatch name ``addline``, and the buffer
    is trimmed to just the previous line (the '{' that opens the object).
    """
    global addline
    if '"ActivityId"' in line:
        # Switch handlers; discard everything except the previous line,
        # which starts the first chunk.
        addline = addline2
        del _Lines[:-1]
    _Lines.append(line)
def addline2(line, lines_append=_Lines.append):
    """Accumulate body lines; flush a chunk at each "ActivityId" marker.

    ``lines_append`` binds the list method once at definition time so the
    hot loop avoids a repeated attribute lookup — do not pass it.
    """
    if '"ActivityId"' in line:
        # Everything up to (not including) the previous line is one
        # complete object; the previous line is the '{' of the next one.
        do_chunk(_Lines[:-1])
        del _Lines[:-1]
    lines_append(line)
_Chunks = []

def do_chunk(chunk):
    """Decode one chunk (a list of text lines) as a JSON value.

    Joins the lines, strips surrounding whitespace and a trailing comma
    (objects inside a JSON array are comma-separated), then parses the
    result with json.loads. The decoded value is appended to the
    module-level ``_Chunks`` accumulator and also returned.

    Fixes vs. original:
    * ``cdata[-1]`` raised IndexError on an empty/whitespace-only chunk;
      ``str.endswith`` is safe on the empty string (json.loads then
      raises the appropriate decode error instead).
    * Returns the decoded value (original returned None; callers that
      ignore the return are unaffected).
    """
    cdata = ''.join(chunk).strip()
    if cdata.endswith(','):
        cdata = cdata[:-1]
    jdata = json.loads(cdata)
    _Chunks.append(jdata)
    return jdata
def read_info_chunks(path):
    """Incrementally parse *path* into a list of decoded JSON objects.

    The file is a pretty-printed JSON response whose payload is an array
    of objects, each containing an "ActivityId" key. Instead of parsing
    the whole document, the text is split into per-object chunks on lines
    containing '"ActivityId"' and each chunk is decoded individually.

    Bug fixes vs. the original:
    * ``addline = addline1`` created a *local* name, so the handler
      switch performed via ``global addline`` inside addline1 never
      reached this loop — every line kept going through addline1 and no
      intermediate chunk was ever flushed.
    * The module-level ``_Lines``/``_Chunks`` buffers accumulated across
      calls, so repeated invocations (e.g. under timeit) compounded
      state. All state is now local to the call.

    Returns the list of decoded objects.
    """
    chunks = []
    lines = []
    in_payload = False  # becomes True at the first "ActivityId" line
    with open(path, 'r') as f:
        for line in f:
            if '"ActivityId"' in line:
                if in_payload:
                    # Everything up to (not including) the previous line
                    # is one complete object; the previous line is the
                    # '{' opening the next one.
                    chunks.append(_parse_chunk(lines[:-1]))
                else:
                    in_payload = True
                # Start the next chunk from the previous line (the '{').
                lines[:] = lines[-1:]
            lines.append(line)
    # The last 6 lines are the document's closing footer; drop them and
    # flush the final object. NOTE(review): the 6 is tied to the layout
    # of the expected response file — confirm against real input.
    lines[:] = lines[:-6]
    chunks.append(_parse_chunk(lines))
    return chunks

def _parse_chunk(chunk):
    """Join *chunk* lines, drop a trailing comma, and json-decode."""
    cdata = ''.join(chunk).strip()
    if cdata.endswith(','):
        cdata = cdata[:-1]
    return json.loads(cdata)
def read_info_block(path):
    """Parse *path* in one shot and return the TrainAnnouncement list.

    Counterpart to read_info_chunks() for benchmarking: json.load the
    whole document, then drill down to RESPONSE -> RESULT[0] ->
    TrainAnnouncement.

    Fix: the original computed the list into a local ``chunks`` and fell
    off the end without returning it; it now returns the list.
    """
    with open(path, 'r') as f:
        jdata = json.load(f)
    return jdata['RESPONSE']['RESULT'][0]['TrainAnnouncement']
import timeit

if __name__ == "__main__":
    # Benchmark both parsing strategies over the same input file.
    # Each timeit.repeat() call runs the statement 1000 times per repeat
    # and prints the list of per-repeat wall-clock totals.
    # Fix: guarded with __main__ so importing this module no longer
    # triggers the benchmark as a side effect.
    print("Chunked parsing")
    print(timeit.repeat('read_info_chunks("test.json")',
                        'from __main__ import read_info_chunks',
                        number=1000))
    print("Continuous parsing")
    print(timeit.repeat('read_info_block("test.json")',
                        'from __main__ import read_info_block',
                        number=1000))
- """
- RESULTS:
- $ python test.py
- Chunked parsing
- [0.6402807509875856, 0.6375155359855853, 0.6337668350315653]
- Continuous parsing
- [0.45900151698151603, 0.45118441100930795, 0.44374530599452555]
- """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement