Guest User

Untitled

a guest
Feb 14th, 2018
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. from nltk.tokenize import word_tokenize
  2. import pickle
  3. import pprint
  4. import json
  5.  
  6. """
  7.  
  8. (heads, descs, keywords) = ([headline, ...], [description, ...], None)
  9.  
  10. """
  11.  
  # Input: JSON Lines file, one JSON object per line (read by get_json_data).
  DATA_FILE = 'data/signalmedia-1m.jsonl.test'
  # Output: pickle file that the extracted (heads, descs, None) tuple is written to.
  PICKLE_FILE = 'data/tokens.pkl'
  14.  
  15.  
  def write_to_pickle(filename, data):
      """Pickle ``data`` into the file at ``filename``.

      The file is opened in binary write mode, so existing content is
      replaced.

      Args:
          filename: Path of the pickle file to create or overwrite.
          data: Any picklable object.
      """
      with open(filename, 'wb') as f:
          pickle.dump(data, f)
  19.  
  20.  
  def read_from_pickle(filename):
      """Load and return the object pickled in the file at ``filename``.

      Args:
          filename: Path of a pickle file, e.g. one produced by
              ``pickle.dump`` on a file opened with mode ``'wb'``.

      Returns:
          The unpickled object.
      """
      # Pickle data is bytes: the file must be opened in binary mode.
      # Text mode ('r') makes pickle.load fail on Python 3.
      # NOTE: unpickling can execute arbitrary code; only load trusted files.
      with open(filename, 'rb') as f:
          return pickle.load(f)
  24.  
  25.  
  def get_json_data(filename):
      """Extract titles and contents from a JSON Lines file.

      Each non-blank line of ``filename`` is parsed as one JSON object; its
      ``"title"`` value is collected as a headline and its ``"content"``
      value as a description.

      Args:
          filename: Path of the JSON Lines file to read.

      Returns:
          A 3-tuple ``(heads, descs, None)`` where ``heads`` and ``descs``
          are parallel lists in file order. The third slot is always
          ``None``.

      Raises:
          KeyError: If a record lacks the ``"title"`` or ``"content"`` key.
          ValueError: If a non-blank line is not valid JSON.
      """
      heads = []
      descs = []
      HEADER = "title"
      DESCRIPTION = "content"

      with open(filename, 'r') as json_data:
          for json_object in json_data:
              # Tolerate blank lines (e.g. a trailing newline at end of file).
              if not json_object.strip():
                  continue
              # Parse each line once and read both fields from the result.
              record = json.loads(json_object)
              heads.append(record[HEADER])
              descs.append(record[DESCRIPTION])
      return heads, descs, None
  37.  
  38.  
  # Script driver: extract (heads, descs, None) from DATA_FILE, store it in
  # PICKLE_FILE, then load it back from disk and pretty-print what was read.
  data = get_json_data(DATA_FILE)
  write_to_pickle(PICKLE_FILE, data)
  data = read_from_pickle(PICKLE_FILE)
  pprint.pprint(data)
Add Comment
Please, Sign In to add comment