Guest User

Untitled

a guest
Feb 21st, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.82 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import os
  4. import json
  5. import datetime
  6. from elasticsearch import Elasticsearch,helpers
  7. import sys
  8. import time
  9.  
  10.  
  11. def read_file(filename):
  12. cmds = ""
  13. if filename is not '':
  14. finput = open(filename)
  15. lines = [x.replace('\n', '') for x in finput]
  16. finput.close()
  17. return lines
  18.  
  19.  
  20. def jsonpretty(text):
  21. return json.dumps(text, indent=4, sort_keys=True)
  22.  
  23.  
  24. def write_json_file(d, outfilename):
  25. fo = open(outfilename, "w")
  26. json.dump(d, fo, indent=4)
  27. fo.close()
  28. return
  29.  
  30.  
  31. if __name__ == "__main__":
  32.  
  33. actions = []
  34. es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
  35.  
  36. lines = read_file("json/EL99_Sinergi_Bangun_Negeri.jsonl")
  37. for line in lines:
  38. doc = json.loads(line)
  39. doc['ts'] = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', time.gmtime(doc['date']))
  40. doc['ts_hour'] = time.strftime('%H', time.gmtime(doc['date']))
  41. doc['ts_day'] = time.strftime('%w', time.gmtime(doc['date']))
  42.  
  43. if not 'media' in doc:
  44. if 'text' in doc:
  45. doc['media'] = {}
  46. doc['media']['type'] = 'text'
  47.  
  48. if 'text' in doc:
  49. doc['text_len'] = len(doc['text'])
  50.  
  51. if 'print_name' not in doc['from']:
  52. doc['from']['print_name'] = doc['from']['first_name']+'_'+doc['from']['last_name']
  53.  
  54. if 'phone' not in doc['from']:
  55. doc['from']['phone'] = "0000000000"
  56.  
  57.  
  58. action = {
  59. "_index": "el99",
  60. "_type": "telegram",
  61. "_id": str(doc['date'])+"_"+doc['from']['id'],
  62. "_source": doc
  63. }
  64. actions.append(action)
  65. #print jsonpretty(action)
  66.  
  67.  
  68. write_json_file(actions, "dump.json")
  69. es.indices.delete(index='el99', ignore=[400, 404])
  70. es.indices.create(index='el99')
  71. res = helpers.bulk(es, actions)
  72. print res
Add Comment
Please, Sign In to add comment