Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import os
- import json
- import datetime
- from elasticsearch import Elasticsearch,helpers
- import sys
- import time
def read_file(filename):
    """Return the lines of *filename* with newline characters removed.

    Args:
        filename: Path of the text file to read. An empty string means
            "no file".

    Returns:
        A list with one string per line of the file, or an empty list when
        *filename* is the empty string.
    """
    # Compare by value, not identity: an `is` test against a string literal
    # depends on interning, and an empty name must still yield a result.
    if filename == '':
        return []
    # The context manager closes the handle even if reading raises.
    with open(filename) as finput:
        return [x.replace('\n', '') for x in finput]
def jsonpretty(text):
    """Serialize *text* to an indented JSON string with sorted keys.

    Args:
        text: Any JSON-serializable object (despite the name, not only a
            string).

    Returns:
        The JSON document as a string, indented by 4 spaces.
    """
    # Explicit separators: with `indent`, older interpreters keep the default
    # ', ' item separator and leave trailing whitespace on every line.
    return json.dumps(text, indent=4, sort_keys=True, separators=(',', ': '))
def write_json_file(d, outfilename):
    """Write *d* to *outfilename* as JSON indented by 4 spaces.

    Args:
        d: JSON-serializable object to dump.
        outfilename: Path of the file to create or overwrite.

    Returns:
        None.
    """
    # The context manager closes the handle even if serialization raises.
    with open(outfilename, "w") as fo:
        # Explicit separators avoid trailing whitespace on interpreters whose
        # default item separator is ', ' when `indent` is given.
        json.dump(d, fo, indent=4, separators=(',', ': '))
def _build_action(doc):
    """Enrich one decoded message in place and wrap it as a bulk action.

    Adds the derived fields `ts`, `ts_hour`, `ts_day` and, where applicable,
    `media`, `text_len`, `from.print_name` and `from.phone`.

    Args:
        doc: Dict decoded from one input line. It must contain `date`
            (passed to `time.gmtime`, so seconds since the epoch) and a
            `from` dict containing `id`.

    Returns:
        A dict with `_index`, `_type`, `_id` and `_source` keys, suitable
        for `helpers.bulk`.
    """
    # Convert the timestamp once (UTC) and reuse it for every derived field.
    sent = time.gmtime(doc['date'])
    doc['ts'] = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', sent)
    doc['ts_hour'] = time.strftime('%H', sent)
    doc['ts_day'] = time.strftime('%w', sent)  # weekday number, 0 = Sunday

    # Text-only messages get a synthetic media entry of type 'text'.
    if 'media' not in doc and 'text' in doc:
        doc['media'] = {'type': 'text'}
    if 'text' in doc:
        doc['text_len'] = len(doc['text'])

    sender = doc['from']
    if 'print_name' not in sender:
        # NOTE(review): assumes first_name and last_name are both present
        # whenever print_name is absent -- confirm against the export format.
        sender['print_name'] = sender['first_name'] + '_' + sender['last_name']
    if 'phone' not in sender:
        sender['phone'] = "0000000000"

    return {
        "_index": "el99",
        "_type": "telegram",
        # %-formatting accepts the sender id whether it is a string or a
        # number; plain `+` concatenation raises TypeError for numeric ids.
        "_id": "%s_%s" % (doc['date'], sender['id']),
        "_source": doc,
    }


def main():
    """Load the JSON-lines export, dump the actions, and index them.

    Reads `json/EL99_Sinergi_Bangun_Negeri.jsonl`, writes every generated
    action to `dump.json`, deletes and re-creates the `el99` index on the
    Elasticsearch node at localhost:9200, bulk-indexes the actions and
    prints the value returned by `helpers.bulk`.
    """
    actions = []
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    for line in read_file("json/EL99_Sinergi_Bangun_Negeri.jsonl"):
        # Blank lines are not JSON documents; skip them instead of letting
        # json.loads raise.
        if not line.strip():
            continue
        action = _build_action(json.loads(line))
        actions.append(action)
        # Debug aid: print(jsonpretty(action)) to inspect a single action.

    write_json_file(actions, "dump.json")

    # Start from a fresh index; 400/404 responses from the delete call are
    # ignored (presumably so a missing index is not an error).
    es.indices.delete(index='el99', ignore=[400, 404])
    es.indices.create(index='el99')
    res = helpers.bulk(es, actions)
    # Parenthesized form prints the same value on Python 2 and Python 3.
    print(res)


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment