Advertisement
SvOzMaS

etl

Apr 20th, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. import sys
  2. import json
  3. import os
  4. def main():
  5.  try:
  6.  if len(sys.argv) >= 3:
  7.  dirin = sys.argv[1]
  8.  dirout = sys.argv[2]
  9.  else:
  10.  print 'Missing: required dir for input and dir for output'
  11.  return 1 # indicates errorlessly exit, but not necessary
  12.  count = 1
  13. for file in os.listdir(dirin):
  14.  if file.endswith(".json"):
  15. with open(os.path.join(dirin, file), 'r') as f:
  16.  data = json.load(f)
  17.  data_new = {}
  18. data_new['id'] = str(count)
  19. data_new['published'] = data['published']
  20. data_new['text'] = data['text']
  21. data_new['title'] = data['title']
  22. data_new['author'] = data['author']
  23.  data_new['url'] = data['url']
  24. with open(os.path.join(dirout, file), 'w') as f:
  25.  json.dump(data_new, f)
  26.  count = count+1
  27.  except Exception as ex:
  28.  print ex.message
  29.  return 1 # indicates error, but not necessary
  30.  else:
  31.  return 0 # indicates errorlessly exit, but not necessary
  32. # this is the standard boilerplate that calls the main() function
  33. if __name__ == '__main__':
  34.  # sys.exit(main(sys.argv)) # used to give a better look to exists
  35.  main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement