Advertisement
SvOzMaS

etl.py

Apr 21st, 2018
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.10 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import sys
  4. import json
  5. import os
  6. def main():
  7.  try:
  8.     if len(sys.argv) >= 3:
  9.         dirin = sys.argv[1]
  10.         dirout = sys.argv[2]
  11.     else:
  12.         print 'Missing: required dir for input and dir for output'
  13.         return 1 # indicates errorlessly exit, but not necessary
  14.     count = 1
  15.  
  16.     for file in os.listdir(dirin):
  17.         if file.endswith(".json"):
  18.             with open(os.path.join(dirin, file), 'r') as f:
  19.                 data = json.load(f)
  20.             data_new = {}
  21.             data_new['id'] = str(count)
  22.             data_new['published'] = data['published']
  23.             data_new['text'] = data['text']
  24.             data_new['title'] = data['title']
  25.             data_new['author'] = data['author']
  26.             data_new['url'] = data['url']
  27.             with open(os.path.join(dirout, file), 'w') as f:
  28.                 json.dump(data_new, f)
  29.             count = count+1
  30.  
  31.  except Exception as ex:
  32.      print str(ex)
  33.      return 1 # indicates error, but not necessary
  34.  else:
  35.     return 0 # indicates errorlessly exit, but not necessary
  36. # this is the standard boilerplate that calls the main() function
  37. if __name__ == '__main__':
  38.     # sys.exit(main(sys.argv)) # used to give a better look to exists
  39.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement