Guest User

Untitled

a guest
Jan 21st, 2018
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.82 KB | None | 0 0
  #!/usr/bin/env python
"""Show running Hadoop jobs with elapsed time and running task counts.

Runs ``hadoop job -list`` and, for every job line, replaces the 13-digit
start timestamp (milliseconds since the epoch) with the number of seconds
the job has been running, then appends the count of currently running map
and reduce attempts obtained from ``hadoop job -list-attempt-ids``.

Usage:
    <script>          print jobs in the order ``hadoop job -list`` reports them
    <script> -task    print jobs sorted by running task count, busiest first

A job line from ``hadoop job -list`` is expected to look like:
    job_201106020849_0019 1 1307126449025 cloudera NORMAL NA
"""
import sys
import os
from datetime import datetime
from time import time
import re
import subprocess

# The start time is printed as a 13-digit millisecond timestamp.
START_TIME_RE = re.compile(r'\d{13}')


def run_lines(argv):
    """Run *argv* without a shell and return its stdout as a list of lines.

    The exit status is deliberately ignored so that a failing ``hadoop``
    invocation simply yields whatever output it produced.
    Raises OSError if the executable cannot be started.
    """
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _ = proc.communicate()  # also closes the pipe and reaps the child
    return out.splitlines()


def parse_job_line(jobline):
    """Return ``(job_id, start_ms)`` for a job line, or None to skip it.

    Header lines (containing 'JobId' or 'currently'), blank lines and
    lines whose third field is not an integer are skipped.
    """
    if 'JobId' in jobline or 'currently' in jobline:
        return None
    words = jobline.split()
    if len(words) < 3:
        return None
    try:
        start_ms = int(words[2])
    except ValueError:
        return None
    return words[0], start_ms


def format_job(jobline, now_ms, start_ms, nummaps, numreducers):
    """Build the two-line report for one job.

    The first line is *jobline* with its start timestamp replaced by the
    elapsed whole seconds; the second line carries the running task counts.
    """
    # Floor division keeps the result an integer on Python 2 and 3 alike.
    elapsed_s = (now_ms - start_ms) // 1000
    newjobline = START_TIME_RE.sub(str(elapsed_s), jobline).rstrip("\n")
    taskline = " MAPPERS:" + str(nummaps) + " REDUCERS:" + str(numreducers)
    return newjobline + "\n" + taskline


def count_running_attempts(job_id, task_type):
    """Return how many *task_type* ('map' or 'reduce') attempts are running."""
    return len(run_lines(['hadoop', 'job', '-list-attempt-ids',
                          job_id, task_type, 'running']))


def main(argv=None):
    """Print the job report; return a process exit status."""
    argv = sys.argv if argv is None else argv
    # The option is honoured only when it is the sole argument.
    arg = argv[1] if len(argv) == 2 else ""
    order_by_tasks = arg == "-task"

    now_ms = int(time() * 1000)  # milliseconds, to match hadoop's start time
    try:
        joblines = run_lines(['hadoop', 'job', '-list'])
    except OSError as err:
        sys.stderr.write("cannot run 'hadoop job -list': %s\n" % err)
        return 1

    task_list = []  # (total running tasks, report) tuples for -task ordering
    for jobline in joblines:
        parsed = parse_job_line(jobline)
        if parsed is None:
            continue
        job_id, start_ms = parsed
        nummaps = count_running_attempts(job_id, 'map')
        numreducers = count_running_attempts(job_id, 'reduce')
        jobinfo = format_job(jobline, now_ms, start_ms, nummaps, numreducers)
        if order_by_tasks:
            task_list.append((nummaps + numreducers, jobinfo))
        else:
            print(jobinfo)  # default order: as reported by hadoop

    task_list.sort(reverse=True)
    for _, jobinfo in task_list:
        print(jobinfo)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Add Comment
Please, Sign In to add comment