Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Show running Hadoop jobs with elapsed time and running task counts.

Runs ``hadoop job -list``, replaces each job's 13-digit start timestamp
(epoch milliseconds) with the elapsed time in whole seconds, and appends
the number of running map and reduce attempts for that job.

Usage:
    script           print jobs in the order the jobtracker lists them
    script -task     print jobs ordered by running task count, busiest first

Job lines from ``hadoop job -list`` are expected to look like:
    job_201106020849_0019 1 1307126449025 cloudera NORMAL NA

The code runs unchanged on Python 2.7 and Python 3.
"""
import sys
import os
from datetime import datetime
from time import time
import re
import subprocess

# Epoch-millisecond start time as printed by the jobtracker.
START_TIME_RE = re.compile(r'\d{13}')


def run_lines(command):
    """Run *command* (an argv list) and return its stdout as a list of lines.

    The exit status is ignored so a failing command simply contributes
    whatever it printed.  If the program cannot be started at all, a
    message is written to stderr and an empty list is returned.
    """
    try:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        sys.stderr.write("cannot run %s: %s\n" % (" ".join(command), err))
        return []
    # communicate() drains and closes the pipe and reaps the child.
    output, _ = proc.communicate()
    return output.splitlines()


def count_running_attempts(job_id, task_type):
    """Return how many running attempts of *task_type* the job has.

    *task_type* is passed straight to ``hadoop job -list-attempt-ids``
    ("map" or "reduce"); every output line counts as one attempt.
    """
    command = ['hadoop', 'job', '-list-attempt-ids', job_id, task_type,
               'running']
    return len(run_lines(command))


def iter_job_reports(job_lines, now_ms, count_running=count_running_attempts):
    """Yield ``(total_running_tasks, report_text)`` for each job line.

    job_lines     -- iterable of lines from ``hadoop job -list``
    now_ms        -- current time in epoch milliseconds
    count_running -- callable ``(job_id, task_type) -> int``

    Header lines (containing "JobId" or "currently") are skipped, as is
    any line without a numeric third field, so blank lines and stray
    messages do not abort the listing.
    """
    for jobline in job_lines:
        if 'JobId' in jobline or 'currently' in jobline:
            continue  # header lines
        words = jobline.split()  # words[0] is the job id, words[2] the start
        if len(words) < 3 or not words[2].isdigit():
            continue  # not a job line
        # Floor division keeps whole seconds on both Python 2 and 3.
        run_time = (int(now_ms) - int(words[2])) // 1000
        newjobline = START_TIME_RE.sub(str(run_time), jobline).rstrip("\n")
        nummaps = count_running(words[0], 'map')
        numreducers = count_running(words[0], 'reduce')
        taskline = " MAPPERS:" + str(nummaps) + " REDUCERS:" + str(numreducers)
        yield (nummaps + numreducers, newjobline + "\n" + taskline)


def order_reports(reports, by_task=False):
    """Yield report texts, optionally ordered by task count, busiest first.

    In the default mode texts are yielded as soon as they are produced, so
    output streams while later jobs are still being queried.  With
    *by_task* the ``(total, text)`` tuples are sorted in descending order.
    """
    if by_task:
        reports = sorted(reports, reverse=True)
    for _total, jobinfo in reports:
        yield jobinfo


def main(argv=None):
    """Print the job report; ``-task`` as the sole argument sorts by tasks."""
    if argv is None:
        argv = sys.argv
    by_task = len(argv) == 2 and argv[1] == "-task"
    now_ms = int(time() * 1000)  # milliseconds, to match the jobtracker
    job_lines = run_lines(['hadoop', 'job', '-list'])
    for jobinfo in order_reports(iter_job_reports(job_lines, now_ms), by_task):
        print(jobinfo)


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment