Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.52 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. import pandas as pd
  4. from datetime import datetime
  5. import time, os
  6.  
  7.  
  8. # Notes:
  9. # SDSC-SP2
  10. # Duration: May 1998 thru April 2000
  11. # TimeZoneString is a standard UNIX string indicating the time zone in which the log was generated
  12. # UnixStartTime: 893466664
  13. # TimeZone: -28800
  14. # TimeZoneString: US/Pacific
  15. # StartTime: Fri Apr 24 18:11:04 PDT 1998
  16. # EndTime: Sat Apr 29 21:08:32 PDT 2000
  17.  
  18. UnixStartTime = 893466664
  19. os.environ['TZ'] = 'US/Pacific'
  20. time.tzset()
  21.  
  22. path1 = "SDSCSP2.txt"
  23. #path1 = "/Users/zecanelha/Desktop/1o Semestre/MEI/Datasets/SDSC-SP2.txt"
  24.  
  25. #path2 = "MEI/Datasets/HPC2N.txt"
  26.  
  27. columns = ["Job Number", "Submit Time","Wait Time","Run Time","Number of Allocated Processors","Average CPU Time Used","Used Memory", \
  28. "Requested Number of Processors","Requested Time","Requested Memory","Status","UserID","Group ID","Executable Number", \
  29. "Queue Number","Partition Number","Preceding Job Number","Think"]
  30.  
  31.  
  32. try:
  33. dataset = pd.read_table(path1, delimiter = ',', header = None, names = columns, index_col = False);
  34. except IOError as e:
  35. print(e)
  36.  
  37.  
  38.  
  39. #days= ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]
  40. finish_time = dataset["Submit Time"] + dataset["Wait Time"] + dataset["Run Time"]
  41. logStartTime = dataset.loc[dataset.index[0],'Submit Time']#->primeira data
  42.  
  43. #print(aux[1])#->criar vetor com 2 colunas
  44. date1 = time.strftime('%Y-%m-%d',time.localtime(UnixStartTime + logStartTime))
  45. datef = datetime.strptime(date1, '%Y-%m-%d')#->converter em datetime desde str
  46. weeknumber = datef.isocalendar()[1]#buscar numero da semana
  47. #print(date1)#->so primeira data
  48. #print(UnixStartTime)
  49. #print(logStartTime)
  50. #print(weeknumber)
  51. #------------------------------------------------------------------------------------------------
  52. #print(finish_time)
  53. aux = UnixStartTime+finish_time
  54. lista = []
  55. lista_semanas = []
  56. for i in aux:
  57. lista.append(i)
  58.  
  59. for i in lista:
  60. var = time.strftime('%Y-%m-%d',time.localtime(i))
  61. var2 = datetime.strptime(var, '%Y-%m-%d')
  62. lista_semanas.append(var2.isocalendar()[1])
  63. '''
  64. if (var2.isocalendar()[1] == 1):
  65. cont+=1
  66. '''
  67. #dataset.loc[j,'Finish Week'] = j
  68.  
  69. #print(lista_semanas)
  70. dataset.insert(18, 'Finish Week', lista_semanas)
  71. #print(dataset)
  72.  
  73.  
  74.  
  75.  
# Disabled debug output: the statements are wrapped in an unassigned
# triple-quoted string, so the code inside it is not executed. It printed
# the date of the first submission, its parsed datetime and its ISO week.
'''
print(time.strftime('%Y-%m-%d',time.localtime(UnixStartTime + logStartTime)))
print(datef)
print(datef.isocalendar()[1])
#print(logStartTime)
'''
  82.  
  83.  
# Disabled analysis: everything below is wrapped in an unassigned
# triple-quoted string, so none of the code inside it runs. It holds a
# histogram of the 'Status' column and a line plot of the number of jobs
# per distinct 'UserID'.
# NOTE(review): the loop body inside the string has lost its indentation and
# must be re-indented before this section is re-enabled.
'''

# Get number of canceled jobs - 5 - and finished jobs - 1 -

status = dataset['Status']
plt.hist(status, bins = 'auto', align = 'mid',color = 'orange', rwidth=0.85, label = 'Status of the jobs')
plt.xlabel('Status')
plt.ylabel('Number of jobs')
plt.legend()
plt.show()


# Get number of jobs per user

#Get unique users -> set unordered colection of distinct objects

users = set(dataset['UserID'])
jobsPerUser = []

for i in users:
df = dataset.loc[dataset['UserID'] == i]
jobsPerUser.append(len(df.index))

plt.plot(jobsPerUser)
plt.xlabel('UserID')
plt.ylabel('Number of jobs')
plt.legend()
plt.grid()
plt.show()
'''
  114. # Get number of jobs per weeks
  115. wks = dataset['Finish Week']
  116. plt.hist(wks, bins = 53, align = 'mid',color = 'coral', rwidth=0.85, label = 'Weeks of the jobs')
  117. plt.xlabel('Weeks')
  118. plt.ylabel('Number of jobs')
  119. plt.legend()
  120. plt.show()
  121.  
  122. # Get Number of complete jobs per weeks -> Fazer copia de dataset, tirar as colunas com status = 5
  123. datasetJobCompleted = dataset.copy()
  124. datasetJobCompleted = datasetJobCompleted[datasetJobCompleted['Status'] != 5]
  125. #print(datasetJobCompleted)
  126. wksCompleted = datasetJobCompleted['Finish Week']
  127. plt.hist(wksCompleted, bins = 53, align = 'mid',color = 'coral', rwidth=0.85, label = 'Weeks of the jobs completed')
  128. plt.xlabel('Weeks')
  129. plt.ylabel('Number of jobs completed')
  130. plt.legend()
  131. plt.show()
  132.  
# Disabled analysis: everything below is wrapped in an unassigned
# triple-quoted string, so none of the code inside it runs. It holds a line
# plot of 'Number of Allocated Processors' against 'Job Number', and a
# two-panel figure with the requested processors in the upper panel and the
# allocated processors in the lower one.
'''
# Number of processors per jobs

plt.plot(dataset['Job Number'],dataset['Number of Allocated Processors'],label = "Number of allocated processors per job")
plt.xlabel('Jobs')
plt.ylabel('Allocated processors')
plt.legend()
plt.grid()
plt.show()

# Comparisation between requested processors and number of processors allocated

plt.subplot(2,1,1)
plt.plot(dataset['Requested Number of Processors'], label = 'Requested Number of processors')
plt.xlabel("Jobs")
plt.ylabel("Requested processors")
plt.grid()

plt.subplot(2,1,2)
plt.plot(dataset['Number of Allocated Processors'], label = 'Number of Allocated Processors')
plt.xlabel("Jobs")
plt.ylabel("Allocated proccessors")
plt.grid()

plt.show()
'''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement