Advertisement
Guest User

Untitled

a guest
Jun 24th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.79 KB | None | 0 0
  1. import os, sys, time
  2.  
  3. #########################################################################################
  4. #########################################################################################
  5.  
  6. #if len(sys.argv) != 3:
  7. # print '''Usage: {0}
  8. # <input>
  9. # <output>\n\n\n'''.format(sys.argv[0])
  10. # sys.exit()
  11.  
  # Takes the two command-line arguments after the script name: the input path
  # and the output path. Unpacking into exactly two names means any other
  # argument count raises ValueError here.
  12. finput, foutput = sys.argv[1:]
  13.  
  14. # Section 1: Reading into memory, and possibly time-inefficient
  15. #OUT = open(foutput,'w')
  16. #with open(finput) as IN:
  17. # lines = []
  18. # t1 = time.time()
  19. # for line in IN:
  20. # if "<variable name=" in line:
  21. # lst = line.strip().split()
  22. # lines.append(lst)
  23. #sortedList = sorted(lines)
  24. #old = ''
  25. #for each in sortedList:
  26. # Assign the values after '=' in variable name= to the variable variableContents
  27. # This involves replacing '"', and ')', and adding a ',' instead of '(', and then splitting at ','
  28. # variableContents = each[1].replace('(',',').replace(")",'').replace('name=','').replace('"','').split(',')
  29. # The contents of variableContents are then joined and saved as the variable 'variable'. This involves starting off with the 0th element and joining the elements from index 1 up to (but not including) the last one between parentheses.
  30. # variable = variableContents[0]+'('+','.join(variableContents[1:-1])+')'
  31. # 'old' carries the name of the variable from the previous line. This enables printing each row in the input file into a column in the output file.
  32. # if variable == old:
  33. # OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  34. # else:
  35. # OUT.write('\n{0}\t{1}'.format(variable, float(each[-2].replace('"','').split('=')[1])))
  36. # print each
  37. # old = variable
  38. #t2 = time.time()
  39. #print 'This process took ', (t2-t1)/60.0 , 'mins'
  40.  
  41. # Section 2: Attempt to make it time-efficient
  def delete_key(d, k):
      """Return a new dict built from ``d`` with the entry for ``k`` removed.

      ``d`` itself is not modified. Raises ``KeyError`` when ``k`` is not
      present in ``d``.
      """
      trimmed = dict(d)
      # pop() without a default keeps the KeyError for a missing key.
      trimmed.pop(k)
      return trimmed
  46.  
  # Section 2 driver: scans finput for lines containing '<variable name=',
  # groups the matching lines by a tab-joined key derived from the name, and
  # writes one row per key to foutput: the key followed by tab-separated floats.
  # NOTE(review): this listing has lost its indentation, so the nesting of the
  # if/elif/else branches below cannot be confirmed from here -- restore it from
  # the original script before relying on any reading of the control flow.
  # NOTE(review): OUT is opened here and no close() appears anywhere in this
  # listing; presumably it relies on interpreter exit to flush -- confirm.
  47. OUT = open(foutput,'w')
  48. with open(finput) as IN:
  # recordedVariables: keys appended right after their row is written (see below).
  49. recordedVariables = []
  # lines: split records collected so far for the key held in 'old'.
  50. lines = []
  51. old = ''
  # remember: maps a key to the list of split records set aside for it.
  52. remember = {}
  53. for line in IN:
  54. if "<variable name=" in line:
  55. # Split the stripped line on whitespace into a list of tokens
  56. lst = line.strip().split()
  57. # Take the second token, drop 'name=', double quotes and ')', turn '(' into ',',
  58. # and split on ',' to get the individual fields of the variable name
  59. variableContents = lst[1].replace('(',',').replace(")",'').replace('name=','').replace('"','').split(',')
  60. # Build the grouping key: the first field, a tab, then every remaining field except the last one, tab-joined (no parentheses are added here).
  61. variable = variableContents[0]+'\t'+'\t'.join(variableContents[1:-1])
  62. if variable not in recordedVariables:
  63. if variable == old:
  64. lines.append(lst)
  65. else:
  # NOTE(review): 21 looks like the expected number of records per key --
  # confirm against the input format.
  66. if len(lines) == 21:
  # sorted() on lists of string tokens compares them token by token.
  67. sortedList = sorted(lines)
  68. OUT.write('{0}'.format(old))
  69. for each in sortedList:
  # Value = second-to-last token with double quotes removed, text after the
  # first '=', converted to float.
  70. OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  71. OUT.write('\n')
  72. #t2 = time.time()
  73. #print old, ' took ', (t2-t1)/60.0 , 'mins'
  74. lines = []
  75. lines.append(lst)
  76. recordedVariables.append(old)
  77. old = variable
  78. elif len(lines) == 0:
  # NOTE(review): this is the only place t0 is assigned; the final print reads
  # t0, so it needs this branch to have run at least once.
  79. t1 = time.time()
  80. t0 = t1
  81. lines.append(lst)
  82. old = variable
  # Pull in any records previously set aside for this key, then drop the key
  # from 'remember' (delete_key returns a copy without it).
  83. if variable in remember.keys():
  84. for each in remember[variable]:
  85. lines.append(each)
  86. remember = delete_key(remember, variable)
  # NOTE(review): which 'if' this 'else' pairs with cannot be told from this
  # listing. Both inner branches end up appending lst to remember[variable],
  # creating the list first when the key is new.
  87. else:
  88. if variable not in remember.keys():
  89. remember[variable] = []
  90. remember[variable].append(lst)
  91. else:
  92. remember[variable].append(lst)
  93.  
  94. #print (len(remember.keys()))
  95.  
  # Write one row for every key still held in 'remember', using the same value
  # extraction as above.
  96. for variable in remember.keys():
  97. values = remember[variable]
  98. sortedList = sorted(values)
  99. OUT.write('{0}'.format(variable))
  100. for each in sortedList:
  101. OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  102. OUT.write('\n')
  103. #t2 = time.time()
  104. #print old, ' took ', (t2-t1)/60.0 , 'mins'
  # Report the time elapsed since t0 was set, in minutes.
  105. t3 = time.time()
  106. print ("Process took ", (t3-t0)/60.0, ' mins')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement