SHARE
TWEET

Untitled

a guest Jun 24th, 2019 49 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import os, sys, time
  2.  
  3. #########################################################################################
  4. #########################################################################################
  5.  
  6. #if len(sys.argv) != 3:
  7. #   print '''Usage: {0}
  8. #   <input>
  9. #   <output>\n\n\n'''.format(sys.argv[0])
  10. #   sys.exit()
  11.  
  12. finput, foutput = sys.argv[1:]
  13.  
  14. # Section 1: Reading into memory, and possibly time-inefficient
  15. #OUT = open(foutput,'w')
  16. #with open(finput) as IN:
  17. #   lines = []
  18. #   t1 = time.time()
  19. #   for line in IN:
  20. #       if "<variable name=" in line:
  21. #           lst = line.strip().split()
  22. #           lines.append(lst)
  23. #sortedList = sorted(lines)
  24. #old = ''
  25. #for each in sortedList:
  26.     # Assign the values after '=' in variable name= to the variable variableContents
  27.     # This involves replacing '"', and ')', and adding a ',' instead of '(', and then splitting at ','
  28. #   variableContents = each[1].replace('(',',').replace(")",'').replace('name=','').replace('"','').split(',')
  29.     # The contents of variableContents are then joined and saved as the variable 'variable'. This involves starting off with the 0th element and joining elements 1 to -1 (2nd last) between parentheses.
  30. #   variable = variableContents[0]+'('+','.join(variableContents[1:-1])+')'
  31.     # 'old' carries the name of the variable from the previous line. This enables printing each row in the input file into a column in the output file.
  32. #   if variable == old:
  33. #       OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  34. #   else:
  35. #       OUT.write('\n{0}\t{1}'.format(variable, float(each[-2].replace('"','').split('=')[1])))
  36. #       print each
  37. #       old = variable
  38. #t2 = time.time()
  39. #print 'This process took ', (t2-t1)/60.0 , 'mins'
  40.  
  41. # Section 2: Attempt to make it time-efficient
  42. def delete_key(d, k):
  43.     r = dict(d)
  44.     del r[k]
  45.     return r
  46.  
  47. OUT = open(foutput,'w')
  48. with open(finput) as IN:
  49.     recordedVariables = []
  50.     lines = []
  51.     old = ''
  52.     remember = {}
  53.     for line in IN:
  54.         if "<variable name=" in line:
  55.             # Split at spaces
  56.             lst = line.strip().split()
  57.             # Assign the values after '=' in variable name= to the variable variableContents
  58.             # This involves replacing '"', and ')', and adding a ',' instead of '(', and then splitting at ','
  59.             variableContents = lst[1].replace('(',',').replace(")",'').replace('name=','').replace('"','').split(',')
  60.             # The contents of variableContents are then joined and saved as the variable 'variable'. This involves starting of with the 0th element and joining elements 1 to -1 (2nd last) between parentheses.
  61.             variable = variableContents[0]+'\t'+'\t'.join(variableContents[1:-1])
  62.             if variable not in recordedVariables:
  63.                 if variable == old:
  64.                     lines.append(lst)
  65.                 else:
  66.                     if len(lines) == 21:
  67.                         sortedList = sorted(lines)
  68.                         OUT.write('{0}'.format(old))
  69.                         for each in sortedList:
  70.                             OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  71.                         OUT.write('\n')
  72.                         #t2 = time.time()
  73.                         #print old, ' took ', (t2-t1)/60.0 , 'mins'
  74.                         lines = []
  75.                         lines.append(lst)
  76.                         recordedVariables.append(old)
  77.                         old = variable
  78.                     elif len(lines) == 0:
  79.                         t1 = time.time()
  80.                         t0 = t1
  81.                         lines.append(lst)
  82.                         old = variable
  83.                         if variable in remember.keys():
  84.                             for each in remember[variable]:
  85.                                 lines.append(each)
  86.                             remember = delete_key(remember, variable)
  87.                     else:
  88.                         if variable not in remember.keys():
  89.                             remember[variable] = []
  90.                             remember[variable].append(lst)
  91.                         else:
  92.                             remember[variable].append(lst)
  93.  
  94. #print (len(remember.keys()))
  95.  
  96. for variable in remember.keys():
  97.     values = remember[variable]
  98.     sortedList = sorted(values)
  99.     OUT.write('{0}'.format(variable))
  100.     for each in sortedList:
  101.         OUT.write('\t{0}'.format(float(each[-2].replace('"','').split('=')[1])))
  102.     OUT.write('\n')
  103.     #t2 = time.time()
  104.     #print old, ' took ', (t2-t1)/60.0 , 'mins'
  105. t3 = time.time()
  106. print ("Process took ", (t3-t0)/60.0, ' mins')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top