Advertisement
Guest User

extracting from multiple excel documents

a guest
Jan 15th, 2020
3,151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.51 KB | None | 0 0
  1. # importing libraries
  2. import openpyxl
  3. import os
  4. import matplotlib.pyplot as plt
  5.  
  6. # File path
  7. path =  FILE_PATH
  8.  
  9.  
  10. # cells containing desired data in the workbooks
  11. cells = [
  12.     'G17', 'G19', 'G21',
  13.     'K17', 'K19', 'K21',
  14.     'O17', 'O19', 'O21',
  15.     'S17', 'S19', 'S21',
  16.     'W17', 'W19', 'W21',
  17.     ]
  18.  
  19. # lists for plotting etc.
  20. dates = []
  21. weights = []
  22.  
  23. os.chdir(path)
  24.  
  25. # iterate through files.
  26. for file in os.listdir(path):
  27.    
  28.     # avoid old file format because only interested in newer files
  29.     if not (file[-3:])  == "xls":
  30.        
  31.         # read workbook and sheet
  32.         workbook = openpyxl.load_workbook(file, data_only=True)    
  33.         test_sheet = workbook['Tests']
  34.        
  35.         # iterate over predefined cells from list
  36.         for cell in cells:
  37.            
  38.             # get date of result. The date is in row 10 of same column as result
  39.             cast_cell = test_sheet[str(cell[0] + "10")].value
  40.            
  41.             # and get the actual result
  42.             cell_val = test_sheet[cell].value
  43.            
  44.             # if result is not None and year is recent add to result lists
  45.             # cast_cell is a datetime object after reading the cell value to it
  46.             # so we can use .year directly from that object
  47.             if not cell_val == None and cast_cell.year > 2000:
  48.                
  49.                 # appending to the lists
  50.                 dates.append(cast_cell)
  51.                 weights.append(cell_val)
  52.    
  53.      
  54. # print basic stats
  55. print(len(weights), "results found. Max:", max(weights), ", Min:", min(weights))
  56. print("Average:", round(sum(weights) / len(weights), 2))
  57.  
  58. # data specific for the intercalibration
  59. # Original data replaced with random data as I do not have permission to share it
  60.  
  61. our_val = [564.7, 572.7, 567.3, 561.3, 565.7, 564.1, 582.3, 574.3, 576.9, 565.2, 576.9]
  62.  
  63. years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2019, 2019, 2019]
  64.  
  65. avg_vals = [580.2, 578.3, 584.6, 587.1, 581.9, 578.7, 586.0, 580.5, 580.5, 580.5, 580.5]
  66.  
  67.  
  68. # plot data
  69. plt.figure(figsize=(25,12))
  70. plt.suptitle("Weight histogram")
  71.  
  72.  
  73. # histogram
  74. plt.subplot(131)
  75. plt.title("Histogram, weights")
  76. plt.hist(weights, bins=20)
  77.  
  78.  
  79. # weights over time
  80. plt.subplot(132)
  81. plt.title("Weights over time")
  82. plt.scatter(dates, weights)
  83.  
  84.  
  85. # plotting us vs the intercalibration
  86. plt.subplot(133)
  87. plt.title("Us (Red) vs. Intercalibration (Blue)")
  88. plt.scatter(years, our_val, c='red')
  89. plt.scatter(years, avg_vals, c='blue')
  90. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement