daily pastebin goal
84%
SHARE
TWEET

Untitled

a guest Jun 13th, 2018 50 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from openpyxl import load_workbook
  2. import os
  3.  
  4.  
  5. dir_name = os.path.relpath(os.path.dirname(__file__))
  6. file_name = os.path.join(dir_name, 'Data.xlsx')
  7.  
  8. unique_items = []
  9. all_items = []
  10.  
  11. def add_to_list(item):
  12.     if item not in unique_items:
  13.         unique_items.append(item)
  14.  
  15. def find_duplicates(filename, has_header = False):
  16.     wb = load_workbook(filename = filename, use_iterators = True)
  17.     ws = wb.get_active_sheet() # ws is now an IterableWorksheet
  18.  
  19.     for row in ws.iter_rows(): # it brings a new method: iter_rows()
  20.         if has_header:
  21.             has_header = False
  22.             continue
  23.         else:
  24.             item = ''
  25.             for cell in row:               
  26.                 if cell.column == 'A' or cell.column == 'F': #or cell.column == 'J' or cell.column == 'L'
  27.                     item += (cell.internal_value + '#')
  28.             add_to_list(item)      
  29.             all_items.append(item) 
  30.  
  31.  
  32. find_duplicates(file_name, True)
  33.  
  34. total_duplicates = 0
  35.  
  36. print("Total Items: ", len(all_items))
  37. print("Total Unique Items: ", len(unique_items))
  38.  
  39. for x in unique_items:
  40.     if all_items.count(x) > 1:
  41.         total_duplicates +=1   
  42.         print ("Duplicate Item: ", x)  
  43.  
  44. print("Total Duplicates Found: ", total_duplicates)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top