Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
from collections import Counter

from openpyxl import load_workbook
# Directory holding this script, expressed relative to the current working
# directory.  When the script is launched by bare file name, __file__ may have
# no directory part; dirname() then yields '' and relpath('') raises
# ValueError, so fall back to the current directory in that case.
dir_name = os.path.relpath(os.path.dirname(__file__) or os.curdir)
# The workbook to scan is expected to sit next to this script.
file_name = os.path.join(dir_name, 'Data.xlsx')
# Distinct row keys, in the order they were first encountered.
unique_items = []
# One key per scanned data row, repeats included.
all_items = []
def add_to_list(item):
    """Append *item* to the module-level ``unique_items`` list unless it is already there."""
    if item in unique_items:
        # Already recorded; keep only the first occurrence.
        return
    unique_items.append(item)
def find_duplicates(filename, has_header = False):
    """Collect a comparison key for every data row of the active worksheet.

    For each row, the values of the cells in columns A and F are concatenated,
    each followed by ``'#'``, to form the row's key.  The key is passed to
    ``add_to_list`` and appended to the module-level ``all_items`` list.

    Args:
        filename: Path of the workbook handed to ``load_workbook``.
        has_header: When true, the first row is skipped.

    Returns:
        None.  Results are accumulated in ``unique_items`` and ``all_items``.
    """
    # NOTE(review): `use_iterators`, `get_active_sheet()` and letter-valued
    # `cell.column` belong to an older openpyxl API; newer releases appear to
    # use `read_only=True`, `wb.active` and integer columns -- confirm against
    # the installed version before upgrading.
    wb = load_workbook(filename = filename, use_iterators = True)
    ws = wb.get_active_sheet()  # iterable (streaming) worksheet
    # Columns whose values make up the duplicate-detection key.
    key_columns = ('A', 'F')
    for row in ws.iter_rows():
        if has_header:
            # Consume the header row exactly once.
            has_header = False
            continue
        parts = []
        for cell in row:
            if cell.column in key_columns:
                value = cell.internal_value
                # Cells may hold numbers or be empty; convert explicitly so
                # the concatenation cannot raise TypeError.  An empty cell
                # contributes an empty field.
                parts.append(('' if value is None else str(value)) + '#')
        item = ''.join(parts)
        add_to_list(item)
        all_items.append(item)
# Scan the workbook, treating its first row as a header.
find_duplicates(file_name, True)
print("Total Items: ", len(all_items))
print("Total Unique Items: ", len(unique_items))
# Tally every key in a single pass; calling all_items.count() once per unique
# key would rescan the whole list each time (quadratic in the row count).
occurrences = Counter(all_items)
total_duplicates = 0
# Walk unique_items so duplicates are reported in first-seen order.
for x in unique_items:
    if occurrences[x] > 1:
        total_duplicates += 1
        print("Duplicate Item: ", x)
print("Total Duplicates Found: ", total_duplicates)
Add Comment
Please, Sign In to add comment