# Untitled

import os
from collections import Counter

from openpyxl import load_workbook


# The workbook is expected to sit next to this script; the directory is
# expressed relative to the current working directory.
dir_name = os.path.relpath(os.path.split(__file__)[0])
file_name = os.path.join(dir_name, 'Data.xlsx')

# Module-level accumulators filled in by find_duplicates():
#   all_items    - one key per data row, repeats included
#   unique_items - each distinct key once, in first-seen order
all_items = []
unique_items = []

def add_to_list(item):
    """Append item to the module-level unique_items list unless it is already there."""
    if item in unique_items:
        # Already recorded; keep only the first occurrence.
        return
    unique_items.append(item)

def _column_offset(label):
    """Convert a column label such as 'A', 'F' or 'AA' to a 0-based offset."""
    offset = 0
    for ch in label.upper():
        offset = offset * 26 + (ord(ch) - ord('A') + 1)
    return offset - 1


def find_duplicates(filename, has_header=False, key_columns=('A', 'F')):
    """Collect one key per data row of the workbook's active sheet.

    For every row a key is built by joining the values of the key columns,
    each followed by '#'.  The key is appended to the module-level
    ``all_items`` list and passed to ``add_to_list`` so that
    ``unique_items`` receives each distinct key once.  Nothing is returned.

    Args:
        filename: Path of the .xlsx workbook to read.
        has_header: When true, the first row is skipped.
        key_columns: Column labels whose values make up the row key, in the
            order they are joined.  Defaults to columns A and F, e.g. pass
            ('A', 'F', 'J', 'L') to compare on more columns.
    """
    offsets = [_column_offset(label) for label in key_columns]

    # read_only=True streams rows lazily (it replaces the old
    # use_iterators flag); such a workbook keeps the file open until it is
    # closed explicitly, hence the try/finally.
    wb = load_workbook(filename=filename, read_only=True)
    try:
        ws = wb.active
        rows = ws.iter_rows()
        if has_header:
            next(rows, None)  # drop the header row, tolerate an empty sheet

        for row in rows:
            item = ''
            for offset in offsets:
                # Rows are addressed by position because the type of
                # cell.column differs between openpyxl versions and padding
                # cells in read-only mode do not carry it at all.  A row may
                # also be shorter than the requested column.
                value = row[offset].value if offset < len(row) else None
                # Empty cells contribute an empty field; non-string values
                # are formatted instead of raising TypeError on '+'.
                item += '%s#' % ('' if value is None else value,)
            add_to_list(item)
            all_items.append(item)
    finally:
        wb.close()


# Fill all_items / unique_items from the workbook; its first row is a header.
find_duplicates(file_name, True)

print("Total Items: ", len(all_items))
print("Total Unique Items: ", len(unique_items))

# Tally every key in a single pass so the report below does not rescan
# all_items once per unique key.
occurrences = Counter(all_items)

total_duplicates = 0

# Walk unique_items (first-seen order) so duplicates are reported in the
# order they first appear in the sheet.
for x in unique_items:
    if occurrences[x] > 1:
        total_duplicates += 1
        print("Duplicate Item: ", x)

print("Total Duplicates Found: ", total_duplicates)