Guest User

Untitled

a guest
Jun 13th, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.09 KB | None | 0 0
  1. from openpyxl import load_workbook
  2. import os
  3.  
  4.  
  5. dir_name = os.path.relpath(os.path.dirname(__file__))
  6. file_name = os.path.join(dir_name, 'Data.xlsx')
  7.  
  8. unique_items = []
  9. all_items = []
  10.  
  11. def add_to_list(item):
  12. if item not in unique_items:
  13. unique_items.append(item)
  14.  
  15. def find_duplicates(filename, has_header = False):
  16. wb = load_workbook(filename = filename, use_iterators = True)
  17. ws = wb.get_active_sheet() # ws is now an IterableWorksheet
  18.  
  19. for row in ws.iter_rows(): # it brings a new method: iter_rows()
  20. if has_header:
  21. has_header = False
  22. continue
  23. else:
  24. item = ''
  25. for cell in row:
  26. if cell.column == 'A' or cell.column == 'F': #or cell.column == 'J' or cell.column == 'L'
  27. item += (cell.internal_value + '#')
  28. add_to_list(item)
  29. all_items.append(item)
  30.  
  31.  
  32. find_duplicates(file_name, True)
  33.  
  34. total_duplicates = 0
  35.  
  36. print("Total Items: ", len(all_items))
  37. print("Total Unique Items: ", len(unique_items))
  38.  
  39. for x in unique_items:
  40. if all_items.count(x) > 1:
  41. total_duplicates +=1
  42. print ("Duplicate Item: ", x)
  43.  
  44. print("Total Duplicates Found: ", total_duplicates)
Add Comment
Please, Sign In to add comment