Advertisement
mikedopp

DuplicateStuff

Aug 11th, 2017
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  workflow Find-Dupes {
      <#
      .SYNOPSIS
          Indexes *.dex files under a root folder, hashes them with MD5 and flags
          duplicates found inside each folder.

      .DESCRIPTION
          Every sub-folder of -Path is processed in parallel. For each folder a
          temporary index CSV is written (one row per file), duplicates inside
          that index are flagged, the rows are appended to TotalCorrect.csv and
          the temporary index is deleted. Progress is appended to dupelog.txt.
          Finally the share of files flagged for archiving is printed.

          Only sub-folders are enumerated, so files that sit directly in -Path
          are not indexed, and duplicates are only detected between files of
          the same folder.

      .PARAMETER ThrottleLimit
          Maximum number of folders processed at the same time.

      .PARAMETER Path
          Root folder to scan. The log and CSV files are written here as well.
          Defaults to the location that used to be hard-coded.

      .NOTES
          NOTE(review): Windows PowerShell Workflow restricts several constructs
          used below (Write-Host, class definitions, .NET method calls such as
          [Image]::new()) unless they run inside InlineScript. Confirm that this
          definition loads on the target PowerShell version.
      #>
      [CmdletBinding()]
      param(
          [int]$ThrottleLimit = 3,
          [string]$Path = 'F:\TEST\'
      )

      #   Define script variables

      # File names below are built by plain concatenation, so the root must end
      # with a path separator.
      if ($Path -notmatch '[\\/]$')
      {
          $Path = $Path + '\'
      }

      [int]$IndexCount = 0
      $Log = ($Path + 'dupelog.txt')
      $DirectoryIndex = ($Path + 'TotalIndex.csv')   # not referenced anywhere in this workflow
      $TotalCorrect = ($Path + 'TotalCorrect.csv')
      [int]$DupeCount = 0
      [int]$FileCount = 0

      # One row of the index: a file, its hash and its duplicate bookkeeping.
      Class Image
      {
          [string]$directory
          [string]$filename
          [bool]$IsDuped
          [bool]$IsArchived
          [string]$ArchivePath
          [string]$HashVal
          [bool]$IsMaster
          [string]$Action
      }

      # Returns the FullName of every sub-folder below $Path and logs the scan.
      Function Index-Folders
      {
          #   Test to see if $Path is a valid location and if not end the program.
          if (!(Test-Path -PathType Container $Path))
          {
              Write-Error "Invalid path specified."
              Exit
          }

          [string](Get-Date) + ' : Scanning Path .... ' + $Path | Add-Content -Path $Log
          Write-Host "Folder Scan Started" -ForegroundColor Yellow
          $Folders = Get-ChildItem -Directory -Path $Path -Recurse | Select-Object -Property FullName
          Write-Host "Folder Scan Complete" -ForegroundColor Yellow
          [string](Get-Date) + ' : ' + $Path + ' scanned successfully.' | Add-Content -Path $Log
          return $Folders
      }

      # Flags duplicates inside one index CSV.
      #   $Index - path of the per-folder index CSV to process.
      # For every hash that occurs more than once, the first row is marked as
      # the master and each later row gets Action = 'Archive'. All rows are
      # appended to $TotalCorrect and the index file is deleted afterwards.
      Function Index-Dupes ($Index)
      {
          $FilesList = Import-Csv -Path $Index
          $duplicates = $FilesList.HashVal | Group-Object | Where-Object { $_.Count -gt 1 }

          foreach ($dupe in $duplicates)
          {
              # Reset per hash group so each group gets exactly one master.
              [bool]$MasterFound = $false

              foreach ($file in $FilesList)
              {
                  if ([string]$file.HashVal -eq [string]$dupe.Name)
                  {
                      $file.IsDuped = $true

                      if (!$MasterFound)
                      {
                          $MasterFound = $true
                          $file.IsMaster = $true
                      }
                      else
                      {
                          $file.Action = 'Archive'
                          $WORKFLOW:DupeCount++
                      }
                  }
              }
          }

          $FilesList | Export-Csv -Path $TotalCorrect -NoTypeInformation -Append -Force

          # Delete the file that was actually processed (the function's own
          # parameter), not a variable that belongs to the caller.
          Remove-Item -Path $Index -Force
      }

      $FoldersIndexed = Index-Folders

      foreach -parallel -throttlelimit $ThrottleLimit ($folder in $FoldersIndexed)
      {
          # -File keeps directories that happen to match *.dex away from Get-FileHash.
          $files = Get-ChildItem -Path $folder.FullName -Filter *.dex -File

          # NOTE(review): the name depends on $IndexCount, which is only
          # incremented at the end of an iteration. Parallel iterations that
          # start together may compute the same file name - confirm, and make
          # the name unique per folder if so.
          $FIndex = ($Path + 'FolderIndex' + $IndexCount + '.csv')

          foreach ($file in $files)
          {
              $CurrentImage = [Image]::new()
              $CurrentImage.filename = $file.Name
              $CurrentImage.directory = $file.Directory
              $CurrentImage.HashVal = (Get-FileHash -Algorithm MD5 -Path $file.FullName).Hash
              $LogString = [string](Get-Date) + ' : ' + [string]($CurrentImage.Directory + '\' + $CurrentImage.filename) + ' has been indexed'
              Add-Content -Path $Log -Value $LogString
              $WORKFLOW:FileCount++
              $CurrentImage | Export-Csv -Path $FIndex -NoTypeInformation -Append
          }

          # The index only exists when at least one file was exported. $null is
          # on the left so the comparison is a test rather than a filter over
          # the lines that were read.
          if ($null -ne (Get-Content -Path $FIndex -ErrorAction SilentlyContinue))
          {
              Index-Dupes -Index $FIndex
          }
          $WORKFLOW:IndexCount++
      }

      # Avoid a divide-by-zero when nothing was indexed.
      if ($WORKFLOW:FileCount -gt 0)
      {
          $percentageOfDupes = ($WORKFLOW:DupeCount / $WORKFLOW:FileCount) * 100
      }
      else
      {
          $percentageOfDupes = 0
      }

      Write-Host ("Dupes that can be removed account for " + $percentageOfDupes + '% of the total files.')
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement