Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
workflow Find-Dupes {
    <#
    .SYNOPSIS
    Indexes *.dex files under F:\TEST\ by MD5 hash and flags duplicates.

    .DESCRIPTION
    Every sub-folder of the root path is processed in parallel. For each
    folder a temporary FolderIndex<n>.csv is written with one row per *.dex
    file; rows sharing a hash are marked (first occurrence = master, the
    rest get Action 'Archive'), the rows are appended to TotalCorrect.csv
    and the temporary index is deleted. Progress is appended to dupelog.txt.

    Duplicates are only detected between files of the same folder, because
    each folder index is analysed on its own.

    .PARAMETER ThrottleLimit
    Value passed to 'foreach -parallel -throttlelimit', i.e. the maximum
    number of folders processed at the same time.

    .NOTES
    NOTE(review): files located directly in the root path are not scanned;
    only sub-directories are enumerated. Confirm this is intended.
    NOTE(review): the helpers below assume nested functions in a workflow run
    as ordinary PowerShell (Write-Host, property assignment). Confirm on the
    target host.
    NOTE(review): dupelog.txt and TotalCorrect.csv are written from parallel
    branches; concurrent writes may collide. Consider per-folder outputs.
    #>
    [CmdletBinding()]
    param(
        [int]$ThrottleLimit = 3
    )

    # Script-wide settings and counters.
    $Path = "F:\TEST\"
    $Log = ($Path + 'dupelog.txt')
    $TotalCorrect = ($Path + 'TotalCorrect.csv')
    [int]$DupeCount = 0
    [int]$FileCount = 0

    # Returns one object per sub-folder of $Path (recursive) with the
    # properties FullName and IndexId. IndexId is a unique, stable number
    # used to name the folder's temporary index file, so parallel branches
    # never share a file name.
    Function Index-Folders ($Path, $Log)
    {
        # Abort when the root is not an existing directory.
        if (!(Test-Path -PathType Container $Path))
        {
            throw "Invalid path specified."
        }

        [string](Get-Date) + ' : Scanning Path .... ' + $Path | Add-Content -Path $Log
        Write-Host "Folder Scan Started" -ForegroundColor Yellow

        $Position = 0
        $Folders = foreach ($Directory in (Get-ChildItem -Directory -Path $Path -Recurse))
        {
            [pscustomobject]@{ FullName = $Directory.FullName; IndexId = $Position }
            $Position++
        }

        Write-Host "Folder Scan Complete" -ForegroundColor Yellow
        [string](Get-Date) + ' : ' + $Path + ' scanned successfully.' | Add-Content -Path $Log
        return $Folders
    }

    # Hashes every *.dex file directly inside $Folder, appends one row per
    # file to the CSV at $Index, logs each file to $Log and returns the
    # number of files indexed.
    Function Index-Files ($Folder, $Index, $Log)
    {
        # A leftover index from an interrupted run would otherwise be
        # appended to and counted again.
        if (Test-Path -LiteralPath $Index)
        {
            Remove-Item -LiteralPath $Index -Force
        }

        $Count = 0
        # -File keeps directories that happen to be named *.dex away from Get-FileHash.
        foreach ($File in (Get-ChildItem -LiteralPath $Folder -Filter *.dex -File))
        {
            # Column order matches the CSV layout consumed by Index-Dupes.
            $Record = [pscustomobject]@{
                directory   = [string]$File.Directory
                filename    = $File.Name
                IsDuped     = $false
                IsArchived  = $false
                ArchivePath = $null
                HashVal     = (Get-FileHash -Algorithm MD5 -LiteralPath $File.FullName).Hash
                IsMaster    = $false
                Action      = $null
            }

            $LogString = [string](Get-Date) + ' : ' + [string]($Record.directory + '\' + $Record.filename) + ' has been indexed'
            Add-Content -Path $Log -Value $LogString

            $Record | Export-Csv -LiteralPath $Index -NoTypeInformation -Append
            $Count++
        }

        return $Count
    }

    # Marks duplicate rows in the folder index at $Index, appends all rows to
    # the CSV at $TotalCorrect, deletes the folder index and returns the
    # number of rows flagged with Action 'Archive'.
    Function Index-Dupes ($Index, $TotalCorrect)
    {
        $FilesList = @(Import-Csv -LiteralPath $Index)
        $Archived = 0

        # Group-Object keeps input order inside each group and hands back the
        # same row objects, so editing a group member edits $FilesList.
        # Rows with an empty hash (hashing failed) are never treated as dupes.
        $DuplicateSets = $FilesList |
            Group-Object -Property HashVal |
            Where-Object { $_.Name -and $_.Count -gt 1 }

        foreach ($Set in $DuplicateSets)
        {
            $IsFirst = $true
            foreach ($Row in $Set.Group)
            {
                $Row.IsDuped = $true
                if ($IsFirst)
                {
                    # First occurrence is kept as the master copy.
                    $Row.IsMaster = $true
                    $IsFirst = $false
                }
                else
                {
                    $Row.Action = 'Archive'
                    $Archived++
                }
            }
        }

        $FilesList | Export-Csv -Path $TotalCorrect -NoTypeInformation -Append -Force
        # Remove the index that was passed in, not a caller-side variable.
        Remove-Item -LiteralPath $Index -Force

        return $Archived
    }

    $FoldersIndexed = Index-Folders -Path $Path -Log $Log

    if ($FoldersIndexed)
    {
        foreach -parallel -throttlelimit $ThrottleLimit ($folder in $FoldersIndexed)
        {
            # Per-folder index name derived from the folder's own id.
            $FIndex = ($Path + 'FolderIndex' + $folder.IndexId + '.csv')
            $Indexed = Index-Files -Folder $folder.FullName -Index $FIndex -Log $Log

            # Folders without *.dex files produce no index and are skipped.
            if ($Indexed -gt 0)
            {
                $Dupes = Index-Dupes -Index $FIndex -TotalCorrect $TotalCorrect
                # NOTE(review): read-modify-write on shared counters from
                # parallel branches; totals may be off under contention.
                $WORKFLOW:FileCount = $WORKFLOW:FileCount + $Indexed
                $WORKFLOW:DupeCount = $WORKFLOW:DupeCount + $Dupes
            }
        }
    }

    # Guard against division by zero when nothing was indexed.
    if ($FileCount -gt 0)
    {
        $percentageOfDupes = ($DupeCount / $FileCount) * 100
    }
    else
    {
        $percentageOfDupes = 0
    }

    # Write-Host cannot be called directly from a workflow body; InlineScript
    # is the supported way to run it.
    InlineScript
    {
        Write-Host ("Dupes that can be removed account for " + $using:percentageOfDupes + '% of the total files.')
    }
}
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement