DragonHawk

Compare-File.ps1

May 12th, 2021
486
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <#
  2. .SYNOPSIS
  3.  
  4. Report differences in the contents of two files, or two trees of files.
  5.  
  6. .DESCRIPTION
  7.  
  8. Compares the two main parameters (arbitrarily designated "Left" and
  9. "Right") and reports differences in content. If the two parameters are
  10. both files, the content of those two files is compared. If the two
  11. parameters are both folders (directories), each item in the two is
  12. compared: files as described, folders recursively.
  13.  
  14. Each difference found is reported to standard output. Left/Right/Both is
  15. given, followed by the type of difference. If walking a set of
  16. folders, the path of each item is given, relative to the root of
  17. both folders.
  18.  
  19. .PARAMETER Left
  20.  
  21. Path name of a file or folder to compare.  Arbitrarily designated "Left".
  22.  
  23. .PARAMETER Right
  24.  
  25. Path name of a file or folder to compare.  Arbitrarily designated "Right".
  26.  
  27. .PARAMETER Same
  28.  
  29. Also report items where the content is the same.
  30.  
  31. .PARAMETER ShowDebug
  32.  
  33. Shortcut to setting DebugPreference=Continue for this script run.
  34. Debugging information is always written to the Debug output/object
  35. stream. This switch will reveal that, without the constant prompting
  36. that a full -Debug entails.
  37.  
  38. .NOTES
  39.  
  40. While the ideal goal would be to be able to compare anything that
  41. supported Get-Content, performance limitations prevent the use of that
  42. API. (It is several orders of magnitude slower, and very memory
  43. inefficient.) Currently the .NET API is used instead. This limits the
  44. operation to regular files only.
  45.  
  46. File metadata (dates, ownership, etc) is ignored. This is by design;
  47. files with different metadata can still have the same contents.
  48.  
  49. Alternate Data Streams (forks) are ignored.  This may change some day.
  50.  
  51. Content is compared even if file sizes differ. This way we know if files
  52. are the same up to that point, and one is simply truncated/shorter, or
  53. if the files have differing content as well.
  54.  
  55. .EXAMPLE
  56.  
  57. Compare-File C:\FOO C:\BAR
  58.  
  59. Compares FOO and BAR, and reports differences in content.
  60.  
  61. .INPUTS
  62.  
  63. None.  You cannot pipe objects to this script.
  64.  
  65. .OUTPUTS
  66.  
  67. Differences are reported as strings to the output stream, one line per
  68. difference (typically, one line per file). Only the relative path (from
  69. the root of the two compares) is reported. Empty output means no
  70. differences were found.
  71.  
  72. .LINK
  73. Compare-Object
  74.  
  75. #>
  76.  
  77. ########################################################################
  78. # parameters
  79.  
  80. [CmdletBinding()]
  81. Param(
  82.  
  83. [Parameter(
  84.     Mandatory=$true,
  85.     Position=0,
  86.     HelpMessage="Path name of the first file or folder to compare. "
  87.     )]
  88. [string]$Left,
  89.  
  90. [Parameter(
  91.     Mandatory=$True,
  92.     Position=1,
  93.     HelpMessage="Path name of the second file or folder to compare. "
  94.     )]
  95. [string]$Right,
  96.  
  97. [Parameter(
  98.     Mandatory=$False,
  99.     HelpMessage="Explicitly report items which have the same content?"
  100.     )]
  101. [switch]$Same = $false,
  102.  
  103. [Parameter(
  104.     Mandatory=$False,
  105.     HelpMessage="Display debug output (without debug prompting)?"
  106.     )]
  107. [switch]$ShowDebug
  108.  
  109. )
  110.  
  111. ########################################################################
  112. # safety
  113.  
  114. # throw errors on undefined variables
  115. Set-StrictMode -Version 1
  116.  
  117. # stop immediately on error
  118. $ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop
  119.  
  120. # Display debug output (without debug prompting)
  121. If ($ShowDebug) { $DebugPreference = 'Continue' }
  122.  
  123. ########################################################################
  124. # constants
  125.  
  126. # size of two compare buffers, in bytes
  127. # 1024 * 1024 = 1 binary megabyte
  128. Set-Variable -option Constant -name BufSize -value (1024 * 1024)
  129.  
  130. # modes to use when calling [System.IO.File]::Open()
  131. Set-Variable -option Constant -name FileMode   -value ([System.IO.FileMode]::Open)
  132. Set-Variable -option Constant -name FileAccess -value ([IO.FileAccess]::Read)
  133. Set-Variable -option Constant -name FileShare  -value ([IO.FileShare]::Read)
  134.  
  135. # messages for Report-Diff - all same length for alignment
  136. Set-Variable -option Constant -name DiffMissR   -value 'Right: Missing'
  137. Set-Variable -option Constant -name DiffMissL   -value 'Left : Missing'
  138. Set-Variable -option Constant -name DiffShortR  -value 'Right: Shorter'
  139. Set-Variable -option Constant -name DiffShortL  -value 'Left : Shorter'
  140. Set-Variable -option Constant -name DiffContent -value 'Both : Differ '
  141. Set-Variable -option Constant -name DiffNone    -value 'Both : Same   '
  142.  
  143. ########################################################################
  144. # external library functions
  145.  
  146. # Microsoft Visual C Run-Time (MSVCRT)
  147. # helped by https://www.powershellgallery.com/packages/PSMemory/1.0.0/Content/PSMemory.psm1
  148. $MSCRTWrapperCode = @"
  149. using System;
  150. using System.Runtime.InteropServices;
  151. public static class MSCRTWrapper {
  152.     [DllImport("msvcrt.dll", CallingConvention=CallingConvention.Cdecl)]
  153.     public static extern int memcmp(byte[] b1, byte[] b2, long count);
  154.     }
  155. "@
  156.  
  157. # compile and add the MSCRTWrapper class
  158. # Add-Type is too noisy for -ShowDebug
  159. Add-Type -Debug:$false -TypeDefinition $MSCRTWrapperCode
  160.  
  161. ########################################################################
  162. # PoSh subroutines
  163.  
  164. # ----------------------------------------------------------------------
  165. function Walk-LeftTree ($LeftPath, $RightPath) {
  166. # Starting from Left as relative root, walk the tree and compare against
  167. # Right.  When things exist on both sides, compare their content, too.
  168.  
  169. Write-Debug "walking left"
  170.  
  171. # so "Resolve-Path -Relative" will return path relative to $LeftPath
  172. Push-Location $LeftPath
  173.  
  174. try {
  175.     Get-ChildItem -Recurse | Resolve-Path -Relative | ForEach-Object {
  176.        
  177.         $rel = $_
  178.         # nuke leading ".\" if present
  179.         if ($rel.substring(0, 2) -eq '.\') { $rel = $rel.substring(2) }
  180.         Write-Debug "rel=<$rel>"
  181.         $script:Items++
  182.        
  183.         # make sure counterpart exists on the other side
  184.         if (-not (Test-Path $RightPath\$rel)) {
  185.             Report-Diff $rel $DiffMissR
  186.             $script:MissingRight++
  187.             return
  188.             }
  189.    
  190.         # compare contents for files (directories just have to exist)
  191.         if (Test-Path -Type Container $rel) {
  192.             Write-Debug "skipping directory"
  193.             }
  194.         else {
  195.             Compare-FileContent "$LeftPath\$rel" "$RightPath\$rel" $rel
  196.             }
  197.  
  198.         } # ForEach-Object
  199.     } # try
  200. finally {
  201.     Pop-Location
  202.     }
  203.    
  204. } # Walk-LeftTree
  205.  
  206. # ----------------------------------------------------------------------
  207. function Walk-RightTree ($LeftPath, $RightPath) {
  208. # We have now checked everything in Left and made sure the counterpart in
  209. # Right exists and has the same content. But if something does *not*
  210. # exist in Left but *does* exist in Right, walking Left of course will
  211. # not find it. So now we walk Right and make sure everything in Right
  212. # also exists in Left. We do not need to check here; anything existing in
  213. # both will have already been found and compared in Walk-LeftTree.
  214.  
  215. Write-Debug "walking right"
  216. Push-Location $RightPath
  217.  
  218. try {
  219.     Get-ChildItem -Recurse | Resolve-Path -Relative | ForEach-Object {
  220.        
  221.         $rel = $_
  222.         # nuke leading ".\" if present
  223.         if ($rel.substring(0, 2) -eq '.\') { $rel = $rel.substring(2) }
  224.         Write-Debug "rel=<$rel>"
  225.         # do not count this one (yet), Left may already have counted it
  226.        
  227.         if (-not (Test-Path $LeftPath\$rel)) {
  228.             Report-Diff $rel $DiffMissL
  229.             $script:MissingLeft++
  230.             # since Left didn't see this one, count it here
  231.             $Items++
  232.             return
  233.             }
  234.  
  235.         } # ForEach-Object
  236.     } # try
  237. finally {
  238.     Pop-Location
  239.     }
  240.  
  241. } # Walk-RightTree
  242.    
  243. # ----------------------------------------------------------------------
  244. function Compare-FileContent ($LeftPath, $RightPath, $ReportingName) {
  245.  
  246. Write-Debug "getting items"
  247. $LeftItem  = Get-Item $LeftPath
  248. $RightItem = Get-Item $RightPath
  249.  
  250. Write-Debug "opening streams"
  251. $LeftStream   = [System.IO.File]::Open($LeftItem,  $FileMode, $FileAccess, $FileShare)
  252. $RightStream  = [System.IO.File]::Open($RightItem, $FileMode, $FileAccess, $FileShare)         
  253.  
  254. Compare-Stream $LeftStream $RightStream $ReportingName
  255.  
  256. Write-Debug "closing streams"
  257. $LeftStream.Close()
  258. $RightStream.Close()
  259.  
  260. } # Compare-FileContent
  261.  
  262. # ----------------------------------------------------------------------
  263. function Compare-Stream ($LeftStream, $RightStream, $ReportingName) {
  264.  
  265. Write-Debug "comparing streams"
  266.  
  267. # start with ContentSize equal to entire buffer
  268. # a short read for either will reset it to the shorter length
  269. $ContentSize = $BufSize
  270.  
  271. $foundDiff = $false
  272.  
  273. $done = $false
  274. while (-not $done) {
  275.  
  276.     $LeftCount  =  $LeftStream.Read($LeftBuf,  0, $BufSize)
  277.     $RightCount = $RightStream.Read($RightBuf, 0, $BufSize)
  278.    
  279.     if (($LeftCount -eq 0) -and ($RightCount -eq 0)) {
  280.         Write-Debug "read zero from both, EOF, same"
  281.         break
  282.         }
  283.  
  284.     # if we read short from either stream:
  285.     # - report the shorter item
  286.     # - reduce scope of compare to the shorter size
  287.        
  288.     if ($LeftCount -lt $RightCount) {
  289.         Report-Diff $ReportingName $DiffShortL
  290.         $foundDiff = $true
  291.         $script:SizeDiff++
  292.         $ContentSize = $LeftCount
  293.         $done = $true
  294.         }
  295.    
  296.     if ($RightCount -lt $LeftCount) {
  297.         Report-Diff $ReportingName $DiffShortR
  298.         $foundDiff = $true
  299.         $script:SizeDiff++
  300.         $ContentSize = $RightCount
  301.         $done = $true
  302.         }
  303.    
  304.     # don't bother comparing if one file ended right on buffer boundary
  305.     if ($ContentSize -eq 0) { break }
  306.    
  307.     Write-Debug "comparing content"
  308.    
  309.     if (Compare-Buffer $LeftBuf $RightBuf $ContentSize) {
  310.         Report-Diff $ReportingName $DiffContent
  311.         $foundDiff = $true
  312.         $script:ContentDiff++
  313.         break
  314.         }
  315.        
  316.     } # while
  317.  
  318. # this isn't really a *difference* we are reporting, but oh well   
  319. if ( $Same -and (-not $foundDiff) ) {
  320.     Report-Diff $ReportingName $DiffNone
  321.     }
  322.  
  323. } # Compare-Stream
  324.  
  325. # ----------------------------------------------------------------------
  326. function Compare-Buffer ($LeftBuf, $RightBuf, $Length) {
  327. # memcmp() from MSVCRT
  328.  
  329. return (([MSCRTWrapper]::memcmp($LeftBuf, $RightBuf, $Length)) -ne 0)
  330.  
  331. } # Compare-Buffer
  332.  
  333. # ----------------------------------------------------------------------
  334. function Report-Diff ($Thing, $Diff) {
  335. # report a difference reason ($Diff), for optional reporting name $Thing
  336.  
  337. # the "two files" case has no reporting name, just the difference
  338. $msg = if ($Thing) { "${Diff}: $Thing" } else { $Diff }
  339.  
  340. Write-Output $msg
  341.  
  342. } # Report-Diff
  343.  
  344. ########################################################################
  345. # main program
  346.  
  347. # init counters
  348. $Items = $MissingRight = $MissingLeft = $ContentDiff = $SizeDiff = 0
  349.  
  350. # make sure the given parameters are valid paths (throw errors if not)
  351. $left  = Resolve-Path $left
  352. $right = Resolve-Path $right
  353. Write-Debug "Left=<$Left>"
  354. Write-Debug "Right=<$Right>"
  355.  
  356. # are the given parameters directories/folders/containers?
  357. $LeftDir  = Test-Path -Type Container $left
  358. $RightDir = Test-Path -Type Container $right
  359. if ($LeftDir -ne $RightDir) { throw "Parameter type mismatch.  Both must be files, or both must be folders." }
  360.  
  361. # allocate buffers, will be used for read chunks of each file
  362. $LeftBuf  = New-Object byte[] $BufSize
  363. $RightBuf = New-Object byte[] $BufSize
  364.  
  365. if ($LeftDir) {
  366.     # two trees
  367.     Walk-LeftTree  $Left $Right
  368.     Walk-RightTree $Left $Right
  369.     }
  370. else {
  371.     # two files
  372.     $Items = 1
  373.     Compare-FileContent $Left $Right $null
  374.     }
  375.  
  376. # done, report stats (kinda useless for "two files" case, but not wrong)
  377. Write-Verbose "$Items items, $ContentDiff content differed, $SizeDiff size differed, $MissingLeft missing from left, $MissingRight from right"
  378.  
  379. ##### END ##############################################################
  380.  
RAW Paste Data