Advertisement
Guest User

AGPL PDF metadata title compare

a guest
Nov 20th, 2019
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Licensed under AGPL
<#
.SYNOPSIS
    Reports PDFs whose metadata title differs from their file name.

.DESCRIPTION
    Recursively scans -Path for *.pdf files, opens each one with iTextSharp and
    compares the metadata title against the file name with its extension
    removed (case-insensitive). Every PDF that has a title which does not match
    is written to the CSV at -Output together with its full path and producer.

    Progress is written to the host; only the two summary lines at the end are
    emitted on the output stream.

.PARAMETER Path
    Root folder to scan for PDFs.

.PARAMETER Output
    CSV file that receives the report.

.PARAMETER DLL
    Location of the iTextSharp assembly to load.
#>
[CmdletBinding()]
param (
    [string]$Path   = "$env:USERPROFILE\Documents\testpdfs\",
    [string]$Output = "$env:USERPROFILE\Documents\PDF_Metadata_Report2.csv",
    [string]$DLL    = "c:\Temp\itextsharp.dll"
)

# Fallback producer lookup for PDFs whose Info dictionary has no Producer entry:
# scans the raw file for lines mentioning "producer" and returns the first
# ">...<" delimited value found (e.g. XMP markup), or $null when there is none.
function Get-ProducerFromRawText {
    param ([string]$File)

    # -LiteralPath so file names containing [ or ] are not treated as wildcards.
    $hits = Select-String -Pattern 'producer' -LiteralPath $File -ErrorAction SilentlyContinue
    foreach ($hit in $hits) {
        $m = [regex]::Match($hit.Line, '>(.*?)<')
        if ($m.Success) {
            return $m.Value.Trim('>', '<')
        }
    }
    return $null
}

# A Stopwatch is used instead of Measure-Command because Measure-Command
# discards everything its script block writes to the output stream.
$Stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

# NOTE: -Path is honoured as passed; it is no longer overwritten with a
# hard-coded share inside the script.
$Count   = 0
$Matched = 0
$Report  = New-Object 'System.Collections.Generic.List[object]'

# Prepare the console and greet the user.
Clear-Host
Write-Host '================================================================================'
Write-Host '                               PDF METADATA ANALYSER'
Write-Host '================================================================================'
Write-Host "`n"

# Without the assembly every PdfReader call would fail and the report would be
# silently empty, so fail fast here instead.
Write-Host "Loading the iTextSharp DLL`n"
try {
    Add-Type -Path $DLL -ErrorAction Stop
}
catch {
    throw "Unable to load iTextSharp from '$DLL': $($_.Exception.Message)"
}

# Collect the PDFs. Unreadable folders are skipped on purpose (best effort).
Write-Host "Scanning files. This could take a while...`n"
$PDFs = Get-ChildItem -Path $Path -Filter *.pdf -Recurse -File -ErrorAction SilentlyContinue |
    ForEach-Object { Write-Host "Examining file: $($_.FullName)"; $_ }

# Compare each file name against its metadata title.
foreach ($PDF in $PDFs) {
    $Count++
    Write-Host "The current PDF is $($PDF.Name)"

    # Reset per file so nothing carries over from the previous iteration.
    $Field   = $null
    $ProdOut = $null
    $MyPDF   = $null

    try {
        $MyPDF   = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $PDF.FullName -ErrorAction Stop
        $Field   = $MyPDF.Info.Title
        $ProdOut = $MyPDF.Info.Producer
    }
    catch {
        # A corrupt or locked PDF must not stop the whole scan.
        Write-Warning "Could not read '$($PDF.FullName)': $($_.Exception.Message)"
    }
    finally {
        # Release the file handle held by the reader.
        if ($null -ne $MyPDF) { $MyPDF.Close() }
    }

    if (-not $ProdOut) {
        $ProdOut = Get-ProducerFromRawText -File $PDF.FullName
    }

    if ($Field) {
        Write-Host "The metadata title is $($Field)"
    }
    else {
        Write-Host "There is no metadata title."
    }

    if ($ProdOut) {
        Write-Host "The producer is $($ProdOut)"
    }
    else {
        Write-Host "There is no producer information"
    }

    # Blank line for easier reading at the console.
    Write-Host "`n"

    # Only PDFs that have a title which differs from the file name (without its
    # extension) are reported. BaseName copes with extensions of any length.
    if ($Field -and ($Field -ne $PDF.BaseName)) {
        $Report.Add([pscustomobject]@{
            FileName  = $PDF.Name
            OtherName = $Field
            Filepath  = $PDF.FullName
            Producer  = $ProdOut
        })
        $Matched++
    }
}

$Stopwatch.Stop()
$Runtime = $Stopwatch.Elapsed

# Export the results to CSV (no "#TYPE" header line).
$Report | Export-Csv -Path $Output -NoTypeInformation -Force
Write-Output "Processed $($Count) and matched $($Matched) files and took $($Runtime.TotalMinutes)"
Write-Output "Results are located at $($Output)"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement