Advertisement
Guest User

Get Metadata information from PDF and compare against actual

a guest
Nov 20th, 2019
146
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2.  
  <#
  .SYNOPSIS
      Reports PDFs whose metadata title differs from their file name.

  .DESCRIPTION
      Recursively scans -Path for *.pdf files. For each file the metadata title
      is read with iTextSharp and a producer string is extracted from the raw
      file text. Files that have a title which differs from the file name
      (without extension) are written to a CSV report.

  .PARAMETER Path
      Root folder to scan recursively for PDF files.

  .PARAMETER Output
      Path of the CSV report to write (overwritten if it exists).

  .PARAMETER DLL
      Path to the iTextSharp assembly that provides the PdfReader type.
  #>
  [CmdletBinding()]
  param (
      [string]$Path = "$env:USERPROFILE\Documents\testpdfs\",
      [string]$Output = "$env:USERPROFILE\Documents\PDF_Metadata_Report2.csv",
      [string]$DLL = "c:\Temp\itextsharp.dll"
  )

  # A stopwatch is used instead of wrapping the work in Measure-Command:
  # Measure-Command discards the success output of its script block, which
  # would hide the progress messages.
  $Stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

  # Captures the text between the first '>' and the next '<' on the matched
  # "producer" line(s).
  $Pattern = ">(.*?)<"
  $Count = 0
  $Matched = 0

  # Prepare the console for output
  Clear-Host

  # Give the user a welcome message
  Write-Host "================================================================================"
  Write-Host "                               PDF METADATA ANALYSER"
  Write-Host "================================================================================"
  Write-Host "`n"

  # A generic list is appended to in place; "+=" on an array would copy the
  # whole array for every mismatching file.
  Write-Host "Defining the array`n"
  $Report = New-Object 'System.Collections.Generic.List[object]'

  # Load iTextSharp. Nothing below can work without it, so fail immediately.
  Write-Host "Loading the iTextSharp DLL`n"
  Add-Type -Path $DLL -ErrorAction Stop

  # Collect the files to examine. Filter for only PDFs.
  Write-Host "Scanning files. This could take a while...`n"
  $PDFs = Get-ChildItem -Path $Path -Filter *.pdf -Recurse -File |
      ForEach-Object { Write-Host "Examining file: $($_.FullName)"; $_ }

  # Compare each file name against its metadata title.
  foreach ($PDF in $PDFs) {
      $Count++
      Write-Host "The current PDF is $($PDF.Name)"

      # Reset per file so a failed read can never reuse the previous file's
      # reader or title.
      $Field = $null
      $MyPDF = $null
      try {
          $MyPDF = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $PDF.FullName
          $Field = $MyPDF.Info.Title
      }
      catch {
          Write-Warning "Could not read metadata from $($PDF.FullName): $($_.Exception.Message)"
      }
      finally {
          # Release the file handle held by the reader.
          if ($MyPDF) { $MyPDF.Close() }
      }

      if ($Field) {
          Write-Host "The metadata title is $($Field)"
      }
      else {
          Write-Host "There is no metadata title."
      }

      # Text-search the raw PDF for "producer" and take the first >...< span
      # from the matching line(s). One pass is enough: the result does not
      # change between attempts.
      $Producer = Select-String -Pattern "producer" -Path $PDF.FullName
      $ProdOut = [regex]::Match([string]$Producer, $Pattern).Value.Trim('>', '<')

      if ($ProdOut) {
          Write-Host "The producer is $($ProdOut)"
      }
      else {
          Write-Host "There is no producer information"
      }

      # Add a blank line for easier reading at the console
      Write-Host "`n"

      # Record files that have a title which differs from the file name.
      # BaseName strips the extension whatever its length.
      if ($Field -and ($Field -ne $PDF.BaseName)) {
          $Report.Add([pscustomobject]@{
              FileName  = $PDF.Name
              OtherName = $Field
              Filepath  = $PDF.FullName
              Producer  = $ProdOut
          })
          $Matched++
      }
  }

  $Stopwatch.Stop()
  $Runtime = $Stopwatch.Elapsed

  # Export the collected mismatches to CSV
  $Report | Export-Csv -Path $Output -NoTypeInformation -Force
  Write-Output "Processed $($Count) and matched $($Matched) files and took $($Runtime.TotalMinutes) minutes"
  Write-Output "Results are located at $($Output)"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement