Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<#
.SYNOPSIS
    Reports PDF files whose metadata title differs from their file name.

.DESCRIPTION
    Recursively scans -Path for *.pdf files. For each file it reads the "Title"
    entry of the document Info dictionary through iTextSharp and searches the raw
    file text for a line containing "producer", taking the first ">...<" span on
    such a line as the producer string. Every file that has a non-empty title
    which is not equal to the file name without its extension is written to a CSV
    report with the columns FileName, OtherName, Filepath and Producer.

.PARAMETER Path
    Root folder to scan (sub folders are included).

.PARAMETER Output
    Path of the CSV report to write. An existing file is overwritten.

.PARAMETER DLL
    Path of the iTextSharp assembly to load.
#>
[CmdletBinding()]
param (
    [string]$Path   = "$env:USERPROFILE\Documents\testpdfs\",
    [string]$Output = "$env:USERPROFILE\Documents\PDF_Metadata_Report2.csv",
    [string]$DLL    = "c:\Temp\itextsharp.dll"
)

# NOTE: -Path is honoured as passed. It is no longer overwritten with a
# hard-coded share, and errors are no longer silenced globally, so a missing
# DLL or scan root is reported instead of producing an empty report.
if (-not (Test-Path -LiteralPath $Path -PathType Container)) {
    throw "The scan path '$Path' does not exist or is not a folder."
}

# Regex that captures the first ">text<" span on the producer line(s).
$Pattern = ">(.*?)<"
$Count   = 0
$Matched = 0

# A generic list grows in place; "$array += item" copies the array on every add.
$Report = New-Object 'System.Collections.Generic.List[object]'

# Measure-Command runs the block in the current scope (so the counters and the
# report stay visible afterwards) but discards everything written to the
# success stream. Progress is therefore written with Write-Host, otherwise it
# would never reach the console.
$Runtime = Measure-Command {
    # Prepare the console for output
    Clear-Host

    # Give the user a welcome message
    Write-Host @"
================================================================================
PDF METADATA ANALYSER
================================================================================
`n
"@

    Write-Host "Defining the array`n"

    # Nothing below can work without iTextSharp, so a load failure is fatal.
    Write-Host "Loading the iTextSharp DLL`n"
    Add-Type -LiteralPath $DLL -ErrorAction Stop

    # Collect the PDFs first. -File keeps directories named "*.pdf" out.
    Write-Host "Scanning files. This could take a while...`n"
    $PDFs = Get-ChildItem -LiteralPath $Path -Filter *.pdf -Recurse -File |
        ForEach-Object { Write-Host "Examining file: $($_.FullName)"; $_ }

    foreach ($PDF in $PDFs) {
        $Count++
        Write-Host "The current PDF is $($PDF.Name)"

        # Reset per file: if the reader cannot be created, a stale reader from
        # the previous iteration must not supply this file's title.
        $Field = $null
        $MyPDF = $null
        try {
            $MyPDF = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $PDF.FullName -ErrorAction Stop
            $Field = $MyPDF.Info.Title
        }
        catch {
            Write-Warning "Could not read metadata from $($PDF.FullName): $($_.Exception.Message)"
        }
        finally {
            # Release the file handle held by the reader.
            if ($null -ne $MyPDF) { $MyPDF.Close() }
        }

        if ($Field) {
            Write-Host "The metadata title is $($Field)"
        }
        else {
            Write-Host "There is no metadata title."
        }

        # Text search for "producer" (case-insensitive). -LiteralPath stops
        # file names containing [ or ] from being treated as wildcards.
        # The search runs once; it is no longer repeated for two seconds per file.
        $ProducerLines = @(
            Select-String -Pattern "producer" -LiteralPath $PDF.FullName |
                ForEach-Object { $_.Line }
        )
        # No match yields an empty string, which is treated as "no producer".
        $ProdOut = [regex]::Match(($ProducerLines -join ' '), $Pattern).Value.Trim('>', '<')

        if ($ProdOut) {
            Write-Host "The producer is $($ProdOut)"
        }
        else {
            Write-Host "There is no producer information"
        }

        # Add a blank line for easier reading at the console
        Write-Host "`n"

        # Report only files that have a title which differs from the file name
        # without its extension (comparison is case-insensitive).
        if ($Field -and ($Field -ne $PDF.BaseName)) {
            $Report.Add([pscustomobject]@{
                FileName  = $PDF.Name
                OtherName = $Field
                Filepath  = $PDF.FullName
                Producer  = $ProdOut
            })
            $Matched++
        }
    }
} # Measure-Command

# Export the results to CSV. -NoTypeInformation omits the "#TYPE ..." header
# line that Windows PowerShell would otherwise write as the first row.
$Report | Export-Csv -LiteralPath $Output -NoTypeInformation -Force
Write-Output "Processed $($Count) and matched $($Matched) files and took $($Runtime.TotalMinutes)"
Write-Output "Results are located at $($Output)"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement