Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# PDF Files Check Script (created by beancurd1, please distribute the code with this session, thanks)
#
# Uses itextsharp.dll (downloaded from SourceForge) to read page 1 of every PDF
# under $PDFPath, looks for the first line carrying a date in one of the
# month/year formats listed in $validMYArray, and compares that date with
# $validDate. PDFs whose first date differs (or that carry no such date at all)
# are listed in $badPDF; a summary is printed and a notification mail is sent.
Add-Type -Path .\itextsharp.dll

# Started here so the elapsed time printed in the summary covers the whole run.
$stopWatch = [System.Diagnostics.Stopwatch]::StartNew()

$validDate = "11 Dec 2015"
$validDateTime = [datetime]$validDate
# Day of month without a leading zero; the search pattern accepts an optional "0".
$day = $validDateTime.Day

# Define valid Month+Year formats here; each one is combined with $day in the search.
$validMYArray = @(
    $validDateTime.ToString(' MMM yyyy')
    $validDateTime.ToString(' MMMM yyyy')
    $validDateTime.ToString('/MM/yyyy')
    $validDateTime.ToString('-MMM-yy')
)

# Build the regexes once instead of once per line of every PDF.
#   Good: the expected day followed by the month/year text.
#   Any : any 1-2 digit day followed by the month/year text (group 1 = the date found).
# The month/year text is escaped so it is always matched literally, and the
# (?<!\d) look-behind stops e.g. day "1" from matching inside "21" or "31".
$datePatterns = @(
    foreach ($validMY in $validMYArray) {
        $suffix = [regex]::Escape($validMY)
        @{
            Good = "(?<!\d)0?$day$suffix"
            Any  = "(?<!\d)([0-3]?\d$suffix)"
        }
    }
)

# PDF Counters
$countTotal = $countGood = $countBad = 0
$PDFPath = "\\server\share"
$badPDF = "Bad PDF (e.g. incorrect date, empty date):`n`n"

Write-Host "Mapping a Drive..."
# -ErrorAction Stop: there is nothing useful to do if the share cannot be mapped.
New-PSDrive -Name NetworkDrive -PSProvider FileSystem -Root $PDFPath -ErrorAction Stop | Out-Null

try {
    Write-Host "Parsing PDF Files..."
    #################################################################
    ### Search PDF Files from UNC folder, parse each PDF          ###
    ### output PDFs with incorrect date                           ###
    #################################################################
    Get-ChildItem -Path NetworkDrive:\ -Filter *.pdf -Recurse |
    ForEach-Object {
        # Keep the file in its own variable: inside catch{} $_ is the error record.
        $pdf = $_
        $countTotal++

        # Per-file state, reset up front so nothing leaks from the previous file.
        $foundDate = $false
        $badDate = ""
        $reader = $null

        try {
            $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $pdf.FullName
            $pageText = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, 1) -join "" -split "`n"

            # The first line containing a date in any known format decides the
            # outcome for this PDF: either it is the expected date, or it is
            # recorded as the offending date.
            :lineLoop foreach ($line in $pageText) {
                foreach ($pattern in $datePatterns) {
                    if ($line -cmatch $pattern.Good) {
                        $foundDate = $true
                        break lineLoop
                    }
                    if ($line -cmatch $pattern.Any) {
                        # Extract the incorrect date so it can be shown next to the file name.
                        $badDate = $Matches[1]
                        break lineLoop
                    }
                }
            }
        }
        catch {
            # A PDF that cannot be opened or parsed is reported as bad instead of
            # silently re-checking the text of the previously opened file.
            Write-Warning "Could not parse $($pdf.FullName): $($_.Exception.Message)"
            $foundDate = $false
            $badDate = "unreadable PDF"
        }
        finally {
            # Free the reader for EVERY file; it locks the PDF file otherwise.
            if ($reader) { $reader.Dispose() }
        }

        if ($foundDate) {
            $countGood++
        }
        else {
            $countBad++
            # Insert a Tab character before `n to avoid the Outlook extra line break issue.
            $badPDF += $pdf.FullName + " ($badDate)`t`n"
        }
    }
}
finally {
    # Unmap the drive, even when the scan was interrupted by an error.
    Remove-PSDrive -Name NetworkDrive
}

$stopWatch.Stop()

# Remove "\\server\share\...\" from each file path, leaving only the file name.
$badPDF = $badPDF -replace "\\\\.*\\", ""
Write-Host "$badPDF`n`n$countTotal PDFs, Good=$countGood, Bad=$countBad $($stopWatch.Elapsed.TotalSeconds) sec`n$PDFPath"

Write-Host "Email Result..."
# NOTE(review): Body is still the original placeholder text; presumably $badPDF
# is what should be mailed - confirm before changing the message.
$messageParameters = @{
    Subject    = "PDF Checked has finish"
    Body       = "Say something here"
    From       = "a@yahoo.com"
    To         = "b@yahoo.com"
    SmtpServer = "mailserver"
}
Send-MailMessage @messageParameters
Add Comment
Please, Sign In to add comment