Guest User

Untitled

a guest
Dec 31st, 2015
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.11 KB | None | 0 0
  # PDF Files Check Script (created by beancurd1, please distribute the code with this section, thanks)
  #
  # Uses itextsharp.dll (downloaded from SourceForge) to read page 1 of every PDF found under
  # $PDFPath, takes the first date on that page that is written in one of the month/year
  # formats listed below, and compares its day with $validDate. PDFs whose first date differs
  # (or that contain no such date, or cannot be read) are listed as "bad". A summary is
  # printed and a notification e-mail is sent at the end.

  Add-Type -Path .\itextsharp.dll
  $validDate = "11 Dec 2015"

  # Time the whole run; the summary line reports the elapsed seconds.
  $stopWatch = [System.Diagnostics.Stopwatch]::StartNew()

  # Day of month without its leading zero; the search pattern re-adds an optional "0".
  $parsedDate = [datetime]$validDate
  $day = $parsedDate.ToString('dd') -replace "^0", ""

  # Define the valid Month+Year formats here; each one is combined with $day in the search.
  # The formatted text is regex-escaped because it is spliced into a pattern below.
  $validMYArray = @(
      @(
          $parsedDate.ToString(' MMM yyyy'),
          $parsedDate.ToString(' MMMM yyyy'),
          $parsedDate.ToString('/MM/yyyy'),
          $parsedDate.ToString('-MMM-yy')
      ) | ForEach-Object { [regex]::Escape($_) }
  )

  # PDF counters and report text
  $countTotal = $countGood = $countBad = 0
  $badDate = $foundDate = ""
  $PDFPath = "\\server\share"
  $badPDF = "Bad PDF (e.g. incorrect date, empty date):`n`n"

  Write-Host "Mapping a Drive..."
  # Stop here if the share cannot be mapped; nothing below can work without it.
  New-PSDrive -Name NetworkDrive -PSProvider FileSystem -Root $PDFPath -ErrorAction Stop

  Write-Host "Parsing PDF Files..."
  #################################################################
  ### Search PDF files in the UNC folder, parse each PDF,       ###
  ### collect the PDFs with an incorrect date                   ###
  #################################################################
  try {
      Get-ChildItem -Path NetworkDrive:\ -Filter *.pdf -Recurse |
      ForEach-Object {
          $pdfFile = $_          # keep the file; $_ is rebound inside catch blocks
          $countTotal++
          $badDate = $foundDate = ""
          $pageText = @()
          $reader = $null

          try {
              $reader = New-Object iTextSharp.text.pdf.pdfreader -ArgumentList $pdfFile.FullName
              $pageText = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, 1) -join "" -split "`n"
          } catch {
              # An unreadable PDF is reported as bad with an empty date instead of
              # silently re-using the text of the previously parsed file.
              Write-Warning "Cannot read $($pdfFile.FullName): $($_.Exception.Message)"
              $pageText = @()
          } finally {
              # Free the reader for every file; it locks the PDF otherwise.
              if ($null -ne $reader) { $reader.Dispose() }
          }

          # Search each line for a date in one of the formats defined above.
          # (?<!\d) keeps a valid day such as "1" from matching inside "21" or "31".
          :loop ForEach ($line in $pageText) {
              ForEach ($validMY in $validMYArray) {
                  if ($line -cmatch "(?<!\d)0?$day$validMY") {
                      $countGood++
                      $foundDate = "Yes"
                      break loop
                  } elseif ($line -cmatch "(?<!\d)([0-3]?\d$validMY)") {
                      # Same month/year but a different day: remember it for the report.
                      $badDate = $Matches[1]
                      break loop
                  }
              }
          }

          if ($foundDate -ne "Yes") {
              $countBad++
              # A Tab character before `n avoids the Outlook "extra line break" issue.
              $badPDF += $pdfFile.FullName + " ($badDate)`t`n"
          }
      }
  } finally {
      # Unmap the drive even when the scan fails part-way.
      Remove-PSDrive -Name NetworkDrive
      $stopWatch.Stop()
  }

  # Strip everything from "\\" up to the last backslash on each line, leaving only file names.
  $badPDF = $badPDF -replace "\\\\.*\\", ""
  # The original also interpolated $validDateNum here, which was never defined (always empty).
  Write-Host "$badPDF`n`n$countTotal PDFs, Good=$countGood, Bad=$countBad $($stopWatch.Elapsed.TotalSeconds) sec`n$PDFPath"

  Write-Host "Email Result..."
  # NOTE(review): Body is a placeholder and does not include $badPDF - confirm what should be sent.
  $messageParameters = @{
      Subject    = "PDF Checked has finish"
      Body       = "Say something here"
      From       = "a@yahoo.com"
      To         = "b@yahoo.com"
      SmtpServer = "mailserver"
  }
  Send-MailMessage @messageParameters
Add Comment
Please, Sign In to add comment