Advertisement
Guest User

Untitled

a guest
Apr 13th, 2016
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 2.55 KB | None | 0 0
  1.         public Test()
  2.         {
  3.             var sb = SBReadPdfToOutputFile(@"C:\Users\Global\Downloads\LA30AA(US).pdf");
  4.             ReadPdfToOutputFile(@"C:\Users\Global\Downloads\LA30AA(US).pdf");
  5.             var nonSb = File.ReadAllText(System.IO.Path.GetDirectoryName(@"C:\Users\Global\Downloads\LA30AA(US).pdf") + "\\output.txt");
  6.             if (sb.Equals(nonSb))
  7.                 MessageBox.Show("They're the same");
  8.         }
  9.         public static void ReadPdfToOutputFile(string fileName)
  10.         {
  11.  
  12.             string testFilePath = System.IO.Path.GetDirectoryName(fileName) + "\\output.txt";
  13.             if (System.IO.File.Exists(fileName))
  14.             {
  15.                 PdfReader pdfReader = new PdfReader(fileName);
  16.  
  17.                 for (int page = 1; page <= pdfReader.NumberOfPages; page++)
  18.                 {
  19.                     //We can change out the Type of extraction We use when getting Text from PDF
  20.                     ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy(); //TopToBottomTextExtractionStrategy() or SimpleTextExtractionStrategy();
  21.                     string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
  22.                     currentText = Encoding.Default.GetString(Encoding.ASCII.GetBytes(currentText));
  23.                     using (StreamWriter sr = File.AppendText(testFilePath))
  24.                     {
  25.                         sr.Write(currentText);
  26.                     }
  27.  
  28.                 }
  29.                 pdfReader.Close();
  30.             }
  31.         }
  32.         public static String SBReadPdfToOutputFile(string fileName)
  33.         {
  34.  
  35.             string testFilePath = System.IO.Path.GetDirectoryName(fileName) + "\\SBoutput.txt";
  36.             if (System.IO.File.Exists(fileName))
  37.             {
  38.                 PdfReader pdfReader = new PdfReader(fileName);
  39.  
  40.                 StringBuilder sb = new StringBuilder();
  41.                 for (int page = 1; page <= pdfReader.NumberOfPages; page++)
  42.                 {
  43.                     //We can change out the Type of extraction We use when getting Text from PDF
  44.                     ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy(); //TopToBottomTextExtractionStrategy() or SimpleTextExtractionStrategy();
  45.                     var text = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
  46.                     sb.Append(Encoding.Default.GetString(Encoding.ASCII.GetBytes(text)));
  47.                 }
  48.                 pdfReader.Close();
  49.                 return sb.ToString();
  50.             }
  51.             return null;
  52.         }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement