Advertisement
Tician

C# pdf to text

Sep 16th, 2016
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 1.80 KB | None | 0 0
  1. //A very simple solution to reading pdf-Files to text with Visual Studio and C#.
  2.  
  3. //Download NuGet from here https://www.nuget.org/
  4. //Right-click your new project and choose "Manage NuGet Packages..." (the exact wording may differ between Visual Studio versions), then install iTextSharp for your project from the online package list.
  5. //This example reads a simple PDF file (mine has just one page; you might have to modify the code if yours has more pages, but I am not sure) and prints the text to the console. Do whatever you want with it :)
  6.  
  7. using System;
  8. using System.Collections.Generic;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Threading.Tasks;
  12. using iTextSharp.text.pdf;
  13. using iTextSharp.text.pdf.parser;
  14.  
  namespace ReadPDF
  {
      /// <summary>
      /// Console program that extracts the text of a PDF file with iTextSharp
      /// and prints it to standard output.
      /// </summary>
      class Program
      {
          // Input file used when no path is supplied on the command line.
          private const string DefaultPdfPath = "C:\\Temp\\example.pdf";

          /// <summary>
          /// Reads every page of the PDF, prints the combined text to the console
          /// and then waits for a key press.
          /// Any exception is caught and written to the console instead of crashing.
          /// </summary>
          /// <param name="args">
          /// Optional command-line arguments. If present, <c>args[0]</c> is used as
          /// the path of the PDF to read; otherwise <see cref="DefaultPdfPath"/> is used.
          /// </param>
          static void Main(string[] args)
          {
              try
              {
                  var pdfPath = (args != null && args.Length > 0) ? args[0] : DefaultPdfPath;
                  var text = new StringBuilder();

                  using (var pdfReader = new PdfReader(pdfPath))
                  {
                      // iTextSharp page numbers are 1-based.
                      for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                      {
                          // Use a new strategy instance for each page.
                          ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                          // The result is already a regular .NET string. Round-tripping it
                          // through Encoding.Default (ANSI) bytes would only replace every
                          // character the code page cannot represent with '?', so no
                          // re-encoding is done here.
                          var pageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                          // Separate consecutive pages with a line break; output for a
                          // single-page document is unchanged.
                          if (page > 1)
                          {
                              text.AppendLine();
                          }

                          text.Append(pageText);
                      }
                  }

                  // Print the accumulated text of ALL pages, not just the last one read.
                  Console.WriteLine(text.ToString());
                  Console.ReadKey();
              }
              catch (Exception ex)
              {
                  // Show the full exception (type, message and stack trace).
                  Console.WriteLine(ex);
              }
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement