Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //A very simple solution to reading pdf-Files to text with Visual Studio and C#.
- //Download NuGet from here https://www.nuget.org/
- //Right-click your new project and choose "Manage NuGet Packages...". Search the online gallery for iTextSharp and install it into your project.
- //This example reads a simple PDF file and prints the extracted text to the console. (Mine has just one page; if yours has more pages, you may need to adapt the code.) Do whatever you want with it :)
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using iTextSharp.text.pdf;
- using iTextSharp.text.pdf.parser;
namespace ReadPDF
{
    /// <summary>
    /// Console demo that extracts the text of every page of a PDF file with
    /// iTextSharp and prints it to standard output.
    /// </summary>
    class Program
    {
        /// <summary>
        /// PDF that is read when no path is supplied on the command line.
        /// </summary>
        private const string DefaultPdfPath = "C:\\Temp\\example.pdf";

        /// <summary>
        /// Reads a PDF file page by page, prints the combined text and then
        /// waits for a key press so the console window stays open.
        /// </summary>
        /// <param name="args">
        /// Optional. When at least one argument is present, the first one is
        /// used as the path of the PDF file; otherwise
        /// <see cref="DefaultPdfPath"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            var pdfPath = (args != null && args.Length > 0) ? args[0] : DefaultPdfPath;

            try
            {
                var text = new StringBuilder();

                using (var pdfReader = new PdfReader(pdfPath))
                {
                    // iTextSharp page numbers are 1-based.
                    for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                    {
                        // Keep the last line of one page from running into
                        // the first line of the next.
                        if (page > 1)
                        {
                            text.AppendLine();
                        }

                        // A fresh strategy per page: the strategy object
                        // accumulates the text it is fed.
                        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                        // The result is already a .NET string. Pushing it
                        // through Encoding.Default would replace characters
                        // missing from the ANSI code page with '?', so it is
                        // appended as-is.
                        text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                    }
                }

                // Print the text of ALL pages, not just the last one read.
                Console.WriteLine(text.ToString());
            }
            catch (Exception ex)
            {
                // Top-level handler of a demo program: report the failure
                // (message and stack trace) instead of crashing.
                Console.WriteLine(ex);
            }
            finally
            {
                // Keep the window open after success and after an error.
                // ReadKey throws when stdin is redirected, so skip it then.
                if (!Console.IsInputRedirected)
                {
                    Console.ReadKey();
                }
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement