Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Opens "artwork1.pdf" from the current directory, runs a <c>TableAbsorber</c> over the
/// first page, and for every absorbed table whose rectangle is narrower than 100 and
/// taller than 100 draws an outline on the page and prints the text found inside it,
/// reduced to lowercase ASCII letters, digits and colons. The document is finally
/// handed to <c>SaveDoc</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    SetLicense();
    var path = Path.Combine(Environment.CurrentDirectory, "artwork1.pdf");
    using (var doc = new Document(path))
    {
        var tableOptions = new TextSearchOptions(false)
        {
            IgnoreShadowText = true,
            SearchForTextRelatedGraphics = false,
            UseFontEngineEncoding = false
        };
        var tableAbsorber = new TableAbsorber(tableOptions);

        Console.WriteLine("Starting the TableAbsorber");
        var page = doc.Pages.First();
        tableAbsorber.Visit(page);
        Console.WriteLine("Done with the TableAbsorber");

        // Materialise the filter once. Left as a lazy query it would be re-run for
        // every Count() call and for the foreach itself, while the loop body below
        // is busy appending drawing operators to the very page that was scanned.
        var tables = tableAbsorber.TableList
            .Where(x => x.Rectangle.Width < 100 && x.Rectangle.Height > 100)
            .ToList();

        Console.WriteLine($"There are {tables.Count} tables out of {tableAbsorber.TableList.Count()}.");

        // Blocks until a character is available on standard input before continuing.
        Console.Read();

        var index = 0;
        foreach (var table in tables)
        {
            Console.WriteLine($"{++index} of {tables.Count}");
            Console.WriteLine($"{table.Rectangle.Width} by {table.Rectangle.Height}");
            DrawRectangleOnPage(table.Rectangle, page);

            var text = ExtractText(table.Rectangle, page);
            if (string.IsNullOrEmpty(text))
            {
                Console.WriteLine("No text.");
                continue;
            }

            // Invariant lowering: the pattern only keeps ASCII a-z, so a
            // culture-sensitive ToLower() (e.g. Turkish 'I' -> dotless 'ı')
            // would silently drop letters.
            var search = Regex.Replace(text.ToLowerInvariant(), "[^a-z0-9:]", string.Empty);
            Console.WriteLine(search);
        }

        SaveDoc(doc);
    }
}
/// <summary>
/// Runs a <c>TextAbsorber</c> over <paramref name="page"/> using search options built
/// from <paramref name="rectangle"/> and returns the absorber's <c>Text</c>.
/// </summary>
/// <param name="rectangle">Rectangle passed to the <c>TextSearchOptions</c> constructor.</param>
/// <param name="page">Page that accepts the absorber.</param>
/// <returns>The value of the absorber's <c>Text</c> property after the page has been visited.</returns>
private static string ExtractText(Aspose.Pdf.Rectangle rectangle, Page page)
{
    var searchOptions = new TextSearchOptions(rectangle);
    searchOptions.LimitToPageBounds = true;
    searchOptions.IsRegularExpressionUsed = false;
    searchOptions.IgnoreShadowText = false;

    var textAbsorber = new TextAbsorber(searchOptions);
    page.Accept(textAbsorber);

    return textAbsorber.Text;
}
/// <summary>
/// Appends operators to <paramref name="page"/>'s contents that stroke the outline of
/// <paramref name="rectangle"/> in green (RGB 0, 1, 0) with a line width of 2. The
/// operators are wrapped in a GSave/GRestore pair.
/// </summary>
/// <param name="rectangle">Rectangle whose LLX, LLY, Width and Height define the outline.</param>
/// <param name="page">Page whose contents receive the drawing operators.</param>
private static void DrawRectangleOnPage(Rectangle rectangle, Page page)
{
    var contents = page.Contents;

    contents.Add(new Aspose.Pdf.Operators.GSave());

    // Identity matrix: coordinates are used exactly as supplied.
    contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 1, 0));
    contents.Add(new Aspose.Pdf.Operators.SetLineWidth(2));

    var outline = new Aspose.Pdf.Operators.Re(
        rectangle.LLX, rectangle.LLY, rectangle.Width, rectangle.Height);
    contents.Add(outline);
    contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());

    contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Appends operators to <paramref name="page"/>'s contents that stroke the closed
/// outline through the points of <paramref name="polygon"/> in blue (RGB 0, 0, 1)
/// with a line width of 1. The operators are wrapped in a GSave/GRestore pair.
/// </summary>
/// <param name="polygon">Vertices of the outline, in drawing order.</param>
/// <param name="page">Page whose contents receive the drawing operators.</param>
/// <exception cref="ArgumentNullException"><paramref name="polygon"/> is null.</exception>
private static void DrawPolygonOnPage(Point[] polygon, Page page)
{
    // Validate before touching the page: failing on polygon[0] after GSave has
    // already been appended would leave an unbalanced save in the page contents.
    if (polygon == null)
    {
        throw new ArgumentNullException(nameof(polygon));
    }

    if (polygon.Length == 0)
    {
        // Nothing to draw; leave the page untouched.
        return;
    }

    page.Contents.Add(new Aspose.Pdf.Operators.GSave());
    page.Contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    page.Contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 0, 1));
    page.Contents.Add(new Aspose.Pdf.Operators.SetLineWidth(1));

    page.Contents.Add(new Aspose.Pdf.Operators.MoveTo(polygon[0].X, polygon[0].Y));
    for (var i = 1; i < polygon.Length; i++)
    {
        page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[i].X, polygon[i].Y));
    }

    // Explicit segment back to the first vertex, kept from the original so the
    // emitted operator sequence is unchanged.
    page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[0].X, polygon[0].Y));
    page.Contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());
    page.Contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Creates an <c>Aspose.Pdf.License</c> and calls <c>SetLicense</c> on it with the
/// file name "Aspose.Pdf.lic".
/// </summary>
private static void SetLicense()
{
    const string licenseFile = "Aspose.Pdf.lic";
    new Aspose.Pdf.License().SetLicense(licenseFile);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement