Advertisement
Guest User

Untitled

a guest
Sep 22nd, 2019
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.43 KB | None | 0 0
  /// <summary>
  /// Entry point. Opens "artwork1.pdf" from the current directory, runs a
  /// <c>TableAbsorber</c> over the first page, keeps the detected tables whose
  /// rectangle is narrower than 100 and taller than 100, outlines each one on the
  /// page, prints the normalized text found inside it, and finally saves the document.
  /// </summary>
  /// <param name="args">Command-line arguments; not used.</param>
  static void Main(string[] args)
  {
      SetLicense();

      var path = Path.Combine(Environment.CurrentDirectory, "artwork1.pdf");
      using (var doc = new Document(path))
      {
          var tableOptions = new TextSearchOptions(false)
          {
              IgnoreShadowText = true,
              SearchForTextRelatedGraphics = false,
              UseFontEngineEncoding = false
          };
          var tableAbsorber = new TableAbsorber(tableOptions);

          Console.WriteLine("Starting the TableAbsorber");

          var page = doc.Pages.First();
          tableAbsorber.Visit(page);

          Console.WriteLine("Done with the TableAbsorber");

          // Materialize the filter once: the result is counted for the summary line,
          // counted again on every loop iteration, and iterated by the foreach.
          // Leaving it as a deferred query would re-run both predicates each time.
          var tables = tableAbsorber.TableList
              .Where(x => x.Rectangle.Width < 100)
              .Where(x => x.Rectangle.Height > 100)
              .ToList();

          Console.WriteLine($"There are {tables.Count} tables out of {tableAbsorber.TableList.Count()}.");

          // Blocks until a character is available on standard input before continuing.
          Console.Read();

          var index = 0;

          foreach (var table in tables)
          {
              Console.WriteLine($"{++index} of {tables.Count}");
              Console.WriteLine($"{table.Rectangle.Width} by {table.Rectangle.Height}");
              DrawRectangleOnPage(table.Rectangle, page);

              var text = ExtractText(table.Rectangle, page);
              if (!string.IsNullOrEmpty(text))
              {
                  // Lower-case with the invariant culture: the regex below keeps only
                  // ASCII a-z, 0-9 and ':', and a culture-sensitive ToLower() can map
                  // 'I' to a non-ASCII letter (e.g. under tr-TR), which would then be
                  // stripped from the output.
                  var search = Regex.Replace(text.ToLowerInvariant(), "[^a-z0-9:]", string.Empty);
                  Console.WriteLine(search);
              }
              else
              {
                  Console.WriteLine("No text.");
              }
          }

          SaveDoc(doc);
      }
  }
  53.  
  /// <summary>
  /// Runs a <c>TextAbsorber</c> over <paramref name="page"/> using search options
  /// constructed from <paramref name="rectangle"/> and returns the absorbed text.
  /// </summary>
  /// <param name="rectangle">Rectangle passed to the <c>TextSearchOptions</c> constructor.</param>
  /// <param name="page">Page that accepts the absorber.</param>
  /// <returns>The value of the absorber's <c>Text</c> property after the page has been visited.</returns>
  private static string ExtractText(Aspose.Pdf.Rectangle rectangle, Page page)
  {
      var searchOptions = new TextSearchOptions(rectangle);
      searchOptions.LimitToPageBounds = true;
      searchOptions.IsRegularExpressionUsed = false;
      searchOptions.IgnoreShadowText = false;

      var textAbsorber = new TextAbsorber(searchOptions);
      page.Accept(textAbsorber);

      return textAbsorber.Text;
  }
  66.  
  /// <summary>
  /// Appends operators to the page's contents that stroke the outline of
  /// <paramref name="rectangle"/> with an RGB stroke color of (0, 1, 0) and a
  /// line width of 2.
  /// </summary>
  /// <param name="rectangle">Rectangle whose LLX/LLY corner, width and height define the outline.</param>
  /// <param name="page">Page whose <c>Contents</c> collection receives the operators.</param>
  private static void DrawRectangleOnPage(Rectangle rectangle, Page page)
  {
      var contents = page.Contents;

      var left = rectangle.LLX;
      var bottom = rectangle.LLY;
      var width = rectangle.Width;
      var height = rectangle.Height;

      // The drawing is bracketed by GSave/GRestore so the stroke settings
      // added here are scoped to this outline.
      contents.Add(new Aspose.Pdf.Operators.GSave());

      // Identity matrix (1, 0, 0, 1, 0, 0): coordinates are used as given.
      contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
      contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 1, 0));
      contents.Add(new Aspose.Pdf.Operators.SetLineWidth(2));

      contents.Add(new Aspose.Pdf.Operators.Re(left, bottom, width, height));
      contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());

      contents.Add(new Aspose.Pdf.Operators.GRestore());
  }
  81.  
  /// <summary>
  /// Appends operators to the page's contents that stroke a closed outline through
  /// the points of <paramref name="polygon"/>, using an RGB stroke color of
  /// (0, 0, 1) and a line width of 1.
  /// </summary>
  /// <param name="polygon">Vertices of the outline, in drawing order. An empty array draws nothing.</param>
  /// <param name="page">Page whose <c>Contents</c> collection receives the operators.</param>
  /// <exception cref="ArgumentNullException"><paramref name="polygon"/> is <c>null</c>.</exception>
  private static void DrawPolygonOnPage(Point[] polygon, Page page)
  {
      // Validate before touching the page: the path starts at polygon[0], so failing
      // after GSave has been appended would leave an unbalanced save operator behind.
      if (polygon == null)
      {
          throw new ArgumentNullException(nameof(polygon));
      }

      if (polygon.Length == 0)
      {
          return;
      }

      page.Contents.Add(new Aspose.Pdf.Operators.GSave());
      // Identity matrix (1, 0, 0, 1, 0, 0): coordinates are used as given.
      page.Contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
      page.Contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 0, 1));
      page.Contents.Add(new Aspose.Pdf.Operators.SetLineWidth(1));

      page.Contents.Add(new Aspose.Pdf.Operators.MoveTo(polygon[0].X, polygon[0].Y));
      for (var i = 1; i < polygon.Length; i++)
      {
          page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[i].X, polygon[i].Y));
      }

      // Explicit segment back to the first vertex, kept from the original operator sequence.
      page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[0].X, polygon[0].Y));
      page.Contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());
      page.Contents.Add(new Aspose.Pdf.Operators.GRestore());
  }
  97.  
  /// <summary>
  /// Creates an <c>Aspose.Pdf.License</c> and calls <c>SetLicense</c> on it with
  /// the name "Aspose.Pdf.lic".
  /// </summary>
  private static void SetLicense() =>
      new Aspose.Pdf.License().SetLicense("Aspose.Pdf.lic");
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement