Advertisement
dereksir

Untitled

Nov 2nd, 2023 (edited)
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 3.67 KB | None | 0 0
  using System;
  using System.Net.Http;
  using System.Threading.Tasks;
  using AngleSharp.Dom;
  using AngleSharp.Html.Parser;
  using BenchmarkDotNet.Attributes;
  using BenchmarkDotNet.Running;
  using CsQuery;
  using Fizzler.Systems.HtmlAgilityPack;
  using HtmlAgilityPack;
  using Majestic12;
  using mshtml;
  using OpenQA.Selenium;
  using OpenQA.Selenium.Chrome;
  13.  
  /// <summary>
  /// BenchmarkDotNet suite comparing several HTML parsing libraries on one task:
  /// extracting the text of the "stock" element from a single product page.
  /// </summary>
  /// <remarks>
  /// The page is downloaded once in <see cref="GlobalSetup"/> and the cached markup is
  /// reused by every benchmark except <see cref="SeleniumParser"/>, which drives a real
  /// browser against the live URL.
  /// </remarks>
  public class HtmlParserBenchmark
  {
      // Page that is downloaded once and then parsed by each benchmark.
      private const string ProductUrl = "https://scrapeme.live/shop/Pikachu/";

      // XPath of the stock paragraph; shared by the XPath-based benchmarks so they
      // cannot drift apart.
      private const string StockXPath = "//*[@id='product-752']/div[2]/p[2]";

      // CSS selector / class name of the stock paragraph used by the other benchmarks.
      private const string StockSelector = ".stock";
      private const string StockClassName = "stock";

      // A single shared client, created once for the whole benchmark run.
      private static readonly HttpClient s_httpClient = new HttpClient();

      // Raw HTML of the product page, populated by GlobalSetup.
      private string htmlContent;

      /// <summary>
      /// Downloads the product page once so the parsing benchmarks work on cached markup.
      /// </summary>
      /// <exception cref="HttpRequestException">
      /// The request failed or the server returned a non-success status code.
      /// </exception>
      [GlobalSetup]
      public async Task GlobalSetup()
      {
          // GetStringAsync throws on a non-success status, so an error page is never
          // benchmarked by mistake, and it disposes the response for us.
          htmlContent = await s_httpClient.GetStringAsync(ProductUrl).ConfigureAwait(false);
      }

      /// <summary>Parses the page with HtmlAgilityPack and selects the stock node by XPath.</summary>
      /// <returns>The node's inner text, or <c>null</c> when the XPath matches nothing.</returns>
      [Benchmark]
      public string HtmlAgilityPackParser()
      {
          var doc = new HtmlDocument();
          doc.LoadHtml(htmlContent);

          // SelectSingleNode returns null when nothing matches; do not dereference blindly.
          HtmlNode stockNode = doc.DocumentNode.SelectSingleNode(StockXPath);
          return stockNode?.InnerText;
      }

      /// <summary>Parses the page with AngleSharp and selects the stock node by CSS selector.</summary>
      /// <returns>The node's text content, or <c>null</c> when the selector matches nothing.</returns>
      [Benchmark]
      public string AngleSharpParser()
      {
          // Parse synchronously with HtmlParser instead of blocking on the Task returned
          // by BrowsingContext.OpenAsync (sync-over-async via .Result).
          var parser = new HtmlParser();
          var document = parser.ParseDocument(htmlContent);

          var stockNode = document.QuerySelector(StockSelector);
          return stockNode?.TextContent;
      }

      /// <summary>Parses the page with CsQuery and reads the text of the stock selection.</summary>
      /// <returns>The text of the elements matched by the stock selector.</returns>
      [Benchmark]
      public string CsQueryParser()
      {
          var dom = CQ.Create(htmlContent);
          return dom.Find(StockSelector).Text();
      }

      /// <summary>
      /// Parses the page with HtmlAgilityPack and selects the stock node through
      /// Fizzler's CSS selector extension.
      /// </summary>
      /// <returns>The trimmed inner text, or <c>null</c> when the selector matches nothing.</returns>
      [Benchmark]
      public string FizzlerParser()
      {
          var doc = new HtmlDocument();
          doc.LoadHtml(htmlContent);

          HtmlNode stockNode = doc.DocumentNode.QuerySelector(StockSelector);
          return stockNode?.InnerText.Trim();
      }

      /// <summary>
      /// Loads the page into an MSHTML document and scans all elements for the first one
      /// carrying the stock class.
      /// </summary>
      /// <returns>The element's inner text, or <c>null</c> when no element matches.</returns>
      [Benchmark]
      public string MsHtmlParser()
      {
          // Create an HTMLDocument and use it through IHTMLDocument2.
          var htmlDoc = new HTMLDocument();
          var ihtmlDoc = (IHTMLDocument2)htmlDoc;

          // Open the document and write the HTML content.
          ihtmlDoc.open();
          ihtmlDoc.write(htmlContent);
          ihtmlDoc.close();

          foreach (IHTMLElement element in ihtmlDoc.all)
          {
              // className holds the whole class attribute, which may list several names,
              // so compare per name rather than against the full string.
              if (HasCssClass(element.className, StockClassName))
              {
                  return element.innerText;
              }
          }

          return null;
      }

      /// <summary>
      /// Starts a Chrome WebDriver, navigates to the live page and reads the stock element.
      /// </summary>
      /// <remarks>
      /// Unlike the other benchmarks this does not use the cached markup: each invocation
      /// starts a browser and navigates to the URL, so that work is part of what is measured.
      /// </remarks>
      /// <returns>The visible text of the stock element.</returns>
      [Benchmark]
      public string SeleniumParser()
      {
          using (var driver = new ChromeDriver())
          {
              driver.Navigate().GoToUrl(ProductUrl);

              // The XPath uses single quotes inside the C# string literal; the previous
              // unescaped double quotes did not compile.
              var stockElement = driver.FindElement(By.XPath(StockXPath));
              return stockElement.Text;
          }
      }

      /// <summary>
      /// Tokenizes the page with the Majestic12 parser and returns the chunk HTML of the
      /// first open tag carrying the stock class.
      /// </summary>
      /// <returns>
      /// The trimmed <c>oHTML</c> of the matching chunk, or an empty string when no open
      /// tag matches.
      /// </returns>
      [Benchmark]
      public string Majestic12Parser()
      {
          var parser = new HTMLparser();
          parser.Init(htmlContent);

          HTMLchunk chunk;
          while ((chunk = parser.ParseNext()) != null)
          {
              if (chunk.oType != HTMLchunkType.OpenTag)
              {
                  continue;
              }

              // Tags without a class attribute yield null here; the previous code called
              // Contains on that null and threw. A substring test is also not a class
              // match (it accepts any name that merely contains "stock").
              string classAttribute = chunk.oParams?["class"] as string;
              if (HasCssClass(classAttribute, StockClassName))
              {
                  // NOTE(review): this returns oHTML of the open-tag chunk itself, which may
                  // be the tag markup rather than the element's text - confirm against the
                  // Majestic12 API.
                  return (chunk.oHTML ?? string.Empty).Trim();
              }
          }

          return string.Empty;
      }

      /// <summary>Entry point: runs every benchmark in this class.</summary>
      /// <param name="args">Command-line arguments (unused).</param>
      public static void Main(string[] args)
      {
          BenchmarkRunner.Run<HtmlParserBenchmark>();
      }

      /// <summary>
      /// Checks whether a whitespace-separated class attribute value contains the given
      /// class name as a whole token.
      /// </summary>
      /// <param name="classAttribute">Raw class attribute value; may be null or empty.</param>
      /// <param name="className">Class name to look for.</param>
      /// <returns><c>true</c> when one of the tokens equals <paramref name="className"/>.</returns>
      private static bool HasCssClass(string classAttribute, string className)
      {
          if (string.IsNullOrWhiteSpace(classAttribute))
          {
              return false;
          }

          // A null separator array makes Split break on any whitespace.
          string[] tokens = classAttribute.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
          foreach (string token in tokens)
          {
              if (string.Equals(token, className, StringComparison.Ordinal))
              {
                  return true;
              }
          }

          return false;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement