Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
using System;
using System.Net.Http;
using System.Threading.Tasks;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using CsQuery;
using Fizzler.Systems.HtmlAgilityPack;
using HtmlAgilityPack;
using Majestic12;
using mshtml;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
/// <summary>
/// BenchmarkDotNet suite that extracts the same "stock" text from one product
/// page using several HTML parsing libraries, so their cost can be compared.
/// </summary>
/// <remarks>
/// The page is downloaded once in <see cref="GlobalSetup"/>; every benchmark
/// except <see cref="SeleniumParser"/> then parses that cached markup.
/// </remarks>
public class HtmlParserBenchmark
{
    // XPath to the stock paragraph; shared by the HtmlAgilityPack and Selenium benchmarks.
    private const string StockXPath = "//*[@id='product-752']/div[2]/p[2]";

    // CSS selector / class token identifying the stock paragraph.
    private const string StockSelector = ".stock";
    private const string StockClass = "stock";

    private const string url = "https://scrapeme.live/shop/Pikachu/";

    private readonly HttpClient httpClient = new HttpClient();

    // Markup of the page at `url`, populated by GlobalSetup.
    private string htmlContent;

    /// <summary>
    /// Downloads the page once so the parsing benchmarks do not measure network time.
    /// </summary>
    /// <exception cref="HttpRequestException">
    /// The request failed or the server answered with a non-success status code.
    /// </exception>
    [GlobalSetup]
    public async Task GlobalSetup()
    {
        using (HttpResponseMessage response = await httpClient.GetAsync(url))
        {
            // Fail fast instead of silently benchmarking an error page.
            response.EnsureSuccessStatusCode();
            htmlContent = await response.Content.ReadAsStringAsync();
        }
    }

    /// <summary>Releases the HTTP client created for <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        httpClient.Dispose();
    }

    /// <summary>Parses the cached markup with HtmlAgilityPack and selects the node by XPath.</summary>
    /// <returns>The inner text of the selected node.</returns>
    [Benchmark]
    public string HtmlAgilityPackParser()
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(htmlContent);

        HtmlNode stockAmount = doc.DocumentNode.SelectSingleNode(StockXPath);
        return stockAmount.InnerText;
    }

    /// <summary>Parses the cached markup with AngleSharp and selects the node by CSS selector.</summary>
    /// <returns>The text content of the first element matching <c>.stock</c>.</returns>
    [Benchmark]
    public string AngleSharpParser()
    {
        // Use the synchronous parser entry point rather than blocking on the
        // Task returned by a browsing context's OpenAsync.
        var parser = new HtmlParser();
        var document = parser.ParseDocument(htmlContent);

        var stockAmountNode = document.QuerySelector(StockSelector);
        return stockAmountNode.TextContent;
    }

    /// <summary>Parses the cached markup with CsQuery and selects the node by CSS selector.</summary>
    /// <returns>The text of the elements matching <c>.stock</c>.</returns>
    [Benchmark]
    public string CsQueryParser()
    {
        var cq = CQ.Create(htmlContent);
        return cq.Find(StockSelector).Text();
    }

    /// <summary>
    /// Parses the cached markup with HtmlAgilityPack and selects the node through
    /// Fizzler's CSS selector extension.
    /// </summary>
    /// <returns>The trimmed inner text of the first node matching <c>.stock</c>.</returns>
    [Benchmark]
    public string FizzlerParser()
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(htmlContent);

        var stockAmount = doc.DocumentNode.QuerySelector(StockSelector).InnerText;
        return stockAmount.Trim();
    }

    /// <summary>
    /// Writes the cached markup into an MSHTML document and scans all elements
    /// for one carrying the <c>stock</c> class.
    /// </summary>
    /// <returns>The inner text of the first matching element, or <c>null</c> if none matches.</returns>
    [Benchmark]
    public string MsHtmlParser()
    {
        // Create an HTMLDocument and cast it to IHTMLDocument2
        var htmlDoc = new HTMLDocument();
        var ihtmlDoc = (IHTMLDocument2)htmlDoc;

        // Open the document and write the HTML content
        ihtmlDoc.open();
        ihtmlDoc.write(htmlContent);
        ihtmlDoc.close();

        foreach (IHTMLElement element in ihtmlDoc.all)
        {
            // Match the class as a token so an element with several classes
            // (e.g. class="stock in-stock") is still found.
            if (HasClassToken(element.className, StockClass))
            {
                return element.innerText;
            }
        }

        return null;
    }

    /// <summary>
    /// Loads the live page in Chrome through Selenium and reads the stock element.
    /// </summary>
    /// <remarks>
    /// Unlike the other benchmarks this one starts a browser and navigates to the
    /// URL on every invocation, so it measures far more than parsing.
    /// </remarks>
    /// <returns>The visible text of the element selected by XPath.</returns>
    [Benchmark]
    public string SeleniumParser()
    {
        using (var driver = new ChromeDriver())
        {
            driver.Navigate().GoToUrl(url);

            // Single-quoted attribute value: double quotes would terminate the C# string literal.
            var stockElement = driver.FindElement(By.XPath(StockXPath));
            return stockElement.Text;
        }
    }

    /// <summary>
    /// Streams the cached markup through the Majestic12 parser and returns the text
    /// that follows the first open tag carrying the <c>stock</c> class.
    /// </summary>
    /// <returns>The trimmed text, or an empty string if no such tag or text is found.</returns>
    [Benchmark]
    public string Majestic12Parser()
    {
        HTMLparser oP = new HTMLparser();
        oP.Init(htmlContent);

        HTMLchunk oChunk;
        while ((oChunk = oP.ParseNext()) != null)
        {
            if (oChunk.oType != HTMLchunkType.OpenTag)
            {
                continue;
            }

            // Tags without a class attribute yield null here; HasClassToken tolerates that.
            string classAttribute = oChunk.oParams == null ? null : oChunk.oParams["class"] as string;
            if (!HasClassToken(classAttribute, StockClass))
            {
                continue;
            }

            // NOTE(review): an open-tag chunk describes the tag itself, not its content;
            // the element's text is expected in the Text chunk that follows - confirm
            // against the Majestic12 version in use.
            HTMLchunk textChunk = oP.ParseNext();
            if (textChunk != null && textChunk.oType == HTMLchunkType.Text && textChunk.oHTML != null)
            {
                return textChunk.oHTML.Trim();
            }

            return string.Empty;
        }

        return string.Empty;
    }

    /// <summary>
    /// Reports whether a whitespace-separated class attribute value contains
    /// <paramref name="token"/> as a whole class name (ordinal, case-sensitive).
    /// </summary>
    /// <param name="classAttribute">Raw class attribute value; may be null or empty.</param>
    /// <param name="token">Class name to look for.</param>
    /// <returns><c>true</c> if the token is one of the classes; otherwise <c>false</c>.</returns>
    private static bool HasClassToken(string classAttribute, string token)
    {
        if (string.IsNullOrEmpty(classAttribute))
        {
            return false;
        }

        // A null separator array makes Split break on any whitespace.
        string[] classes = classAttribute.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        return Array.IndexOf(classes, token) >= 0;
    }

    /// <summary>Entry point: runs every benchmark in this class.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        BenchmarkRunner.Run<HtmlParserBenchmark>();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment