Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Net;
- using System.Text;
- using System.Threading;
- using System.Threading.Tasks;
- using System.Windows;
- using System.Windows.Controls;
- using System.Windows.Data;
- using System.Windows.Documents;
- using System.Windows.Input;
- using System.Windows.Media;
- using System.Windows.Media.Imaging;
- using System.Windows.Navigation;
- using System.Windows.Shapes;
namespace SlickBookDataStealer
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// Holds a hard-coded list of bol.com book categories and scrapes product
    /// details for each of them from the category listing and product pages.
    /// </summary>
    public partial class MainWindow : Window
    {
        /// <summary>Categories to scrape; filled in by the constructor.</summary>
        public List<Category> CategoryList = new List<Category>();

        /// <summary>Base address that all relative category and product links are resolved against.</summary>
        public readonly Uri BOL_COM_BASE_URL = new Uri("https://www.bol.com");

        /// <summary>Maximum number of products collected for a single category.</summary>
        public readonly int PRODUCT_COUNT_PER_CATEGORY = 250;

        /// <summary>Pause after each scraped product page, in milliseconds.</summary>
        private const int RequestDelayMilliseconds = 300;

        // One shared generator: creating a new Random per product can repeat
        // values on runtimes that seed it from the system clock.
        private readonly Random _stockRandom = new Random();

        /// <summary>
        /// Builds the window, registers the hard-coded categories and runs the scrape.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            CategoryList.AddRange(new[]
            {
                new Category { Id = 1, Description = "Literatuur & Romans", PageLink = "/nl/l/literatuur-romans/N/24410/?view=list" },
                new Category { Id = 2, Description = "Thrillers & Spanning", PageLink = "/nl/l/thrillers/N/2551/?view=list" },
                new Category { Id = 3, Description = "Kookboeken", PageLink = "/nl/l/koken-eten-drinken/N/1701/?view=list" },
                new Category { Id = 4, Description = "Kinderboeken", PageLink = "/nl/l/kinderboeken/N/24421/?view=list" },
                new Category { Id = 5, Description = "Studieboeken", PageLink = "/nl/l/boeken-studieboeken/N/8299+4273962347/?view=list" },
                new Category { Id = 6, Description = "Biografieën", PageLink = "/nl/l/literaire-non-fictie-biografieen/N/24415+23928/?view=list" },
                new Category { Id = 7, Description = "Fantasy, Horror & Sci-fi", PageLink = "/nl/l/fantasy-science-fiction/N/2510/?view=list" },
                new Category { Id = 8, Description = "Geschiedenis & Politiek", PageLink = "/nl/l/geschiedenisboeken/N/22671/?view=list" },
                new Category { Id = 9, Description = "Gezin & Gezondheid", PageLink = "/nl/l/gezondheid-psychologie/N/1969/?view=list" },
                new Category { Id = 10, Description = "Hobby, Huis & Tuin", PageLink = "/nl/l/hobby-huis-en-tuinboeken/N/2666/?view=list" },
                new Category { Id = 11, Description = "Kunst & Fotografie", PageLink = "/nl/l/kunstboeken/N/2271/?view=list" },
                new Category { Id = 12, Description = "Managementboeken", PageLink = "/nl/l/boeken/N/8299+23864/?view=list" },
                new Category { Id = 13, Description = "Reizen & Vakantie", PageLink = "/nl/l/reisboeken/N/2787/?view=list" },
                new Category { Id = 14, Description = "Religie & Spiritualiteit", PageLink = "/nl/l/spiritualiteit/N/2601/?view=list" },
                new Category { Id = 15, Description = "Sportboeken", PageLink = "/nl/l/outdoor-sportboeken/N/2806/?view=list" },
                new Category { Id = 16, Description = "Stripboeken", PageLink = "/nl/l/stripboeken/N/7311/?view=list" },
                new Category { Id = 17, Description = "Young Adult", PageLink = "/nl/l/young-adult-boeken/N/10756/?view=list" },
            });

            // NOTE(review): this runs the whole scrape synchronously inside the
            // constructor, so the window cannot appear until it finishes, and the
            // result is not used afterwards. Kept as-is to preserve behavior;
            // consider moving it to a background operation and consuming the list.
            var productsList = getProducts();
        }

        /// <summary>
        /// Scrapes up to <see cref="PRODUCT_COUNT_PER_CATEGORY"/> non-ebook products
        /// for every entry in <see cref="CategoryList"/>.
        /// </summary>
        /// <returns>All scraped products, in category order.</returns>
        /// <remarks>
        /// Listing pages are requested one after another per category. A category
        /// ends when its limit is reached or when a listing page contains no
        /// product rows, so running out of pages no longer loops or crashes.
        /// </remarks>
        public List<Product> getProducts()
        {
            var products = new List<Product>();

            foreach (var category in CategoryList)
            {
                int collected = 0;

                for (int page = 1; collected < PRODUCT_COUNT_PER_CATEGORY; page++)
                {
                    var listingUrl = new Uri(BOL_COM_BASE_URL, category.PageLink + $"&page={page}").AbsoluteUri;
                    var rows = getPageHtml(listingUrl).DocumentNode.SelectNodes("//*[contains(@class,'product-item--row')]");

                    // SelectNodes yields null when nothing matches: no more rows for this category.
                    if (rows == null || rows.Count == 0)
                    {
                        break;
                    }

                    foreach (var row in rows)
                    {
                        // Stop mid-page once the limit is hit instead of draining the page.
                        if (collected >= PRODUCT_COUNT_PER_CATEGORY)
                        {
                            break;
                        }

                        if (IsEbook(row))
                        {
                            continue;
                        }

                        var productLink = GetProductLink(row);
                        if (productLink == null)
                        {
                            continue;
                        }

                        var productUrl = new Uri(BOL_COM_BASE_URL, productLink).AbsoluteUri;
                        products.Add(ScrapeProduct(productUrl, category));
                        collected++;

                        // Throttle between product page requests.
                        Thread.Sleep(RequestDelayMilliseconds);
                    }
                }
            }

            return products;
        }

        /// <summary>Returns true when the row's small-specs text mentions "Ebook".</summary>
        private static bool IsEbook(HtmlAgilityPack.HtmlNode row)
        {
            var specs = row.SelectSingleNode(".//*[contains(@class,'product-small-specs')]");
            return specs != null && specs.InnerText.Contains("Ebook");
        }

        /// <summary>
        /// Reads the href of the second child of the row's product-title element,
        /// or returns null when the title, the child or the attribute is absent.
        /// </summary>
        private static string GetProductLink(HtmlAgilityPack.HtmlNode row)
        {
            var title = row.SelectSingleNode(".//*[contains(@class,'product-title')]");
            if (title == null || title.ChildNodes.Count < 2)
            {
                return null;
            }

            return title.ChildNodes[1].Attributes["href"]?.Value;
        }

        /// <summary>
        /// Downloads one product page and maps its fields onto a new <see cref="Product"/>.
        /// Text fields whose element is missing stay null; numeric fields fall back to 0.
        /// </summary>
        private Product ScrapeProduct(string productUrl, Category category)
        {
            var root = getPageHtml(productUrl).DocumentNode;

            // Both nodes feed several fields, so look them up once.
            var headerSpecs = root.SelectSingleNode(".//*[contains(@class,'product-small-specs--large')]");
            var ratings = root.SelectSingleNode(".//ul[contains(@class,'review-summary__ratings')]");

            return new Product
            {
                Name = root.SelectSingleNode(".//*[contains(@class,'pdp-header__title')]")?.InnerText,
                CategoryId = category.Id,
                // Stock is not scraped; it is a random placeholder in the range 0-69.
                InStock = _stockRandom.Next(70),
                // Some product pages have no description element, hence the null-safe access.
                Description = CleanText(root.SelectSingleNode(".//div[@data-test='description']")?.InnerText),
                Price = ParseNumber(CleanText(root.SelectSingleNode(".//*[contains(@class,'product-prices__bol-price')]")?.InnerText)),
                Author = root.SelectSingleNode(".//*[contains(@class,'specs__party')]")?.InnerText,
                Language = ChildText(headerSpecs, 1),
                Version = ChildText(headerSpecs, 3),
                ReleasedOn = ChildText(headerSpecs, 7),
                PageCount = 0,
                // The rating rows are read bottom-up: child 9 feeds ratingCount1, child 1 feeds ratingCount5.
                ratingCount1 = ParseRatingCount(ratings, 9),
                ratingCount2 = ParseRatingCount(ratings, 7),
                ratingCount3 = ParseRatingCount(ratings, 5),
                ratingCount4 = ParseRatingCount(ratings, 3),
                ratingCount5 = ParseRatingCount(ratings, 1),
            };
        }

        /// <summary>Removes line breaks and surrounding whitespace; null stays null.</summary>
        private static string CleanText(string raw)
        {
            // Uses real newline characters; a verbatim @"\n" would only match a
            // literal backslash followed by 'n'.
            return raw?.Replace("\r", "").Replace("\n", "").Trim();
        }

        /// <summary>
        /// Returns the cleaned inner text of the child at <paramref name="index"/>,
        /// or null when the parent is missing or has too few children.
        /// </summary>
        private static string ChildText(HtmlAgilityPack.HtmlNode parent, int index)
        {
            // An out-of-range child index throws rather than returning null,
            // so the count has to be checked explicitly.
            if (parent == null || index >= parent.ChildNodes.Count)
            {
                return null;
            }

            return CleanText(parent.ChildNodes[index].InnerText);
        }

        /// <summary>Parses the number in a rating row after stripping the "beoordelingen" label; 0 when absent.</summary>
        private static double ParseRatingCount(HtmlAgilityPack.HtmlNode ratings, int childIndex)
        {
            var text = ChildText(ratings, childIndex);
            return ParseNumber(text?.Replace("beoordelingen", "").Trim());
        }

        /// <summary>Parses a number, returning 0 for null or unparsable text.</summary>
        private static double ParseNumber(string text)
        {
            // NOTE(review): parsing uses the current culture, as the original did;
            // confirm it matches the number format the site actually emits.
            double value;
            return double.TryParse(text, out value) ? value : 0.0;
        }

        /// <summary>
        /// Downloads <paramref name="pageLink"/> and parses the response into an HTML document.
        /// </summary>
        /// <param name="pageLink">Absolute URL of the page to fetch.</param>
        /// <returns>The parsed document.</returns>
        private HtmlAgilityPack.HtmlDocument getPageHtml(string pageLink)
        {
            var request = (HttpWebRequest)WebRequest.Create(pageLink);

            // Dispose the response and reader so connections are released
            // instead of piling up over thousands of requests.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (var reader = new StreamReader(response.GetResponseStream()))
            {
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.Load(reader);
                return doc;
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement