Advertisement
Guest User

Untitled

a guest
Oct 15th, 2018
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.96 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Net;
  6. using System.Text;
  7. using System.Threading;
  8. using System.Threading.Tasks;
  9. using System.Windows;
  10. using System.Windows.Controls;
  11. using System.Windows.Data;
  12. using System.Windows.Documents;
  13. using System.Windows.Input;
  14. using System.Windows.Media;
  15. using System.Windows.Media.Imaging;
  16. using System.Windows.Navigation;
  17. using System.Windows.Shapes;
  18.  
  19. namespace SlickBookDataStealer
  20. {
  21. /// <summary>
  22. /// Interaction logic for MainWindow.xaml
  23. /// </summary>
  24. public partial class MainWindow : Window
  25. {
  26. public List<Category> CategoryList = new List<Category>();
  27. public readonly Uri BOL_COM_BASE_URL = new Uri("https://www.bol.com");
  28. public readonly int PRODUCT_COUNT_PER_CATEGORY = 250;
  /// <summary>
  /// Creates the window, fills <see cref="CategoryList"/> with the hard-coded
  /// bol.com book categories and then starts the scrape.
  /// </summary>
  public MainWindow()
  {
      InitializeComponent();

      // Each PageLink is relative to BOL_COM_BASE_URL; getProducts appends the page number.
      CategoryList.AddRange(new[]
      {
          new Category { Id = 1, Description = "Literatuur & Romans", PageLink = "/nl/l/literatuur-romans/N/24410/?view=list" },
          new Category { Id = 2, Description = "Thrillers & Spanning", PageLink = "/nl/l/thrillers/N/2551/?view=list" },
          new Category { Id = 3, Description = "Kookboeken", PageLink = "/nl/l/koken-eten-drinken/N/1701/?view=list" },
          new Category { Id = 4, Description = "Kinderboeken", PageLink = "/nl/l/kinderboeken/N/24421/?view=list" },
          new Category { Id = 5, Description = "Studieboeken", PageLink = "/nl/l/boeken-studieboeken/N/8299+4273962347/?view=list" },
          new Category { Id = 6, Description = "Biografieën", PageLink = "/nl/l/literaire-non-fictie-biografieen/N/24415+23928/?view=list" },
          new Category { Id = 7, Description = "Fantasy, Horror & Sci-fi", PageLink = "/nl/l/fantasy-science-fiction/N/2510/?view=list" },
          new Category { Id = 8, Description = "Geschiedenis & Politiek", PageLink = "/nl/l/geschiedenisboeken/N/22671/?view=list" },
          new Category { Id = 9, Description = "Gezin & Gezondheid", PageLink = "/nl/l/gezondheid-psychologie/N/1969/?view=list" },
          new Category { Id = 10, Description = "Hobby, Huis & Tuin", PageLink = "/nl/l/hobby-huis-en-tuinboeken/N/2666/?view=list" },
          new Category { Id = 11, Description = "Kunst & Fotografie", PageLink = "/nl/l/kunstboeken/N/2271/?view=list" },
          new Category { Id = 12, Description = "Managementboeken", PageLink = "/nl/l/boeken/N/8299+23864/?view=list" },
          new Category { Id = 13, Description = "Reizen & Vakantie", PageLink = "/nl/l/reisboeken/N/2787/?view=list" },
          new Category { Id = 14, Description = "Religie & Spiritualiteit", PageLink = "/nl/l/spiritualiteit/N/2601/?view=list" },
          new Category { Id = 15, Description = "Sportboeken", PageLink = "/nl/l/outdoor-sportboeken/N/2806/?view=list" },
          new Category { Id = 16, Description = "Stripboeken", PageLink = "/nl/l/stripboeken/N/7311/?view=list" },
          new Category { Id = 17, Description = "Young Adult", PageLink = "/nl/l/young-adult-boeken/N/10756/?view=list" },
      });

      // Runs synchronously: the constructor does not return until every category
      // has been scraped. The returned list is not used here.
      getProducts();
  }
  53.  
  /// <summary>
  /// Scrapes the list pages of every category in <see cref="CategoryList"/> and
  /// builds a <see cref="Product"/> for each non-ebook row by downloading its
  /// detail page.
  /// </summary>
  /// <remarks>
  /// Runs synchronously and pauses 300 ms after each scraped product. A category
  /// is finished once <see cref="PRODUCT_COUNT_PER_CATEGORY"/> products have been
  /// collected, or as soon as a list page contains no product rows.
  /// </remarks>
  /// <returns>All scraped products, in category order and then page order.</returns>
  /// <exception cref="FormatException">
  /// A price node is present but its text cannot be parsed as a number.
  /// </exception>
  public List<Product> getProducts()
  {
      var productList = new List<Product>();

      // One generator for the whole run instead of a new time-seeded one per product.
      var stockRandom = new Random();

      foreach (var category in CategoryList)
      {
          int collected = 0;

          for (int pagenumber = 1; collected < PRODUCT_COUNT_PER_CATEGORY; pagenumber++)
          {
              var listUrl = new Uri(BOL_COM_BASE_URL, category.PageLink + $"&page={pagenumber}").AbsoluteUri;
              var productItemRows = getPageHtml(listUrl).DocumentNode
                  .SelectNodes("//*[contains(@class,'product-item--row')]");

              // SelectNodes returns null when nothing matches. Treat a page without
              // rows as the end of the category instead of failing or paging forever.
              if (productItemRows == null || productItemRows.Count == 0)
              {
                  break;
              }

              foreach (var productItemRow in productItemRows)
              {
                  // Stop mid-page so the category never exceeds the configured count.
                  if (collected >= PRODUCT_COUNT_PER_CATEGORY)
                  {
                      break;
                  }

                  // Ebook rows are skipped and do not count towards the total.
                  var specs = productItemRow.SelectSingleNode(".//*[contains(@class,'product-small-specs')]");
                  if (specs != null && specs.InnerText.Contains("Ebook"))
                  {
                      continue;
                  }

                  // The link is read from the second child of the title node.
                  var productTitle = productItemRow.SelectSingleNode(".//*[contains(@class,'product-title')]");
                  var productLink = childAt(productTitle, 1)?.Attributes["href"]?.Value;
                  if (productLink == null)
                  {
                      continue;
                  }

                  productList.Add(scrapeProduct(category, productLink, stockRandom));
                  collected++;

                  // Throttle requests to the site.
                  Thread.Sleep(300);
              }
          }
      }

      return productList;
  }

  // Downloads one product detail page and maps its fields onto a new Product.
  private Product scrapeProduct(Category category, string productLink, Random stockRandom)
  {
      var root = getPageHtml(new Uri(BOL_COM_BASE_URL, productLink).AbsoluteUri).DocumentNode;

      // Looked up once; several fields below read fixed child positions of these nodes.
      var smallSpecs = root.SelectSingleNode(".//*[contains(@class,'product-small-specs--large')]");
      var ratings = root.SelectSingleNode(".//ul[contains(@class,'review-summary__ratings')]");
      var priceNode = root.SelectSingleNode(".//*[contains(@class,'product-prices__bol-price')]");

      // A missing price node yields 0.0; malformed price text still throws FormatException.
      // NOTE(review): parsing uses the current culture, as before - confirm it matches
      // the decimal separator the site emits.
      double price = priceNode == null ? 0.0 : double.Parse(cleanText(priceNode));

      return new Product()
      {
          Name = root.SelectSingleNode(".//*[contains(@class,'pdp-header__title')]")?.InnerText,
          CategoryId = category.Id,
          InStock = stockRandom.Next(70), // placeholder stock level, not scraped
          // The description node is absent on some product pages (the original TODO
          // mentioned page 18); Description is null in that case.
          Description = cleanText(root.SelectSingleNode(".//div[@data-test='description']")),
          Price = price,
          Author = root.SelectSingleNode(".//*[contains(@class,'specs__party')]")?.InnerText,
          Language = cleanText(childAt(smallSpecs, 1)),
          Version = cleanText(childAt(smallSpecs, 3)),
          ReleasedOn = cleanText(childAt(smallSpecs, 7)),
          PageCount = 0,
          // The rating list is read back to front: child 9 feeds ratingCount1, child 1 feeds ratingCount5.
          ratingCount1 = parseRatingCount(ratings, 9),
          ratingCount2 = parseRatingCount(ratings, 7),
          ratingCount3 = parseRatingCount(ratings, 5),
          ratingCount4 = parseRatingCount(ratings, 3),
          ratingCount5 = parseRatingCount(ratings, 1),
      };
  }

  // Returns the child at the given position, or null when the parent is null or has too few children.
  private static HtmlAgilityPack.HtmlNode childAt(HtmlAgilityPack.HtmlNode parent, int index)
  {
      if (parent == null || index < 0 || index >= parent.ChildNodes.Count)
      {
          return null;
      }

      return parent.ChildNodes[index];
  }

  // Returns the node's inner text with line breaks removed and outer whitespace trimmed; null for a null node.
  private static string cleanText(HtmlAgilityPack.HtmlNode node)
  {
      // The previous code replaced @"\n" (a backslash followed by 'n'), which never matched a real line break.
      return node?.InnerText.Replace("\r", "").Replace("\n", "").Trim();
  }

  // Parses the number in front of "beoordelingen" in one rating entry; 0.0 when absent or unparsable.
  private static double parseRatingCount(HtmlAgilityPack.HtmlNode ratingList, int childIndex)
  {
      var text = cleanText(childAt(ratingList, childIndex))?.Replace("beoordelingen", "").Trim();

      double count;
      return double.TryParse(text, out count) ? count : 0.0;
  }
  125.  
  /// <summary>
  /// Downloads the page at <paramref name="pageLink"/> with a synchronous HTTP
  /// request and parses the response body into an HtmlAgilityPack document.
  /// </summary>
  /// <param name="pageLink">Absolute URL of the page to fetch.</param>
  /// <returns>The parsed document.</returns>
  private HtmlAgilityPack.HtmlDocument getPageHtml(string pageLink)
  {
      var request = (HttpWebRequest)WebRequest.Create(pageLink);
      var doc = new HtmlAgilityPack.HtmlDocument();

      // Dispose the response, its stream and the reader once the document is loaded.
      // Left open, each call keeps its connection until the garbage collector runs.
      using (var response = (HttpWebResponse)request.GetResponse())
      using (var responseStream = response.GetResponseStream())
      using (var reader = new StreamReader(responseStream))
      {
          doc.Load(reader);
      }

      return doc;
  }
  135.  
  136. }
  137. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement