Advertisement
Guest User

Untitled

a guest
Feb 28th, 2020
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.17 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Net;
  5. using System.Text;
  6. using System.Xml;
  7. using System.Net.Http;
  8. using System.Net.Http.Headers;
  9.  
  namespace icecat_scapper.SolrData
  {
      /// <summary>
      /// Console tool that reads a local Icecat files index, downloads the product
      /// sheet for every on-market product of one supplier, and writes one Solr
      /// <c>&lt;add&gt;</c> XML document per product to disk.
      /// </summary>
      class Tester
      {
          /// <summary>Supplier id that a <c>file</c> entry must carry to be exported.</summary>
          private const string BrandId = "728";

          /// <summary>Location of the Icecat files index that drives the export.</summary>
          private const string IndexFilePath = @"C:\Users\nas\source\repos\IcecatScraper\files.index.xml";

          /// <summary>Directory that receives the generated <c>product_&lt;id&gt;.xml</c> files.</summary>
          private const string SolrExampleDocsDirectory = @"C:\Users\nas\Documents\solr-8.4.1\solr-8.4.1\example\exampledocs";

          /// <summary>Base URL that product sheet file names are appended to.</summary>
          private const string IcecatBaseUrl = @"https://data.Icecat.biz/export/freexml.int/en/";

          /// <summary>Update endpoint used by <see cref="PostToSolr"/>.</summary>
          private const string SolrUpdateUrl = "http://localhost:8983/solr/IcecatProducts/update";

          /// <summary>Environment variable holding the Icecat user name.</summary>
          private const string IcecatUserVariable = "ICECAT_USER";

          /// <summary>Environment variable holding the Icecat password.</summary>
          private const string IcecatPasswordVariable = "ICECAT_PASSWORD";

          /// <summary>Total number of download attempts per file (first try included).</summary>
          private const int MaxDownloadAttempts = 10;

          /// <summary>Base delay between download attempts; multiplied by the attempt number.</summary>
          private const int RetryDelayMilliseconds = 500;

          private static readonly System.Globalization.CultureInfo Invariant =
              System.Globalization.CultureInfo.InvariantCulture;

          // One shared generator: creating a new Random per call can repeat the same
          // time-based seed on .NET Framework and hand out identical values.
          private static readonly Random Rng = new Random();

          /// <summary>
          /// Walks every <c>file</c> entry of the index, and for each entry that matches
          /// <see cref="BrandId"/>, is on the market and has a numeric product id,
          /// downloads the product and writes its Solr document to disk.
          /// </summary>
          /// <param name="args">Command line arguments; not used.</param>
          public static void Main(string[] args)
          {
              string indexXml = File.ReadAllText(IndexFilePath);
              XmlDocument indexDoc = new XmlDocument();
              indexDoc.LoadXml(indexXml);

              XmlNodeList fileNodes = indexDoc.SelectNodes("ICECAT-interface/files.index/file");
              int productCount = 1;

              foreach (XmlNode fileNode in fileNodes)
              {
                  if (!TryGetExportableProductId(fileNode, out int productId))
                  {
                      continue;
                  }

                  Product p;
                  try
                  {
                      p = ConvertProduct(productId);
                  }
                  catch (Exception ex) when (ex is WebException || ex is XmlException || ex is InvalidDataException)
                  {
                      // A single unavailable or malformed product sheet must not abort the whole export.
                      Console.Error.WriteLine($"Skipping product {productId}: {ex.Message}");
                      continue;
                  }

                  string outputPath = Path.Combine(SolrExampleDocsDirectory, "product_" + p.ID + ".xml");
                  File.WriteAllText(outputPath, BuildSolrAddXml(p));

                  Console.WriteLine(productCount +" added : " + p.Name);
                  productCount++;
              }
          }

          /// <summary>
          /// Downloads the Icecat sheet for one product and maps it to a <see cref="Product"/>.
          /// Price and stock are not read from the sheet; they are filled with random values
          /// (price in [50, 1000) rounded to two decimals, stock in [10, 100)).
          /// </summary>
          /// <param name="index">Icecat product id; used as the file name of the sheet to download.</param>
          /// <returns>The populated product. Long and short description are <c>null</c> when the sheet has none.</returns>
          /// <exception cref="InvalidDataException">A required attribute or element is missing from the sheet.</exception>
          /// <exception cref="WebException">The sheet could not be downloaded.</exception>
          /// <exception cref="XmlException">The downloaded sheet is not well-formed XML.</exception>
          public static Product ConvertProduct(int index)
          {
              XmlDocument productDoc = new XmlDocument();
              productDoc.LoadXml(GetXmlFromIndex(index));

              string id = RequiredText(productDoc, "ICECAT-interface/Product/@ID");
              string name = RequiredText(productDoc, "ICECAT-interface/Product/@Name");
              string title = RequiredText(productDoc, "ICECAT-interface/Product/@Title");
              string imageUrl = RequiredText(productDoc, "ICECAT-interface/Product/@Pic500x500");
              string shortSummary = RequiredText(productDoc, "ICECAT-interface/Product/SummaryDescription/ShortSummaryDescription");
              string longSummary = RequiredText(productDoc, "ICECAT-interface/Product/SummaryDescription/LongSummaryDescription");
              string category = RequiredText(productDoc, "ICECAT-interface/Product/Category/@ID");

              // The description attributes are optional; absence is reported as null
              // instead of being detected through a caught exception.
              string longDescription = OptionalText(productDoc, "ICECAT-interface/Product/ProductDescription/@LongDesc");
              string shortDescription = OptionalText(productDoc, "ICECAT-interface/Product/ProductDescription/@ShortDesc");

              double price = Math.Round(Rng.NextDouble() * (1000.0 - 50.0) + 50.0, 2);
              int stock = Rng.Next(10, 100);

              return new Product(id, name, title, imageUrl, longDescription, shortDescription,
                  shortSummary, longSummary, price, stock, category);
          }

          /// <summary>
          /// Sends <paramref name="data"/> as a UTF-8 XML body in an HTTP POST to
          /// <see cref="SolrUpdateUrl"/> and waits for the response.
          /// </summary>
          /// <param name="data">The XML update message to send.</param>
          /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
          /// <exception cref="WebException">The request failed or the server answered with an error status.</exception>
          public static void PostToSolr(string data)
          {
              if (data == null)
              {
                  throw new ArgumentNullException(nameof(data));
              }

              // Encode with the charset announced in the header; ASCII would turn
              // every non-ASCII character of the product text into '?'.
              byte[] payload = Encoding.UTF8.GetBytes(data);

              HttpWebRequest request = (HttpWebRequest)WebRequest.Create(SolrUpdateUrl);
              request.Method = "POST";
              request.ContentType = "text/xml; charset=utf-8";
              request.Accept = "application/xml";
              request.ContentLength = payload.Length;

              using (Stream requestStream = request.GetRequestStream())
              {
                  requestStream.Write(payload, 0, payload.Length);
              }

              // Reading the response completes the exchange, surfaces HTTP errors as
              // WebException, and releases the connection when disposed.
              using (request.GetResponse())
              {
              }
          }

          /// <summary>
          /// Checks whether an index entry belongs to <see cref="BrandId"/>, has
          /// <c>On_Market</c> equal to 1 and carries an integer <c>Product_ID</c>.
          /// Entries with missing or non-numeric attributes are treated as not exportable.
          /// </summary>
          private static bool TryGetExportableProductId(XmlNode fileNode, out int productId)
          {
              productId = 0;

              XmlAttributeCollection attributes = fileNode.Attributes;
              if (attributes == null)
              {
                  return false;
              }

              string supplierId = attributes["Supplier_id"]?.Value;
              string onMarketText = attributes["On_Market"]?.Value;
              string productIdText = attributes["Product_ID"]?.Value;

              return BrandId.Equals(supplierId, StringComparison.Ordinal)
                  && int.TryParse(onMarketText, System.Globalization.NumberStyles.Integer, Invariant, out int onMarket)
                  && onMarket == 1
                  && int.TryParse(productIdText, System.Globalization.NumberStyles.Integer, Invariant, out productId);
          }

          /// <summary>Builds the <c>&lt;add&gt;&lt;doc&gt;…&lt;/doc&gt;&lt;/add&gt;</c> message for one product, one field per line.</summary>
          private static string BuildSolrAddXml(Product p)
          {
              StringBuilder xml = new StringBuilder();
              xml.Append("<add>\n<doc>\n");
              AppendField(xml, "id", p.ID);
              AppendField(xml, "name_s", p.Name);
              AppendField(xml, "title_s", p.Title);
              AppendField(xml, "imageURL_s", p.ImageURL);
              AppendField(xml, "ShortSummaryDescription_txt", p.ShortSummary);
              AppendField(xml, "LongSummaryDescription_txt", p.LongSummary);
              AppendField(xml, "price_d", p.Price);
              AppendField(xml, "inStock_i", p.Stock);
              AppendField(xml, "category_s", p.Category);
              xml.Append("</doc>\n</add>");
              return xml.ToString();
          }

          /// <summary>
          /// Appends one <c>&lt;field name="…"&gt;</c> element. The value is formatted with the
          /// invariant culture (so numbers always use '.' as decimal separator) and XML-escaped.
          /// </summary>
          private static void AppendField(StringBuilder xml, string name, object value)
          {
              string text = Convert.ToString(value, Invariant);
              xml.Append("<field name=\"").Append(name).Append("\">")
                 .Append(System.Security.SecurityElement.Escape(text))
                 .Append("</field>\n");
          }

          /// <summary>Returns the text of the node selected by <paramref name="xpath"/>, or throws when it is absent.</summary>
          private static string RequiredText(XmlDocument document, string xpath)
          {
              XmlNode node = document.SelectSingleNode(xpath);
              if (node == null)
              {
                  throw new InvalidDataException($"Product sheet is missing required node '{xpath}'.");
              }

              return node.InnerText;
          }

          /// <summary>Returns the text of the node selected by <paramref name="xpath"/>, or <c>null</c> when it is absent.</summary>
          private static string OptionalText(XmlDocument document, string xpath)
          {
              return document.SelectSingleNode(xpath)?.InnerText;
          }

          /// <summary>Downloads <c>&lt;index&gt;.xml</c> from <see cref="IcecatBaseUrl"/> and decodes it as text.</summary>
          private static string GetXmlFromIndex(int index)
          {
              byte[] payload = DownloadIcecatFile(index.ToString(Invariant) + ".xml", IcecatBaseUrl);

              // StreamReader consumes a leading byte order mark, which would otherwise
              // end up as the first character of the string handed to LoadXml.
              using (StreamReader reader = new StreamReader(new MemoryStream(payload), Encoding.UTF8))
              {
                  return reader.ReadToEnd();
              }
          }

          /// <summary>
          /// Downloads <paramref name="url"/> + <paramref name="index"/> with basic
          /// authentication, retrying failed attempts up to <see cref="MaxDownloadAttempts"/> times.
          /// </summary>
          /// <param name="index">File name appended to <paramref name="url"/>.</param>
          /// <param name="url">Base URL of the download.</param>
          /// <returns>The raw bytes of the downloaded file.</returns>
          /// <exception cref="InvalidOperationException">The credential environment variables are not set.</exception>
          /// <exception cref="WebException">The last attempt failed, or the server reported a client error.</exception>
          private static byte[] DownloadIcecatFile(string index, string url)
          {
              // Credentials come from the environment so that no secret lives in source control.
              string user = Environment.GetEnvironmentVariable(IcecatUserVariable);
              string password = Environment.GetEnvironmentVariable(IcecatPasswordVariable);
              if (string.IsNullOrEmpty(user) || string.IsNullOrEmpty(password))
              {
                  throw new InvalidOperationException(
                      $"Set the {IcecatUserVariable} and {IcecatPasswordVariable} environment variables to the Icecat credentials.");
              }

              string downloadUrl = url + index;

              CredentialCache credentials = new CredentialCache();
              credentials.Add(new Uri(downloadUrl), "basic", new NetworkCredential(user, password));

              using (WebClient webClient = new WebClient())
              {
                  webClient.Credentials = credentials;

                  for (int attempt = 1; ; attempt++)
                  {
                      try
                      {
                          return webClient.DownloadData(downloadUrl);
                      }
                      catch (WebException ex) when (attempt < MaxDownloadAttempts && IsRetryable(ex))
                      {
                          // The filter lets the final or non-retryable failure propagate
                          // untouched, keeping its original stack trace.
                          System.Threading.Thread.Sleep(RetryDelayMilliseconds * attempt);
                      }
                  }
              }
          }

          /// <summary>
          /// Decides whether a failed download is worth repeating: failures without an HTTP
          /// response (timeouts, connection errors) and non-4xx statuses are retried;
          /// 4xx client errors are not, since repeating the same request cannot fix them.
          /// </summary>
          private static bool IsRetryable(WebException ex)
          {
              if (ex.Response is HttpWebResponse response)
              {
                  int status = (int)response.StatusCode;
                  return status < 400 || status >= 500;
              }

              return true;
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement