Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading;
using System.Xml;
namespace icecat_scapper.SolrData
{
    /// <summary>
    /// Console tool that reads an Icecat <c>files.index.xml</c>, downloads the product XML
    /// for every on-market product of one supplier, and writes a Solr
    /// <c>&lt;add&gt;&lt;doc&gt;</c> XML file per product.
    /// </summary>
    class Tester
    {
        private const string DefaultIndexPath = @"C:\Users\nas\source\repos\IcecatScraper\files.index.xml";
        private const string DefaultOutputDirectory = @"C:\Users\nas\Documents\solr-8.4.1\solr-8.4.1\example\exampledocs";
        private const string DefaultBrandId = "728";
        private const string ProductBaseUrl = @"https://data.Icecat.biz/export/freexml.int/en/";
        private const string SolrUpdateUrl = "http://localhost:8983/solr/IcecatProducts/update";

        // Credentials are read from the environment so that no secret lives in source control.
        private const string UserEnvironmentVariable = "ICECAT_USER";
        private const string PasswordEnvironmentVariable = "ICECAT_PASSWORD";

        private const int MaxDownloadAttempts = 10;
        private static readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(1);

        // One shared generator: creating a new Random per call can yield repeated values
        // when instances are seeded from the clock in quick succession.
        private static readonly Random Rng = new Random();

        /// <summary>
        /// Entry point. Optional arguments override the built-in defaults:
        /// <c>args[0]</c> = path of the index XML, <c>args[1]</c> = output directory,
        /// <c>args[2]</c> = supplier (brand) id to export.
        /// </summary>
        /// <param name="args">Command-line arguments; may be empty.</param>
        public static void Main(string[] args)
        {
            string indexPath = GetArgOrDefault(args, 0, DefaultIndexPath);
            string outputDirectory = GetArgOrDefault(args, 1, DefaultOutputDirectory);
            string brandId = GetArgOrDefault(args, 2, DefaultBrandId);

            // Load straight from the file so the XML reader handles the declared encoding.
            XmlDocument indexDocument = new XmlDocument();
            indexDocument.Load(indexPath);

            XmlNodeList fileNodes = indexDocument.SelectNodes("ICECAT-interface/files.index/file");
            int productCount = 1;

            foreach (XmlNode fileNode in fileNodes)
            {
                if (!TryGetProductId(fileNode, brandId, out int productId))
                {
                    continue;
                }

                Product product = ConvertProduct(productId);
                string solrXml = BuildSolrXml(product);

                string outputPath = Path.Combine(outputDirectory, "product_" + product.ID + ".xml");
                File.WriteAllText(outputPath, solrXml);

                Console.WriteLine(productCount + " added : " + product.Name);
                productCount++;
            }
        }

        /// <summary>
        /// Downloads the Icecat XML for one product and maps it to a <see cref="Product"/>.
        /// Price and stock are not read from the XML; they are random placeholder values
        /// (price in [50, 1000) rounded to 2 decimals, stock in [10, 100)).
        /// </summary>
        /// <param name="index">Icecat product id used to build the download file name.</param>
        /// <returns>The populated product; the two description fields are null when absent.</returns>
        /// <exception cref="InvalidDataException">A required node is missing from the product XML.</exception>
        public static Product ConvertProduct(int index)
        {
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.LoadXml(GetXmlFromIndex(index));

            string id = RequireNode(xmlDoc, "ICECAT-interface/Product/@ID").Value;
            string name = RequireNode(xmlDoc, "ICECAT-interface/Product/@Name").Value;
            string title = RequireNode(xmlDoc, "ICECAT-interface/Product/@Title").Value;
            string imageUrl = RequireNode(xmlDoc, "ICECAT-interface/Product/@Pic500x500").Value;
            string shortSummary = RequireNode(xmlDoc, "ICECAT-interface/Product/SummaryDescription/ShortSummaryDescription").InnerText;
            string longSummary = RequireNode(xmlDoc, "ICECAT-interface/Product/SummaryDescription/LongSummaryDescription").InnerText;

            double price = Math.Round(Rng.NextDouble() * (1000.0 - 50.0) + 50.0, 2);
            int stock = Rng.Next(10, 100);

            string category = RequireNode(xmlDoc, "ICECAT-interface/Product/Category/@ID").Value;

            // The description attributes are optional: a missing node simply yields null
            // instead of relying on a caught exception.
            string longDescription = xmlDoc.SelectSingleNode("ICECAT-interface/Product/ProductDescription/@LongDesc")?.Value;
            string shortDescription = xmlDoc.SelectSingleNode("ICECAT-interface/Product/ProductDescription/@ShortDesc")?.Value;

            return new Product(id, name, title, imageUrl, longDescription, shortDescription, shortSummary, longSummary, price, stock, category);
        }

        /// <summary>
        /// Sends an XML update message to the local Solr <c>IcecatProducts</c> core.
        /// </summary>
        /// <param name="data">The XML payload to post.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="WebException">The request failed or Solr answered with an error status.</exception>
        public static void PostToSolr(string data)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            // Encode as UTF-8 to match the declared charset; ASCII would turn
            // characters such as '®' or '±' into '?'.
            byte[] payload = Encoding.UTF8.GetBytes(data);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(SolrUpdateUrl);
            request.Method = "POST";
            request.ContentType = "text/xml; charset=utf-8";
            request.Accept = "application/xml";
            request.ContentLength = payload.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(payload, 0, payload.Length);
            }

            // Reading the response completes the exchange; GetResponse throws a
            // WebException when the server reports a protocol error.
            using (WebResponse response = request.GetResponse())
            {
            }
        }

        /// <summary>Returns <c>args[position]</c> when present and non-blank, otherwise <paramref name="fallback"/>.</summary>
        private static string GetArgOrDefault(string[] args, int position, string fallback)
        {
            bool supplied = args != null
                && args.Length > position
                && !string.IsNullOrWhiteSpace(args[position]);
            return supplied ? args[position] : fallback;
        }

        /// <summary>
        /// Decides whether an index <c>file</c> node belongs to the requested supplier,
        /// is flagged <c>On_Market="1"</c>, and carries an integer <c>Product_ID</c>.
        /// Nodes with missing or malformed attributes are rejected rather than throwing.
        /// </summary>
        private static bool TryGetProductId(XmlNode fileNode, string brandId, out int productId)
        {
            productId = 0;

            XmlAttributeCollection attributes = fileNode.Attributes;
            if (attributes == null)
            {
                return false;
            }

            string supplierId = attributes["Supplier_id"]?.Value;
            if (!string.Equals(supplierId, brandId, StringComparison.Ordinal))
            {
                return false;
            }

            string onMarketText = attributes["On_Market"]?.Value;
            if (!int.TryParse(onMarketText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int onMarket)
                || onMarket != 1)
            {
                return false;
            }

            string productIdText = attributes["Product_ID"]?.Value;
            return int.TryParse(productIdText, NumberStyles.Integer, CultureInfo.InvariantCulture, out productId);
        }

        /// <summary>
        /// Serializes a product as a Solr <c>&lt;add&gt;&lt;doc&gt;</c> message, one element per line.
        /// Values are written through <see cref="XmlWriter"/> so that characters such as
        /// '&amp;' and '&lt;' are escaped.
        /// </summary>
        private static string BuildSolrXml(Product product)
        {
            XmlWriterSettings settings = new XmlWriterSettings
            {
                OmitXmlDeclaration = true,
                Indent = true,
                IndentChars = string.Empty,
                NewLineChars = "\n"
            };

            StringBuilder buffer = new StringBuilder();
            using (XmlWriter writer = XmlWriter.Create(buffer, settings))
            {
                writer.WriteStartElement("add");
                writer.WriteStartElement("doc");

                WriteField(writer, "id", product.ID);
                WriteField(writer, "name_s", product.Name);
                WriteField(writer, "title_s", product.Title);
                WriteField(writer, "imageURL_s", product.ImageURL);
                WriteField(writer, "ShortSummaryDescription_txt", product.ShortSummary);
                // Written once only; the long summary was previously emitted twice.
                WriteField(writer, "LongSummaryDescription_txt", product.LongSummary);
                WriteField(writer, "price_d", product.Price);
                WriteField(writer, "inStock_i", product.Stock);
                WriteField(writer, "category_s", product.Category);

                writer.WriteEndElement();
                writer.WriteEndElement();
            }

            return buffer.ToString();
        }

        /// <summary>
        /// Writes one <c>&lt;field name="..."&gt;value&lt;/field&gt;</c> element. The value is
        /// formatted with the invariant culture so numbers always use '.' as decimal separator.
        /// </summary>
        private static void WriteField(XmlWriter writer, string name, object value)
        {
            writer.WriteStartElement("field");
            writer.WriteAttributeString("name", name);
            writer.WriteString(Convert.ToString(value, CultureInfo.InvariantCulture));
            // Full end tag keeps empty values as <field ...></field>.
            writer.WriteFullEndElement();
        }

        /// <summary>Selects a single node or throws a descriptive error when it is absent.</summary>
        /// <exception cref="InvalidDataException">No node matches <paramref name="xpath"/>.</exception>
        private static XmlNode RequireNode(XmlNode root, string xpath)
        {
            XmlNode node = root.SelectSingleNode(xpath);
            if (node == null)
            {
                throw new InvalidDataException("Required node '" + xpath + "' was not found in the product XML.");
            }

            return node;
        }

        /// <summary>Downloads <c>{index}.xml</c> from the Icecat export URL and decodes it as UTF-8 text.</summary>
        private static string GetXmlFromIndex(int index)
        {
            string fileName = index.ToString(CultureInfo.InvariantCulture) + ".xml";
            byte[] rawBytes = DownloadIcecatFile(fileName, ProductBaseUrl);
            return Encoding.UTF8.GetString(rawBytes);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> + <paramref name="index"/> using basic authentication,
        /// retrying on <see cref="WebException"/> up to <see cref="MaxDownloadAttempts"/> attempts
        /// with a short pause between attempts.
        /// </summary>
        /// <param name="index">File name appended to <paramref name="url"/>.</param>
        /// <param name="url">Base URL of the export directory.</param>
        /// <returns>The raw bytes of the downloaded file.</returns>
        /// <exception cref="InvalidOperationException">The credential environment variables are not set.</exception>
        /// <exception cref="WebException">Every attempt failed; the last failure is propagated.</exception>
        private static byte[] DownloadIcecatFile(string index, string url)
        {
            string user = Environment.GetEnvironmentVariable(UserEnvironmentVariable);
            string password = Environment.GetEnvironmentVariable(PasswordEnvironmentVariable);
            if (string.IsNullOrEmpty(user) || string.IsNullOrEmpty(password))
            {
                throw new InvalidOperationException(
                    "Set the " + UserEnvironmentVariable + " and " + PasswordEnvironmentVariable +
                    " environment variables to the Icecat account credentials.");
            }

            string downloadUrl = url + index;

            CredentialCache credentials = new CredentialCache();
            credentials.Add(new Uri(downloadUrl), "basic", new NetworkCredential(user, password));

            using (WebClient webClient = new WebClient())
            {
                webClient.Credentials = credentials;

                for (int attempt = 1; ; attempt++)
                {
                    try
                    {
                        return webClient.DownloadData(downloadUrl);
                    }
                    catch (WebException) when (attempt < MaxDownloadAttempts)
                    {
                        // Downloads occasionally time out; wait briefly and try again.
                        // On the final attempt the filter is false, so the exception
                        // propagates with its original stack trace.
                        Thread.Sleep(RetryDelay);
                    }
                }
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement