Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //Input: (Read the input parameters from a configuration file.)
- //
- // 1.Path to the folder with XML files to be processed.
- // 2.XPath expression pointing to a node in XML.
- // 3.Number of concurrent threads.
- //
- //Output: Sorted list of all distinct values that this node has, together with the number of occurrences of each value,
- // sorted by number of occurrences, descending. If the node is missing in a certain XML file, that file is counted as having the value "N/A".
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using System.Xml;
- using System.Xml.Linq;
- using System.Xml.XPath;
- using System.Configuration;
- using System.Collections.Specialized;
- namespace AnotherTry
- {
/// <summary>
/// Console tool that counts the distinct values selected by an XPath expression across every
/// <c>*.xml</c> file in a folder and prints them ordered by number of occurrences, descending.
/// </summary>
/// <remarks>
/// The folder, the XPath expression and the number of worker threads are read from the
/// <c>AnotherXML.appSettings</c> configuration section (keys <c>folder</c>, <c>xpath</c>, <c>threads</c>).
/// A file in which the expression selects nothing contributes one occurrence of "N/A".
/// </remarks>
class Program
{
    /// <summary>Name of the configuration section that holds the input parameters.</summary>
    private const string SettingsSectionName = "AnotherXML.appSettings";

    /// <summary>Key counted once for every file in which the XPath expression selects no value.</summary>
    private const string MissingValueKey = "N/A";

    /// <summary>Files strictly larger than this many bytes are streamed child-by-child instead of being loaded whole.</summary>
    private const long LargeFileThresholdBytes = 10000000;

    /// <summary>
    /// Reads the settings, counts the values in all XML files of the configured folder,
    /// prints the result to the console and waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <exception cref="ConfigurationErrorsException">
    /// The configuration section is missing, or the <c>folder</c> or <c>xpath</c> setting is missing or empty.
    /// </exception>
    static void Main(string[] args)
    {
        NameValueCollection settings = ConfigurationManager.GetSection(SettingsSectionName) as NameValueCollection;
        if (settings == null)
        {
            // The "as" cast yields null both when the section is absent and when it has another handler type.
            throw new ConfigurationErrorsException(
                $"Configuration section '{SettingsSectionName}' is missing or is not a name/value section.");
        }

        string folder = RequireSetting(settings, "folder");
        string xpath = RequireSetting(settings, "xpath");

        // A missing, malformed or non-positive thread count falls back to sequential processing.
        int threadNum;
        if (!int.TryParse(settings["threads"], out threadNum) || threadNum < 1)
        {
            threadNum = 1;
        }

        string[] xmlFiles = Directory.GetFiles(folder, "*.xml");
        Dictionary<string, int> dict = CountValues(xmlFiles, xpath, threadNum);

        // Secondary ordinal sort on the key makes the order of equally frequent values deterministic.
        var ordered = dict
            .OrderByDescending(x => x.Value)
            .ThenBy(x => x.Key, StringComparer.Ordinal);

        Console.WriteLine();
        PrintEnumerable(ordered);
        // PrintInFileEnumerable(ordered) can be called here instead to write the result to a file.
        Console.WriteLine();
        Console.WriteLine("Закончили");
        Console.ReadLine();
    }

    /// <summary>Returns the setting stored under <paramref name="key"/>, rejecting missing or blank values.</summary>
    /// <param name="settings">The name/value section to read from.</param>
    /// <param name="key">The setting name.</param>
    /// <returns>The non-blank setting value.</returns>
    /// <exception cref="ConfigurationErrorsException">The setting is missing or consists only of white space.</exception>
    private static string RequireSetting(NameValueCollection settings, string key)
    {
        string value = settings[key];
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new ConfigurationErrorsException(
                $"Setting '{key}' is missing or empty in section '{SettingsSectionName}'.");
        }

        return value;
    }

    /// <summary>
    /// Counts the values selected by <paramref name="xpath"/> in every file of <paramref name="xmlFiles"/>,
    /// processing at most <paramref name="threadNum"/> files concurrently.
    /// </summary>
    /// <param name="xmlFiles">Paths of the XML files to process.</param>
    /// <param name="xpath">XPath expression evaluated against each file.</param>
    /// <param name="threadNum">Maximum number of files processed at the same time; values below 1 are treated as 1.</param>
    /// <returns>A map from each distinct value (or "N/A") to its total number of occurrences.</returns>
    /// <exception cref="AggregateException">Processing of at least one file failed; the inner exceptions carry the causes.</exception>
    internal static Dictionary<string, int> CountValues(IList<string> xmlFiles, string xpath, int threadNum)
    {
        // Each file gets its own dictionary in its own array slot, so the workers share no mutable state.
        var perFile = new Dictionary<string, int>[xmlFiles.Count];
        var options = new ParallelOptions { MaxDegreeOfParallelism = Math.Max(1, threadNum) };
        Parallel.For(0, xmlFiles.Count, options, i =>
        {
            perFile[i] = CountValuesInFile(xmlFiles[i], xpath);
        });

        // Merging on the calling thread, in file order, keeps the result independent of scheduling.
        var totals = new Dictionary<string, int>();
        foreach (Dictionary<string, int> fileCounts in perFile)
        {
            foreach (KeyValuePair<string, int> entry in fileCounts)
            {
                AddCount(totals, entry.Key, entry.Value);
            }
        }

        return totals;
    }

    /// <summary>Counts the values selected by <paramref name="xpath"/> in a single XML file.</summary>
    /// <param name="path">Path of the XML file.</param>
    /// <param name="xpath">XPath expression to evaluate.</param>
    /// <returns>
    /// A map from each selected value to its number of occurrences in the file,
    /// or a single "N/A" entry with count 1 when nothing was selected.
    /// </returns>
    /// <remarks>
    /// For files up to 10,000,000 bytes the root element is loaded whole and the expression is evaluated
    /// relative to it. For larger files the root start tag is skipped and every child of the root is loaded
    /// and evaluated on its own, so the expression is then relative to each top-level child instead.
    /// </remarks>
    internal static Dictionary<string, int> CountValuesInFile(string path, string xpath)
    {
        var counts = new Dictionary<string, int>();
        long fileSize = new FileInfo(path).Length;

        // NOTE(review): DTD processing is enabled; confirm the input files come from a trusted source,
        // because DTDs in untrusted XML can be abused (entity expansion, external entities).
        XmlReaderSettings xmlSettings = new XmlReaderSettings();
        xmlSettings.DtdProcessing = DtdProcessing.Parse;

        using (XmlReader reader = XmlReader.Create(path, xmlSettings))
        {
            if (fileSize > LargeFileThresholdBytes)
            {
                // Step inside the root so that only one top-level child is held in memory at a time.
                reader.ReadStartElement();
            }

            while (!reader.EOF)
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    reader.Read();
                    continue;
                }

                // ReadFrom consumes the whole element and leaves the reader on the node after it,
                // so no extra Read() is needed on this path.
                XElement el = (XElement)XNode.ReadFrom(reader);
                RemoveNamespaces(el);
                foreach (string value in EvaluateValues(el, xpath))
                {
                    AddCount(counts, value, 1);
                }
            }
        }

        if (counts.Count == 0)
        {
            counts.Add(MissingValueKey, 1);
        }

        return counts;
    }

    /// <summary>Evaluates <paramref name="xpath"/> against <paramref name="context"/> and returns the selected values as strings.</summary>
    /// <param name="context">Element the expression is evaluated relative to.</param>
    /// <param name="xpath">XPath expression to evaluate.</param>
    /// <returns>
    /// The values of the selected elements and attributes (other node kinds are ignored), or a single
    /// value when the expression yields a string, number or boolean instead of a node set.
    /// </returns>
    private static IEnumerable<string> EvaluateValues(XElement context, string xpath)
    {
        // XPathEvaluate returns a bool, a double, a string or a node sequence depending on the expression;
        // casting unconditionally to a sequence would throw for the scalar cases.
        object result = context.XPathEvaluate(xpath);

        string text = result as string;
        if (text != null)
        {
            return new[] { text };
        }

        if (result is bool)
        {
            return new[] { XmlConvert.ToString((bool)result) };
        }

        if (result is double)
        {
            return new[] { XmlConvert.ToString((double)result) };
        }

        var nodes = result as IEnumerable<object>;
        if (nodes == null)
        {
            return Enumerable.Empty<string>();
        }

        return nodes.Select(NodeValue).Where(value => value != null).ToList();
    }

    /// <summary>Returns the string value of an element or attribute, or <c>null</c> for any other object.</summary>
    /// <param name="node">An item produced by the XPath evaluation.</param>
    private static string NodeValue(object node)
    {
        var attribute = node as XAttribute;
        if (attribute != null)
        {
            return attribute.Value;
        }

        var element = node as XElement;
        return element != null ? element.Value : null;
    }

    /// <summary>Adds <paramref name="amount"/> to the count stored under <paramref name="key"/>, creating the entry if needed.</summary>
    private static void AddCount(Dictionary<string, int> counts, string key, int amount)
    {
        int current;
        counts.TryGetValue(key, out current); // current stays 0 when the key is absent
        counts[key] = current + amount;
    }

    /// <summary>
    /// Strips all namespace information from <paramref name="document"/> and its descendants so that
    /// XPath expressions can address elements and attributes by local name only.
    /// </summary>
    /// <param name="document">Root of the subtree to modify in place.</param>
    private static void RemoveNamespaces(XElement document)
    {
        // DescendantsAndSelf: the root of the subtree must be stripped too, otherwise its own
        // prefixed attributes stay unreachable by local name. The list is materialised up front
        // because the loop mutates the tree being walked.
        List<XElement> elements = document.DescendantsAndSelf().ToList();
        foreach (XElement element in elements)
        {
            element.Name = element.Name.LocalName;

            var seenNames = new HashSet<string>();
            var strippedAttributes = new List<object>();
            foreach (XAttribute originalAttribute in element.Attributes())
            {
                if (originalAttribute.IsNamespaceDeclaration)
                {
                    continue;
                }

                // Two attributes from different namespaces may share a local name; keep the first,
                // since an element cannot carry two attributes with the same name.
                string localName = originalAttribute.Name.LocalName;
                if (seenNames.Add(localName))
                {
                    strippedAttributes.Add(new XAttribute(localName, originalAttribute.Value));
                }
            }

            // Note that this also strips the attributes' line number information.
            element.ReplaceAttributes(strippedAttributes.ToArray());
        }
    }

    /// <summary>Writes every entry of <paramref name="dict"/> to the console in "Key: k, value: v" form.</summary>
    /// <param name="dict">The value counts to print.</param>
    private static void PrintDictionary(Dictionary<string, int> dict)
    {
        foreach (var kvp in dict)
        {
            Console.WriteLine($"Key: {kvp.Key}, value: {kvp.Value}");
        }
    }

    /// <summary>Writes every entry of the ordered sequence to the console as "key, count", one per line.</summary>
    /// <param name="keyValuePairs">The ordered value counts to print.</param>
    private static void PrintEnumerable(IOrderedEnumerable<KeyValuePair<string, int>> keyValuePairs)
    {
        foreach (var kvp in keyValuePairs)
        {
            Console.WriteLine($"{kvp.Key}, {kvp.Value}");
        }
    }

    /// <summary>
    /// Writes every entry of the ordered sequence as "key, count" lines to <c>D:\Downloads\info.txt</c>,
    /// overwriting any existing file.
    /// </summary>
    /// <param name="keyValuePairs">The ordered value counts to write.</param>
    private static void PrintInFileEnumerable(IOrderedEnumerable<KeyValuePair<string, int>> keyValuePairs)
    {
        // NOTE(review): Encoding.Default depends on the platform and runtime; confirm it can represent all values.
        using (StreamWriter sw = new StreamWriter(@"D:\Downloads\info.txt", false, Encoding.Default))
        {
            foreach (var kvp in keyValuePairs)
            {
                sw.WriteLine($"{kvp.Key}, {kvp.Value}");
            }
        }
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement