Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // SalvageFromIcyVeins.cs "$Revision: 2505 $" "$Date: 2019-08-17 11:03:04 +0300 (la, 17 elo 2019) $"
- // https://www.nuget.org/packages/HtmlAgilityPack/
- using HtmlAgilityPack;
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Xml.XPath;
- namespace LogDataCollector
- {
/// <summary>
/// Hero classes recognised by the salvage-guide parser, plus bookkeeping members
/// used while counting and when writing the generated output.
/// </summary>
public enum HeroClass
{
    DemonHunter = 0,
    Barbarian = 1,
    Wizard = 2,
    WitchDoctor = 3,
    Monk = 4,
    Crusader = 5,
    Necromancer = 6,

    /// <summary>Parser-side bucket for follower guides (for our convenience only).</summary>
    Follower = 7,

    /// <summary>One past the last countable class; also used as an array slot index.</summary>
    Total = 8,

    /// <summary>Written to the final output in place of <see cref="Follower"/>.</summary>
    None = 9,
}
- public class SalvageFromIcyVeins
- {
/// <summary>
/// The build names collected for one hero class (in this file, one instance per item and class).
/// </summary>
private class Build
{
    public readonly HeroClass HeroClass;
    public readonly List<string> BuildNames;

    /// <summary>
    /// Creates the group with its first build name; further names are appended
    /// to <see cref="BuildNames"/> by the caller.
    /// </summary>
    public Build(HeroClass heroClass, string buildName)
    {
        this.HeroClass = heroClass;
        this.BuildNames = new List<string>();
        this.BuildNames.Add(buildName);
    }
}
// Address of the salvage guide page that LoadData downloads (despite the name, this is a URL, not markup).
private const string html = "https://www.icy-veins.com/d3/legendary-item-salvage-guide";

// When true, builds whose name starts with "LoN " are dropped while parsing.
// Off by default: LoN builds can be skipped in the UI as well, so they are included here.
public bool SkipLoN;

// When true and a local "salvage-guide.html" exists in the data directory,
// LoadData parses that file instead of downloading the page. Off by default.
public bool FileTest;

// Builds are kept only for the classes listed here; remove entries to narrow the output.
public HashSet<HeroClass> HeroClasses = new HashSet<HeroClass>
{
    HeroClass.DemonHunter,
    HeroClass.Barbarian,
    HeroClass.Wizard,
    HeroClass.WitchDoctor,
    HeroClass.Monk,
    HeroClass.Crusader,
    HeroClass.Necromancer,
    HeroClass.Follower,
};
// Logging helpers: every message is forwarded to OUT.
// Swap OUT for Console as a simple workaround to write somewhere else, or empty the bodies to disable output.

// Logs the string form of any object (used for exceptions).
private void WriteLine(object value) => OUT.WriteLine(value.ToString());

// Logs a plain string as-is; braces in the text are NOT treated as format items.
private void WriteLine(string value = null) => OUT.WriteLine(value);

// Logs a composite format string with its arguments.
private void WriteLine(string format, params object[] arg) => OUT.WriteLine(string.Format(format, arg));
/// <summary>
/// Loads the salvage guide page, parses its salvage table and writes the generated
/// <c>SalvageItemsData.cs</c> file into <paramref name="_dataDir"/>.
/// </summary>
/// <remarks>
/// When <see cref="FileTest"/> is set and <c>salvage-guide.html</c> exists in the data directory,
/// that file is parsed. Otherwise the page is downloaded through an <c>HtmlWeb</c> whose cache
/// lives in the <c>htmlCache</c> sub-directory. Exceptions raised while parsing or writing the
/// result are logged and not rethrown.
/// </remarks>
/// <param name="_dataDir">
/// Directory that holds the optional local HTML copy, the HTML cache folder and the generated file.
/// </param>
public void LoadData(string _dataDir)
{
    var filename = Path.Combine(_dataDir, "salvage-guide.html");
    var cacheDir = Path.Combine(_dataDir, "htmlCache");
    var salvageItemsDataFile = Path.Combine(_dataDir, "SalvageItemsData.cs");

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    Directory.CreateDirectory(cacheDir);

    HtmlDocument htmlDoc;
    if (FileTest && File.Exists(filename))
    {
        // Report the file that is actually read, not the remote address.
        this.WriteLine("Load HTML from: {0}", filename);
        htmlDoc = new HtmlDocument();
        htmlDoc.Load(filename);
    }
    else
    {
        // Must create in this order: cache path first, then enable caching.
        var htmlWeb = new HtmlWeb();
        htmlWeb.CachePath = cacheDir;
        htmlWeb.UsingCache = true;
        this.WriteLine("Load HTML from: {0}", html);
        htmlDoc = htmlWeb.Load(html);
        this.WriteLine("Load HTML took: {0} ms", htmlWeb.RequestDuration);
    }

    var salvageTable = XPathExpression.Compile("//table[@class='salvage_table']");
    var table = htmlDoc.DocumentNode.SelectSingleNode(salvageTable);
    if (table == null)
    {
        this.WriteLine("TABLE not found in HTML document, expr={0}", salvageTable.Expression);
        return;
    }

    try
    {
        var itemBuilds = new Dictionary<string, List<Build>>();
        parseTable(table, itemBuilds);
        printItemsAndBuilds(itemBuilds, salvageItemsDataFile);
    }
    catch (Exception x)
    {
        // Best effort: log the failure and give a debugger, if attached, a chance to inspect it.
        this.WriteLine(x);
        if (Debugger.IsAttached)
        {
            Debugger.Break();
        }
    }
}
// Placeholder: currently always yields a new, empty list.
private List<ItemData> loadItemData() => new List<ItemData>();
/// <summary>
/// Walks the table rows and records, per item name, the builds that use the item,
/// grouped by hero class.
/// </summary>
/// <param name="table">Node of the salvage table found by the caller.</param>
/// <param name="itemBuilds">
/// Receives item name -> build groups. An entry is created for every accepted item row,
/// even when all of its builds end up filtered out.
/// </param>
private void parseTable(HtmlNode table, Dictionary<string, List<Build>> itemBuilds)
{
    // Built programmatically so the two-space literal cannot be collapsed by whitespace-normalising tools.
    var doubleSpace = new string(' ', 2);

    // NOTE(review): "//tr" is evaluated from the document root, so rows outside this table are visited too.
    // Kept as-is because narrowing it to ".//tr" could drop rows the guide keeps in other tables - confirm.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    IEnumerable<HtmlNode> rows = table.SelectNodes("//tr");
    if (rows == null)
    {
        rows = Enumerable.Empty<HtmlNode>();
    }

    var rowCount = 0;
    foreach (var row in rows)
    {
        rowCount += 1;
        var columns = row.Elements("td")?.ToList();
        if (columns?.Count != 2)
        {
            // The very first row is not reported (presumably the header row).
            if (rowCount > 1)
            {
                this.WriteLine("row{0,4} INVALID columns={1}", rowCount, columns?.Count);
            }
            continue;
        }

        // Find item name from link
        var links = columns[0].SelectNodes("./span/a")?.ToList();
        if (links?.Count != 1)
        {
            this.WriteLine("row{0,4} INVALID links={1}", rowCount, links?.Count);
            continue;
        }

        var itemName = links[0].InnerText;
        // Ignore Hellfire Amulet and Ring because they are so special and must be judged by the user!
        if (itemName.StartsWith("Hellfire ", StringComparison.Ordinal))
        {
            this.WriteLine("row{0,4} {1} SKIPPED", rowCount, itemName);
            continue;
        }
        this.WriteLine("row{0,4} {1}", rowCount, itemName);

        // Find build names for unordered list (inside a link).
        var items = columns[1].SelectNodes("./ul/li")?.ToList();
        if (items == null || items.Count == 0)
        {
            // A null result means no list entries matched; treat it like an empty list instead of crashing below.
            this.WriteLine("row{0,4} INVALID items={1}", rowCount, items?.Count ?? 0);
            continue;
        }

        if (!itemBuilds.TryGetValue(itemName, out var buildList))
        {
            buildList = new List<Build>();
            itemBuilds.Add(itemName, buildList);
        }

        foreach (var item in items)
        {
            var buildNameText = item.InnerText.Trim(); // Trim to be sure.
            // InnerText can return double spaces which we must fix for the parser.
            while (buildNameText.Contains(doubleSpace))
            {
                buildNameText = buildNameText.Replace(doubleSpace, " ");
            }

            if (isIgnoredBuild(buildNameText))
            {
                continue;
            }

            var tuple = parseBuild(buildNameText);
            if (tuple == null)
            {
                continue;
            }

            var heroClass = tuple.Item1;
            if (!HeroClasses.Contains(heroClass)) // Filter builds by class.
            {
                continue;
            }

            var buildName = tuple.Item2;
            this.WriteLine(" {0,-11} {1}", heroClass, buildName);

            // One Build per hero class and item; further names for the same class are appended to it.
            var build = buildList.FirstOrDefault(x => x.HeroClass == heroClass);
            if (build == null)
            {
                buildList.Add(new Build(heroClass, buildName));
            }
            else
            {
                build.BuildNames.Add(buildName);
            }
        }
    }
    this.WriteLine("{0} rows processed", rowCount);
}

// True for list entries that must not be recorded as builds.
private bool isIgnoredBuild(string buildNameText)
{
    return (SkipLoN && buildNameText.StartsWith("LoN ", StringComparison.Ordinal)) // Filter LoN builds
        || buildNameText.EndsWith("outdated", StringComparison.Ordinal)            // outdated!
        || buildNameText.EndsWith(" (Cube)", StringComparison.Ordinal)             // Only for Cube!
        || buildNameText.Contains("Dungeon Guide")                                 // Set Dungeon Guide skipped!
        || buildNameText.Contains("The Thrill");                                   // The Thrill Conquest Build!
}
/// <summary>
/// Renders the collected items as C# source for a <c>SalvageItemsData</c> class, writes that source
/// to <paramref name="salvageItemsDataFile"/>, and logs both the source and per-class statistics.
/// </summary>
/// <param name="itemBuilds">Item name -> build groups; items with no build groups are left out of the output.</param>
/// <param name="salvageItemsDataFile">Path of the file to (over)write with the generated source.</param>
private void printItemsAndBuilds(Dictionary<string, List<Build>> itemBuilds, string salvageItemsDataFile)
{
    // HeroClass -> build name -> item name
    // Slots below Total count build names per hero class; the Total slot counts the items written.
    var totalIndex = (int)HeroClass.Total;
    var counters = new int[totalIndex + 1];

    var keys = itemBuilds.Keys.ToList();
    keys.Sort();

    var builder = new StringBuilder().AppendLine();
    builder
        .AppendLine(" public sealed class SalvageItemsData")
        .AppendLine(" {")
        .AppendLine(" public readonly Dictionary<string, Build[]> HeroBuilds = new Dictionary<string, Build[]>()")
        .AppendLine(" {");

    foreach (var itemName in keys)
    {
        var builds = itemBuilds[itemName];
        if (builds.Count == 0)
        {
            continue;
        }

        // Item name. Escaped so a quote or backslash in the scraped text cannot break the generated literal.
        builder
            .Append(" { ")
            .AppendFormat("\"{0}\", new Build[] ", escapeLiteral(itemName))
            .AppendLine("{");
        counters[totalIndex] += 1;

        builds.Sort((a, b) => a.HeroClass.CompareTo(b.HeroClass));
        foreach (var build in builds)
        {
            // Hero class, convert Follower to None.
            var heroClass = build.HeroClass == HeroClass.Follower ? HeroClass.None : build.HeroClass;
            builder
                .AppendFormat(" new Build(HeroClass.{0},", heroClass.ToString())
                .AppendLine();

            // Build names - sort and loop. Statistics stay keyed by the original class (Follower, not None).
            build.BuildNames.Sort();
            var heroIndex = (int)build.HeroClass;
            var lastIndex = build.BuildNames.Count - 1;
            for (var i = 0; i < build.BuildNames.Count; ++i)
            {
                builder.AppendFormat(" \"{0}\"", escapeLiteral(build.BuildNames[i]));
                // The last name closes the constructor call; the others only need a separator.
                builder.AppendLine(i < lastIndex ? "," : "),");
                counters[heroIndex] += 1;
            }
        }

        // Closes the Build[] initializer and the dictionary entry.
        builder.AppendLine(" }},");
    }

    builder
        .AppendLine(" };")
        .AppendLine(" }");

    string result = builder.ToString();
    File.WriteAllText(salvageItemsDataFile, result);
    // Must bind to the plain-string overload: the generated source contains braces.
    this.WriteLine(result);
    this.WriteLine("{0} items processed", keys.Count);

    // Statistics: one line per enum slot, then the number of items that had no builds.
    builder.Clear().AppendLine();
    for (int i = 0; i < counters.Length; ++i)
    {
        var heroClass = (HeroClass)i;
        builder
            .AppendFormat("{0,-11}", heroClass.ToString())
            .AppendFormat("{0,5}", counters[i])
            .AppendLine();
    }
    builder
        .AppendFormat("{0,-11}", "Skipped")
        .AppendFormat("{0,5}", keys.Count - counters[totalIndex])
        .AppendLine();
    this.WriteLine(builder.ToString());
}

// Escapes backslashes and double quotes so the text can sit inside a regular C# string literal.
private static string escapeLiteral(string text)
{
    return text.Replace("\\", "\\\\").Replace("\"", "\\\"");
}
/// <summary>
/// Turns one guide entry into its hero class and a compact build name.
/// </summary>
/// <param name="buildNameText">Entry text as accepted by <c>parseAsTokens</c>.</param>
/// <returns>The hero class and "name spec", where spec is the parenthesised part with its wording shortened.</returns>
/// <exception cref="InvalidOperationException">The class token is not a <see cref="HeroClass"/> member.</exception>
private static Tuple<HeroClass, string> parseBuild(string buildNameText)
{
    var parts = parseAsTokens(buildNameText);
    var classToken = parts[1];

    HeroClass heroClass;
    if (!Enum.TryParse(classToken, out heroClass))
    {
        throw new InvalidOperationException(classToken);
    }

    // Support build is class specific, so the class becomes part of its name.
    var baseName = parts[0] == "Support"
        ? parts[0] + " " + classToken
        : parts[0];

    var spec = parts[2]
        .Replace(" + ", "+")
        .Replace(" variation)", ")")
        .Replace(" variations)", ")");

    return Tuple.Create(heroClass, baseName + " " + spec);
}
/// <summary>
/// Splits a guide entry of the form "Name Class (spec)" into three tokens.
/// </summary>
/// <param name="buildName">Entry text with single spaces between words.</param>
/// <returns>
/// An array of three strings: the text before the class name, the class name with its spaces removed
/// (or the <see cref="HeroClass.Follower"/> name for follower guides, whose first token is "Follower"),
/// and the remainder starting at the opening parenthesis that follows the class name.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Neither a known class name followed by " (" nor "Follower Guide (" occurs in the text.
/// </exception>
private static string[] parseAsTokens(string buildName)
{
    // Class names as they are spelled in the guide, tried in this order.
    string[] classNames =
    {
        "Demon Hunter",
        "Barbarian",
        "Wizard",
        "Witch Doctor",
        "Monk",
        "Crusader",
        "Necromancer",
    };

    foreach (var className in classNames)
    {
        var pos = buildName.IndexOf(" " + className + " (", StringComparison.Ordinal);
        if (pos == -1)
        {
            continue;
        }

        // The spec starts at the parenthesis right after the class name. Deriving it from the class match,
        // not from the first " (" in the text, keeps names with an earlier parenthesised part from
        // producing a negative substring length.
        var specStart = pos + 1 + className.Length + 1;
        return new string[]
        {
            buildName.Substring(0, pos),
            className.Replace(" ", ""),
            buildName.Substring(specStart),
        };
    }

    const string followerMarker = "Follower Guide (";
    var followerPos = buildName.IndexOf(followerMarker, StringComparison.Ordinal);
    if (followerPos != -1)
    {
        return new string[]
        {
            "Follower",
            HeroClass.Follower.ToString(),
            // Keep the opening parenthesis, which is the marker's last character.
            buildName.Substring(followerPos + followerMarker.Length - 1),
        };
    }

    throw new InvalidOperationException(buildName);
}
/// <summary>Returns the part of a build name that precedes its first " (".</summary>
/// <param name="buildName">Build name expected to contain a parenthesised suffix.</param>
/// <exception cref="InvalidOperationException">The name contains no " (".</exception>
private static string parseBaseName(string buildName)
{
    var parenIndex = buildName.IndexOf(" (");
    return parenIndex >= 0
        ? buildName.Substring(0, parenIndex)
        : throw new InvalidOperationException(buildName);
}
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement