Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using HtmlAgilityPack;
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Net;
- using System.Text.RegularExpressions;
/// <summary>
/// Console crawler that walks the per-letter word index under
/// http://dictionary.reference.com/list/, collects lower-case words from each
/// index page and from every definition page linked from it, and writes the
/// sorted, de-duplicated result to <c>all_words.txt</c>.
/// </summary>
class Program
{
    /// <summary>Root of the paginated index; a page is addressed as root + letter + '/' + page number.</summary>
    const string ListRootUrl = "http://dictionary.reference.com/list/";

    /// <summary>XPath selecting the word entries of an index page.</summary>
    const string WordNodeXPath = "/html/body/div[@class='content-container']/div[@class='words-list']/ul/li/span[@class='word']";

    /// <summary>
    /// Runs the crawl and writes one word per line to <c>all_words.txt</c>
    /// (created or truncated in the current directory), echoing each word to the console.
    /// </summary>
    static void Main()
    {
        // A word is a run of lower-case letters preceded by a space (or the start of
        // the text) and followed by a space, '.', ',', '?', '!' (or the end).
        // The delimiters are look-arounds so they are NOT consumed: with consuming
        // groups the space after one word could not also serve as the space before
        // the next, and every second word of a sentence was silently skipped.
        Regex wordPattern = new Regex("(?<= |^)(\\p{Ll}+)(?=[ .,?!]|$)", RegexOptions.Compiled);
        IEnumerable<string> crawledWords = F0(wordPattern);

        using (FileStream output = File.Open("all_words.txt", FileMode.Create))
        using (StreamWriter writer = new StreamWriter(output))
        {
            // Constructing the SortedSet drains the lazy crawl completely, so
            // nothing is written until every page has been visited.
            foreach (string word in new SortedSet<string>(crawledWords))
            {
                Console.WriteLine("Found {0}...", word);
                writer.WriteLine(word);
            }
        }

        Console.WriteLine("Finished!");
    }

    /// <summary>Lazily yields the words found under every letter 'a' through 'z'.</summary>
    /// <param name="r0">Word pattern; capture group 1 must be the word itself.</param>
    /// <returns>All words found, in crawl order, possibly with duplicates.</returns>
    static IEnumerable<string> F0(Regex r0)
    {
        Console.WriteLine("Scanning /list...");
        for (char letter = 'a'; letter <= 'z'; letter++)
        {
            foreach (string word in F1(letter, r0))
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Lazily yields the words of every index page of one letter, starting at
    /// page 1 and stopping at the first page that cannot be scanned.
    /// </summary>
    /// <param name="c0">Index letter.</param>
    /// <param name="r0">Word pattern; capture group 1 must be the word itself.</param>
    /// <returns>The words of all pages of the letter, in page order.</returns>
    static IEnumerable<string> F1(char c0, Regex r0)
    {
        Console.WriteLine("Scanning /list/{0}...", c0);
        for (int page = 1; ; page++)
        {
            IEnumerable<string> pageWords;
            try
            {
                // F2 downloads and validates the page eagerly, so a missing page
                // is reported here, inside the try block, and not later while the
                // returned sequence is being enumerated.
                pageWords = F2(c0, page, r0);
            }
            catch (WebException ex)
            {
                Console.WriteLine("Stopping /list/{0} at page {1}: {2}", c0, page, ex.Message);
                yield break;
            }
            catch (InvalidDataException ex)
            {
                Console.WriteLine("Stopping /list/{0} at page {1}: {2}", c0, page, ex.Message);
                yield break;
            }

            foreach (string word in pageWords)
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Downloads one index page immediately and returns a lazy sequence of the
    /// words on it and on the definition pages it links to.
    /// </summary>
    /// <remarks>
    /// Deliberately NOT an iterator method: an iterator would defer the download
    /// until enumeration, where the caller's try/catch can no longer see failures.
    /// </remarks>
    /// <param name="c0">Index letter.</param>
    /// <param name="i0">1-based page number.</param>
    /// <param name="r0">Word pattern; capture group 1 must be the word itself.</param>
    /// <returns>Lazy sequence of words for the page.</returns>
    /// <exception cref="WebException">The request failed, or the server answered 301 Moved Permanently.</exception>
    /// <exception cref="InvalidDataException">The page contains no word entries.</exception>
    static IEnumerable<string> F2(char c0, int i0, Regex r0)
    {
        Console.WriteLine("Scanning /list/{0}/{1}...", c0, i0);
        HttpWebRequest request = WebRequest.CreateHttp(ListRootUrl + c0 + '/' + i0);
        // Redirects are not followed so that a 301 stays visible to the check below.
        request.AllowAutoRedirect = false;

        HtmlDocument document = new HtmlDocument();
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): presumably the site redirects page numbers past the end
            // of a letter's list; the original code treated 301 as "stop" - confirm.
            if (response.StatusCode == HttpStatusCode.MovedPermanently)
            {
                throw new WebException(
                    "Page " + i0 + " of letter '" + c0 + "' was redirected (301).",
                    WebExceptionStatus.ProtocolError);
            }

            using (Stream body = response.GetResponseStream())
            {
                // Load parses the whole stream, so the document stays usable
                // after the response has been disposed.
                document.Load(body);
            }
        }

        // SelectNodes returns null (not an empty collection) when nothing matches.
        // An index page without entries is treated as the end of the list so the
        // page loop in F1 cannot run forever on empty 200 responses.
        HtmlNodeCollection wordNodes = document.DocumentNode.SelectNodes(WordNodeXPath);
        if (wordNodes == null)
        {
            throw new InvalidDataException(
                "Page " + i0 + " of letter '" + c0 + "' contains no word entries.");
        }

        return ScanWordNodes(wordNodes, r0);
    }

    /// <summary>
    /// Lazily yields, for each index entry, the entry text (when it matches the
    /// pattern) followed by the words of the entry's definition page.
    /// </summary>
    /// <param name="wordNodes">The <c>span[@class='word']</c> nodes of an index page.</param>
    /// <param name="r0">Word pattern; capture group 1 must be the word itself.</param>
    static IEnumerable<string> ScanWordNodes(HtmlNodeCollection wordNodes, Regex r0)
    {
        foreach (HtmlNode wordNode in wordNodes)
        {
            string entryText = wordNode.InnerText;
            // As in the original, the whole entry text is emitted when it
            // contains at least one match; it is not split into words.
            if (r0.IsMatch(entryText))
            {
                yield return entryText;
            }

            // Entries without a usable definition link are kept but not followed.
            HtmlNode link = wordNode.ParentNode.SelectSingleNode("span[@class='definition-link']/a");
            if (link == null)
            {
                continue;
            }

            HtmlAttribute href = link.Attributes["href"];
            if (href == null || string.IsNullOrEmpty(href.Value))
            {
                continue;
            }

            foreach (string word in F3(href.Value, r0))
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Lazily yields every word matched inside any <c>span</c> element of the
    /// page at <paramref name="s0"/>. A page that cannot be downloaded is
    /// reported on the console and contributes no words.
    /// </summary>
    /// <param name="s0">URL of the definition page; skipped unless it is an absolute URL.</param>
    /// <param name="r0">Word pattern; capture group 1 must be the word itself.</param>
    /// <returns>The matched words, possibly with duplicates.</returns>
    static IEnumerable<string> F3(string s0, Regex r0)
    {
        Uri pageUri;
        if (!Uri.TryCreate(s0, UriKind.Absolute, out pageUri))
        {
            Console.WriteLine("Skipping {0}: not an absolute URL.", s0);
            yield break;
        }

        Console.WriteLine("Scanning {0}...", pageUri.AbsolutePath);
        HtmlDocument document = LoadDefinitionPage(pageUri);
        if (document == null)
        {
            yield break;
        }

        // SelectNodes returns null when the page has no span elements.
        HtmlNodeCollection spans = document.DocumentNode.SelectNodes("//span");
        if (spans == null)
        {
            yield break;
        }

        foreach (HtmlNode span in spans)
        {
            foreach (Match match in r0.Matches(span.InnerText))
            {
                // Group 1 is the word. Match.Captures[0] is the entire match and
                // would include whatever surrounds the word inside the match.
                yield return match.Groups[1].Value;
            }
        }
    }

    /// <summary>
    /// Downloads and parses one definition page.
    /// </summary>
    /// <param name="pageUri">Absolute address of the page.</param>
    /// <returns>The parsed document, or <c>null</c> when the page could not be requested.</returns>
    static HtmlDocument LoadDefinitionPage(Uri pageUri)
    {
        try
        {
            using (WebResponse response = WebRequest.Create(pageUri).GetResponse())
            using (Stream body = response.GetResponseStream())
            {
                HtmlDocument document = new HtmlDocument();
                document.Load(body);
                return document;
            }
        }
        catch (WebException ex)
        {
            // One dead link must not abort the whole crawl.
            Console.WriteLine("Skipping {0}: {1}", pageUri.AbsolutePath, ex.Message);
            return null;
        }
        catch (NotSupportedException ex)
        {
            // WebRequest.Create rejects URI schemes it has no handler for.
            Console.WriteLine("Skipping {0}: {1}", pageUri.AbsolutePath, ex.Message);
            return null;
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement