Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Net.Mail;
- using System.Net;
- using System.Configuration;
- using System.Threading.Tasks;
- using HtmlAgilityPack;
namespace WebscrapeMailer
{
    /// <summary>
    /// Console tool that downloads a set of web pages, extracts one node from each page
    /// with an XPath expression, and e-mails the extracted text. All settings are read
    /// from the appSettings section of App.config.
    /// </summary>
    internal class Program
    {
        // Number of "config.webURL{n}" / "config.URL{n}Node" pairs read from App.config.
        private const int ConfiguredScrapeCount = 3;

        // TCP port of the SMTP relay.
        private const int SmtpPort = 25;

        /// <summary>
        /// Reads the configuration, runs all scrapes concurrently, and mails the results.
        /// </summary>
        public static void Main()
        {
            // Validate the mail settings before doing any network work so that a broken
            // configuration fails fast instead of after every page has been downloaded.
            string mailTo = ReadRequiredSetting("config.MailTo");
            string mailFrom = ReadRequiredSetting("config.MailFrom");
            string mailSubject = ConfigurationManager.AppSettings["config.MailSubject"];
            string mailServer = ReadRequiredSetting("config.MailServer");

            // Maps each page URL to the XPath of the node to extract from that page.
            // The URL is the dictionary key, so every configured URL must be unique.
            Dictionary<string, string> webURLnodePairs = new Dictionary<string, string>();
            for (int i = 1; i <= ConfiguredScrapeCount; i++)
            {
                string url = ReadRequiredSetting("config.webURL" + i);
                string xpath = ReadRequiredSetting("config.URL" + i + "Node");
                webURLnodePairs.Add(url, xpath);
            }

            // Main is synchronous, so block here until every scrape has finished.
            // GetAwaiter().GetResult() rethrows the original exception rather than
            // wrapping it in an AggregateException the way Wait()/Result would.
            List<string> scrapeResults = GetAllScrapes(webURLnodePairs).GetAwaiter().GetResult();

            SendMail(scrapeResults, mailTo, mailFrom, mailSubject, mailServer);
        }

        /// <summary>
        /// Returns the value of a required appSettings entry.
        /// </summary>
        /// <param name="key">Name of the appSettings entry.</param>
        /// <returns>The configured, non-blank value.</returns>
        /// <exception cref="ConfigurationErrorsException">The entry is missing or blank.</exception>
        private static string ReadRequiredSetting(string key)
        {
            string value = ConfigurationManager.AppSettings[key];
            if (string.IsNullOrWhiteSpace(value))
            {
                throw new ConfigurationErrorsException(
                    string.Format("Required appSettings entry '{0}' is missing or empty.", key));
            }

            return value;
        }

        /// <summary>
        /// Starts one scrape per dictionary entry and waits for all of them to complete.
        /// </summary>
        /// <param name="webURLnodePairs">Page URL mapped to the XPath of the node to extract.</param>
        /// <returns>The extracted texts, in the enumeration order of the dictionary.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="webURLnodePairs"/> is null.</exception>
        private static async Task<List<string>> GetAllScrapes(Dictionary<string, string> webURLnodePairs)
        {
            if (webURLnodePairs == null)
            {
                throw new ArgumentNullException("webURLnodePairs");
            }

            // Select starts every download before the first one is awaited; WhenAll then
            // completes once all of them have finished (or faults if any of them failed).
            string[] results = await Task.WhenAll(
                webURLnodePairs.Select(pair => Scrape(pair.Key, pair.Value)));

            return results.ToList();
        }

        /// <summary>
        /// Downloads a page and returns the inner text of the first node matching an XPath.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="urlnode">XPath expression selecting the node to extract.</param>
        /// <returns>The inner text of the selected node.</returns>
        /// <exception cref="InvalidOperationException">No node matches <paramref name="urlnode"/>.</exception>
        private static async Task<string> Scrape(string url, string urlnode)
        {
            string html;
            using (WebClient client = new WebClient())
            {
                // Truly asynchronous download: the method yields here, which is what
                // lets the scrapes started by GetAllScrapes overlap.
                html = await client.DownloadStringTaskAsync(url);
            }

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);

            // SelectSingleNode returns null when nothing matches the expression.
            HtmlNode node = document.DocumentNode.SelectSingleNode(urlnode);
            if (node == null)
            {
                throw new InvalidOperationException(
                    string.Format("No node matching '{0}' was found at '{1}'.", urlnode, url));
            }

            return node.InnerText;
        }

        /// <summary>
        /// Sends a single plain-text e-mail listing every scrape result.
        /// </summary>
        /// <param name="scrapeValues">Texts extracted by the scrapes.</param>
        /// <param name="mailTo">Recipient address.</param>
        /// <param name="mailFrom">Sender address.</param>
        /// <param name="mailSubject">Subject line of the message.</param>
        /// <param name="mailServer">Host name of the SMTP relay.</param>
        private static void SendMail(List<string> scrapeValues, string mailTo, string mailFrom, string mailSubject, string mailServer)
        {
            MailAddress to = new MailAddress(mailTo);
            MailAddress from = new MailAddress(mailFrom);

            StringBuilder body = new StringBuilder();
            body.Append("Scrapes: ").Append(Environment.NewLine);
            foreach (string scrape in scrapeValues)
            {
                body.Append("Start of new scrape: ").Append(scrape).Append(Environment.NewLine);
            }

            // Both the message and the client hold resources that must be released;
            // the send is synchronous so that failures propagate to the caller.
            using (MailMessage mail = new MailMessage(from, to))
            using (SmtpClient smtp = new SmtpClient())
            {
                mail.Subject = mailSubject;
                mail.Body = body.ToString();

                smtp.Host = mailServer;
                smtp.Port = SmtpPort;
                smtp.Send(mail);
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement