Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 7th, 2012  |  syntax: None  |  size: 1.45 KB  |  hits: 14  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Web crawler time out - C#
  /// <summary>
  /// Downloads the page whose URL is typed into <c>textBox1</c>, follows every
  /// relative <c>href</c> found on it, and adds each e-mail address found on the
  /// linked pages to <c>listBox1</c> (addresses already in the list are skipped).
  /// </summary>
  /// <param name="sender">Control that raised the click event (unused).</param>
  /// <param name="e">Event data (unused).</param>
  /// <remarks>
  /// Every response is disposed as soon as its body has been read. Leaving
  /// <see cref="HttpWebResponse"/> objects open keeps their connections checked
  /// out of the per-host pool, which makes later <c>GetResponse()</c> calls block
  /// until they time out.
  /// Network and URI-format exceptions are not caught here; they propagate to
  /// the caller exactly as before.
  /// </remarks>
  private void button1_Click(object sender, System.EventArgs e)
  {
      string url = textBox1.Text;
      System.Uri baseUri = new System.Uri(url);

      // Captures the value of every double-quoted href attribute.
      const string hrefPattern = "href=\"(.*?)\"";

      // E-mail address: local part, '@', then either a domain name or a dotted
      // IPv4 address, optionally followed by a 4-digit port.
      const string emailPattern =
          @"([\w-]+(\.[\w-]+)*@([a-z0-9-]+(\.[a-z0-9-]+)*?\.[a-z]{2,6}|(\d{1,3}\.){3}\d{1,3})(:\d{4})?)";

      string code = DownloadPage(baseUri);
      MatchCollection hrefs = Regex.Matches(code, hrefPattern, RegexOptions.Singleline);

      foreach (Match h in hrefs)
      {
          string link = h.Groups[1].Value;

          // Only relative links are followed; anything carrying its own
          // scheme separator (http://, https://, ftp://, ...) is skipped.
          if (link.Contains("://"))
          {
              continue;
          }

          // Resolve against the start page instead of concatenating strings,
          // so "page.html", "/page.html" and "../page.html" all work.
          System.Uri target;
          if (!System.Uri.TryCreate(baseUri, link, out target))
          {
              continue;
          }

          // Drops mailto:, javascript: and similar hrefs that cannot be
          // fetched with an HttpWebRequest.
          if (target.Scheme != System.Uri.UriSchemeHttp &&
              target.Scheme != System.Uri.UriSchemeHttps)
          {
              continue;
          }

          // Scan the linked page itself (not the start page) for addresses.
          string innerlink = DownloadPage(target);
          MatchCollection found = Regex.Matches(innerlink, emailPattern, RegexOptions.Singleline);

          foreach (Match m in found)
          {
              string email = m.Groups[1].Value;

              if (!listBox1.Items.Contains(email))
              {
                  listBox1.Items.Add(email);
              }
          }
      }
  }

  // Fetches one page over HTTP(S) and returns its body as text, releasing the
  // response and its stream before returning.
  private static string DownloadPage(System.Uri address)
  {
      HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);

      using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
      using (StreamReader reader = new StreamReader(response.GetResponseStream()))
      {
          return reader.ReadToEnd();
      }
  }