
Untitled
By: a guest on
May 7th, 2012 | syntax:
None | size: 1.45 KB | hits: 14 | expires: Never
Web crawler time out - C#
private void button1_Click(object sender, System.EventArgs e)
{
string url = textBox1.Text;
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
StreamReader sr = new StreamReader(response.GetResponseStream());
string code = sr.ReadToEnd();
string re = "href="(.*?)"";
MatchCollection href = Regex.Matches(code, @re, RegexOptions.Singleline);
foreach (Match h in href)
{
string link = h.Groups[1].Value;
if (!link.Contains("http://"))
{
HttpWebRequest request2 = (HttpWebRequest)WebRequest.Create(url + link);
HttpWebResponse response2 = (HttpWebResponse)request2.GetResponse();
StreamReader sr2 = new StreamReader(response.GetResponseStream());
string innerlink = sr.ReadToEnd();
MatchCollection m2 = Regex.Matches(code, @"([w-]+(.[w-]+)*@([a-z0-9-]+(.[a-z0-9-]+)*?.[a-z]{2,6}|(d{1,3}.){3}d{1,3})(:d{4})?)", RegexOptions.Singleline);
foreach (Match m in m2)
{
string email = m.Groups[1].Value;
if (!listBox1.Items.Contains(email))
{
listBox1.Items.Add(email);
}
}
}
}
sr.Close();
}