Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// A page (title and URL) together with the pages it links to, one level deep.
/// </summary>
/// <remarks>
/// Children produced by <see cref="Add1"/> carry only <see cref="Title"/> and
/// <see cref="Url"/>; their own <see cref="ListOfChildLinks"/> is left null.
/// </remarks>
public class LinksHolder
{
    // Return codes of Create.
    private const int ErrorCode = -1;
    private const int NoErrorCode = 0;

    // Not returned by the current code; kept for a possible depth-limited traversal.
    private const int LevelOutCode = 1;

    /// <summary>Title of the page.</summary>
    public string Title { get; set; }

    /// <summary>URL of the page.</summary>
    public string Url { get; set; }

    /// <summary>
    /// Successfully parsed pages linked from this page. Stays null until
    /// <see cref="Create"/> succeeds.
    /// </summary>
    public List<LinksHolder> ListOfChildLinks { get; set; }

    public LinksHolder()
    {
    }

    /// <summary>
    /// Fills this instance from an already parsed page and resolves each of its
    /// links into a child entry.
    /// </summary>
    /// <param name="_linkItem">Parse result of the page this holder represents.</param>
    /// <returns>
    /// -1 when <paramref name="_linkItem"/> is an error result or lacks a title or URL
    /// (this instance is left untouched); 0 otherwise.
    /// </returns>
    public int Create(LinkItem _linkItem)
    {
        if (!IsUsable(_linkItem))
        {
            return ErrorCode;
        }

        ListOfChildLinks = new List<LinksHolder>();
        Title = _linkItem.Title;
        Url = _linkItem.Url;

        // A page without a link list simply has no children.
        if (_linkItem.ListOfUrl == null)
        {
            return NoErrorCode;
        }

        foreach (string link in _linkItem.ListOfUrl)
        {
            // Add1 goes through HtmlParser.Parse, so call it exactly once per link
            // and reuse the result instead of parsing the same page twice.
            LinksHolder child = Add1(link);
            if (child != null)
            {
                ListOfChildLinks.Add(child);
            }
        }

        return NoErrorCode;
    }

    /// <summary>
    /// Parses the page at <paramref name="_url"/> and wraps its title and URL in a
    /// new holder.
    /// </summary>
    /// <param name="_url">Address of the page to parse.</param>
    /// <returns>The new holder, or null when the page could not be parsed.</returns>
    public LinksHolder Add1(string _url)
    {
        LinkItem parsed = HtmlParser.Parse(_url);
        if (!IsUsable(parsed))
        {
            return null;
        }

        return new LinksHolder { Title = parsed.Title, Url = parsed.Url };
    }

    // True when the parse result is not one of the parser's error results and
    // carries both a title and a URL (null counts as missing).
    private static bool IsUsable(LinkItem item)
    {
        bool failed = item.ErrorString == "error: no_title"
            || item.ErrorString == "error: invalid_url";

        return !failed
            && !string.IsNullOrEmpty(item.Title)
            && !string.IsNullOrEmpty(item.Url);
    }
}
/// <summary>
/// Result of parsing one HTML page: either the page data or an error marker.
/// </summary>
public struct LinkItem
{
    /// <summary>Text of the page's title; left unset on error results.</summary>
    public string Title;

    /// <summary>Address of the page this result describes.</summary>
    public string Url;

    /// <summary>
    /// Error marker such as "error: invalid_url" or "error: no_title";
    /// left unset when parsing succeeded.
    /// </summary>
    public string ErrorString;

    /// <summary>Links collected from the page; left unset on error results.</summary>
    public List<string> ListOfUrl;
}
/// <summary>
/// Downloads an HTML page and extracts its title and the http/https links found
/// in its href attributes.
/// </summary>
public static class HtmlParser
{
    // Only the href alternative has a capture group. Matches of the bare-URL
    // alternative therefore yield an empty group 1 and are skipped by Parse.
    private const string _urlRegex = @"\b(?:https?://|www\.)\S+\b|href=\""(.*?)\""";
    private const string _titleRegex = @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>";

    // Error markers reported through LinkItem.ErrorString.
    private const string InvalidUrlError = "error: invalid_url";
    private const string NoTitleError = "error: no_title";

    /// <summary>
    /// Downloads the page at <paramref name="_url"/> and collects its title and links.
    /// </summary>
    /// <param name="_url">Absolute http or https address of the page.</param>
    /// <returns>
    /// On success, a <see cref="LinkItem"/> with Url, Title and ListOfUrl set.
    /// When the URL is not an absolute http/https URL or the download fails, an item
    /// with ErrorString "error: invalid_url"; when the page has no non-empty title,
    /// an item with ErrorString "error: no_title". Error items carry only Url and
    /// ErrorString.
    /// </returns>
    public static LinkItem Parse(string _url)
    {
        if (!СheckUrl(_url))
        {
            return new LinkItem { ErrorString = InvalidUrlError, Url = _url };
        }

        // Base address used to recognise self-links and to resolve relative links.
        Uri pageUri = new Uri(_url);

        string htmlPageString;
        try
        {
            using (WebClient client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;
                htmlPageString = client.DownloadString(_url);
            }
        }
        catch (WebException)
        {
            // Any download failure is reported the same way as a malformed URL.
            return new LinkItem { ErrorString = InvalidUrlError, Url = _url };
        }

        // Check the title first so pages without one skip the link scan.
        string title = Regex.Match(htmlPageString, _titleRegex, RegexOptions.IgnoreCase).Groups["Title"].Value;
        if (title == "")
        {
            return new LinkItem { ErrorString = NoTitleError, Url = _url };
        }

        LinkItem linkItemInstance = new LinkItem();
        linkItemInstance.Url = _url;
        linkItemInstance.Title = title;
        linkItemInstance.ListOfUrl = new List<string>();

        foreach (Match item in Regex.Matches(htmlPageString, _urlRegex, RegexOptions.Singleline))
        {
            string link = ResolveLink(pageUri, item.Groups[1].Value);
            if (link != null)
            {
                linkItemInstance.ListOfUrl.Add(link);
            }
        }

        return linkItemInstance;
    }

    /// <summary>
    /// Tells whether a string is an absolute URL with the http or https scheme.
    /// </summary>
    /// <param name="UrlString">Text to test.</param>
    /// <returns>True for an absolute http/https URL, false otherwise.</returns>
    // NOTE(review): the first letter of this method's name appears to be the Cyrillic
    // capital letter U+0421, not Latin 'C'. It is kept as-is so existing callers
    // keep compiling — confirm before renaming.
    public static bool СheckUrl(string UrlString)
    {
        Uri uriResult;
        return Uri.TryCreate(UrlString, UriKind.Absolute, out uriResult)
            && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
    }

    // Turns one captured href value into an absolute http/https URL, or returns null
    // when the value is empty, a fragment, a link back to the page itself, or does not
    // resolve to an http/https address (for example mailto: or javascript: links).
    private static string ResolveLink(Uri pageUri, string href)
    {
        if (href == "" || href == @"/" || href == @"//" || href[0] == '#'
            || href == pageUri.AbsolutePath || href == pageUri.AbsoluteUri)
        {
            return null;
        }

        if (СheckUrl(href))
        {
            return href;
        }

        // Standard relative-reference resolution keeps the port and the page's
        // directory, and handles protocol-relative ("//host/path") links.
        Uri resolved;
        if (Uri.TryCreate(pageUri, href, out resolved) && СheckUrl(resolved.AbsoluteUri))
        {
            return resolved.AbsoluteUri;
        }

        return null;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement