Advertisement
Guest User

Untitled

a guest
Jan 17th, 2016
304
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 4.77 KB | None | 0 0
  /// <summary>
  /// Holds the title and URL of one page together with the pages it links to.
  /// </summary>
  /// <remarks>
  /// Only one level of children is collected: holders produced by
  /// <see cref="Add1"/> carry a title and URL, and their
  /// <see cref="ListOfChildLinks"/> is left <c>null</c>.
  /// </remarks>
  public class LinksHolder
  {
      /// <summary>Returned by <see cref="Create"/> when the supplied item is unusable.</summary>
      private const int ErrorCode = -1;

      /// <summary>Returned by <see cref="Create"/> on success.</summary>
      private const int noErrorCode = 0;

      /// <summary>Title of the page.</summary>
      public string Title { get; set; }

      /// <summary>URL of the page.</summary>
      public string Url { get; set; }

      /// <summary>
      /// Holders for the pages linked from this one. Assigned by
      /// <see cref="Create"/>; <c>null</c> until then.
      /// </summary>
      public List<LinksHolder> ListOfChildLinks { get; set; }

      /// <summary>Creates an empty holder; call <see cref="Create"/> to fill it.</summary>
      public LinksHolder()
      {
      }

      /// <summary>
      /// Fills this holder from an already parsed page and fetches every URL
      /// listed in it to build the child holders.
      /// </summary>
      /// <param name="_linkItem">Parse result for the page this holder represents.</param>
      /// <returns>
      /// <c>0</c> on success; <c>-1</c> when <paramref name="_linkItem"/> carries a
      /// parser error or has no title or URL (in which case this holder is left untouched).
      /// </returns>
      public int Create(LinkItem _linkItem)
      {
          if (IsUnusable(_linkItem))
          {
              return ErrorCode;
          }

          ListOfChildLinks = new List<LinksHolder>();
          this.Title = _linkItem.Title;
          this.Url = _linkItem.Url;

          // A LinkItem that was not produced by a successful parse may have no list at all.
          if (_linkItem.ListOfUrl == null)
          {
              return noErrorCode;
          }

          foreach (string link in _linkItem.ListOfUrl)
          {
              // Add1 downloads the page, so call it exactly once per link and
              // reuse the result for both the null check and the insertion.
              LinksHolder child = Add1(link);
              if (child != null)
              {
                  ListOfChildLinks.Add(child);
              }
          }

          return noErrorCode;
      }

      /// <summary>
      /// Downloads and parses <paramref name="_url"/> and wraps the result in a new holder.
      /// </summary>
      /// <param name="_url">Address of the page to fetch.</param>
      /// <returns>
      /// A holder with <see cref="Title"/> and <see cref="Url"/> set, or <c>null</c>
      /// when the page could not be parsed or has no title or URL.
      /// </returns>
      public LinksHolder Add1(string _url)
      {
          LinkItem parsed = HtmlParser.Parse(_url);
          if (IsUnusable(parsed))
          {
              return null;
          }

          LinksHolder holder = new LinksHolder();
          holder.Title = parsed.Title;
          holder.Url = parsed.Url;
          return holder;
      }

      /// <summary>
      /// Tells whether a parse result must be rejected: it carries one of the
      /// parser's error markers, or its title or URL is missing (null or empty).
      /// </summary>
      private static bool IsUnusable(LinkItem item)
      {
          return item.ErrorString == "error: no_title"
              || item.ErrorString == "error: invalid_url"
              || string.IsNullOrEmpty(item.Title)
              || string.IsNullOrEmpty(item.Url);
      }
  }
  56.  
  57.  
  /// <summary>
  /// Result of parsing one HTML page: either the page's title and outgoing
  /// links, or an error marker describing why parsing failed.
  /// </summary>
  public struct LinkItem
  {
      /// <summary>Text found inside the page's title element; unset on error results.</summary>
      public string Title;

      /// <summary>Address of the page this item describes.</summary>
      public string Url;

      /// <summary>
      /// Error marker set by the parser ("error: invalid_url" or "error: no_title");
      /// left <c>null</c> when parsing succeeded.
      /// </summary>
      public string ErrorString;

      /// <summary>Links collected from the page; unset on error results.</summary>
      public List<string> ListOfUrl;
  }
  /// <summary>
  /// Downloads an HTML page and extracts its title and the http/https links
  /// found in its <c>href</c> attributes.
  /// </summary>
  public static class HtmlParser
  {
      // Matches either a bare URL (no capture group) or an href="..." attribute
      // (target in group 1). Only group 1 is read, so bare-URL matches are ignored.
      private const string _urlRegex = @"\b(?:https?://|www\.)\S+\b|href=\""(.*?)\""";
      private const string _titleRegex = @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>";

      /// <summary>
      /// Downloads the page at <paramref name="_url"/> and collects its title and links.
      /// </summary>
      /// <param name="_url">Absolute http or https address of the page.</param>
      /// <returns>
      /// On success, an item with <c>Url</c>, <c>Title</c> and <c>ListOfUrl</c> set.
      /// Otherwise an item holding only <c>Url</c> and an <c>ErrorString</c> of
      /// "error: invalid_url" (bad address or failed download) or
      /// "error: no_title" (no non-empty title element found).
      /// </returns>
      public static LinkItem Parse(string _url)
      {
          if (!СheckUrl(_url))
          {
              return new LinkItem { ErrorString = "error: invalid_url", Url = _url };
          }

          // СheckUrl succeeded, so the string is a well-formed absolute URI.
          Uri pageUri = new Uri(_url);

          string htmlPageString;
          try
          {
              // WebClient is disposable; release it as soon as the download is done.
              using (WebClient client = new WebClient())
              {
                  client.Encoding = System.Text.Encoding.UTF8;
                  htmlPageString = client.DownloadString(_url);
              }
          }
          catch (WebException)
          {
              return new LinkItem { ErrorString = "error: invalid_url", Url = _url };
          }
          catch (NotSupportedException)
          {
              return new LinkItem { ErrorString = "error: invalid_url", Url = _url };
          }

          string title = Regex.Match(htmlPageString, _titleRegex, RegexOptions.IgnoreCase).Groups["Title"].Value;
          if (title == "")
          {
              return new LinkItem { ErrorString = "error: no_title", Url = _url };
          }

          LinkItem linkItemInstance = new LinkItem();
          linkItemInstance.ListOfUrl = new List<string>();
          linkItemInstance.Url = _url;
          linkItemInstance.Title = title;

          foreach (Match item in Regex.Matches(htmlPageString, _urlRegex, RegexOptions.Singleline))
          {
              string link = ResolveLink(pageUri, item.Groups[1].Value);
              if (link != null)
              {
                  linkItemInstance.ListOfUrl.Add(link);
              }
          }

          return linkItemInstance;
      }

      /// <summary>
      /// Checks that a string is an absolute URL with the http or https scheme.
      /// </summary>
      /// <remarks>
      /// NOTE(review): the first letter of this method's name appears to be the
      /// Cyrillic letter "Es" rather than a Latin "C"; the name is kept exactly
      /// as it was so existing callers continue to compile - confirm before renaming.
      /// </remarks>
      /// <param name="UrlString">Candidate URL; may be null.</param>
      /// <returns><c>true</c> for a well-formed absolute http/https URL.</returns>
      public static bool СheckUrl(string UrlString)
      {
          Uri uriResult;
          return Uri.TryCreate(UrlString, UriKind.Absolute, out uriResult) && IsHttp(uriResult);
      }

      /// <summary>
      /// Turns one href value into an absolute http/https URL, or returns null
      /// when the link should be skipped.
      /// </summary>
      private static string ResolveLink(Uri pageUri, string href)
      {
          // Skip empty targets, root/self references and in-page anchors.
          if (href == "" || href == @"/" || href == @"//" || href[0] == '#'
              || href == pageUri.AbsolutePath || href == pageUri.AbsoluteUri)
          {
              return null;
          }

          // Already an absolute http/https URL: keep it exactly as written.
          if (СheckUrl(href))
          {
              return href;
          }

          // Resolve against the page address instead of gluing scheme and host
          // together by hand: this keeps the port, handles "../x", "page.html"
          // and "//host/x" correctly, and leaves mailto:/javascript: targets
          // with their own scheme so they are rejected below.
          Uri absolute;
          if (Uri.TryCreate(pageUri, href, out absolute) && IsHttp(absolute))
          {
              return absolute.AbsoluteUri;
          }

          return null;
      }

      /// <summary>Tells whether an absolute URI uses the http or https scheme.</summary>
      private static bool IsHttp(Uri uri)
      {
          return uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement