Advertisement
Guest User

ExctractsInfoFromHTML

a guest
Jan 16th, 2014
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 2.71 KB | None | 0 0
  1. using System;
  2. using System.Text;
  3. using System.IO;
  4. using System.Security;
  5.  
  6.     /*Write a program that extracts from a given HTML file its title (if available) and its body text with the HTML tags removed.
  7.      * Example:
  8.      * <html>
  9.      * <head><title>News</title></head>
  10.      * <body><p><a href="http://academy.telerik.com">
  11.      * TelerikAcademy</a>aims to provide free real-world practical training for young people who want to turn into
  12.      * skillful .NET software engineers.</p></body>
  13.      * </html>
  14.      */
  15.  
  /// <summary>
  /// Console program that reads an HTML file and prints its title (if one is present)
  /// and the text inside its body element with all tags replaced by single spaces.
  /// </summary>
  class ExctractsInfoFromHTML
  {
      /// <summary>
      /// Reads the HTML file at a fixed relative path, extracts the title and the body
      /// text, and writes both to the console. File-access failures are reported as
      /// console messages instead of crashing the program.
      /// </summary>
      static void Main()
      {
          string path = @"..\..\Files\html.html";

          try
          {
              string htmlFileContent = File.ReadAllText(path);

              string title = ExtractTitle(htmlFileContent);
              string bodyContent = ExtractBodyText(htmlFileContent);

              Console.WriteLine(title != string.Empty ? "Title: '{0}'" : "No title available.", title);
              Console.WriteLine("Body content: '{0}'", bodyContent);
          }
          catch (FileNotFoundException)
          {
              Console.WriteLine("The file specified in path({0}) was not found.", path);
          }
          catch (DirectoryNotFoundException)
          {
              Console.WriteLine("The specified path is invalid (for example, it is on an unmapped drive).");
          }
          catch (IOException)
          {
              // Must come after the two more specific IOException subclasses above.
              Console.WriteLine("An I/O error occurred while opening the file.");
          }
          catch (SecurityException)
          {
              Console.WriteLine("The caller does not have the required permission.");
          }
          catch (UnauthorizedAccessException)
          {
              Console.WriteLine("path specified a file that is read-only.");
              Console.WriteLine("Or This operation is not supported on the current platform.");
              Console.WriteLine("Or path specified a directory");
              Console.WriteLine("Or the caller does not have the required permission.");
          }
      }

      /// <summary>
      /// Returns the raw text between the opening and closing title tags.
      /// </summary>
      /// <param name="html">The full HTML document text; may be null or empty.</param>
      /// <returns>
      /// The title text exactly as it appears in the document (not trimmed), or
      /// <see cref="string.Empty"/> when the title element is missing or not closed.
      /// </returns>
      internal static string ExtractTitle(string html)
      {
          if (string.IsNullOrEmpty(html))
          {
              return string.Empty;
          }

          int titleStart = FindOpeningTagEnd(html, "title", 0);
          if (titleStart == -1)
          {
              return string.Empty;
          }

          // Search for the closing tag only after the opening tag, so the computed
          // length can never be negative.
          int titleEnd = html.IndexOf("</title", titleStart, StringComparison.OrdinalIgnoreCase);
          if (titleEnd == -1)
          {
              return string.Empty;
          }

          return html.Substring(titleStart, titleEnd - titleStart);
      }

      /// <summary>
      /// Returns the content of the body element with every tag replaced by one space.
      /// </summary>
      /// <param name="html">The full HTML document text; may be null or empty.</param>
      /// <returns>
      /// The body text, or <see cref="string.Empty"/> when there is no body element.
      /// Scanning stops at the closing body tag (or at the end of the document when the
      /// closing tag is missing). An unterminated tag ends the scan and is discarded.
      /// </returns>
      internal static string ExtractBodyText(string html)
      {
          if (string.IsNullOrEmpty(html))
          {
              return string.Empty;
          }

          int bodyStart = FindOpeningTagEnd(html, "body", 0);
          if (bodyStart == -1)
          {
              return string.Empty;
          }

          int bodyEnd = html.IndexOf("</body", bodyStart, StringComparison.OrdinalIgnoreCase);
          if (bodyEnd == -1)
          {
              bodyEnd = html.Length;
          }

          StringBuilder text = new StringBuilder();
          int position = bodyStart;

          while (position < bodyEnd)
          {
              char current = html[position];

              if (current != '<')
              {
                  text.Append(current);
                  position++;
                  continue;
              }

              int tagEnd = html.IndexOf('>', position);
              if (tagEnd == -1 || tagEnd >= bodyEnd)
              {
                  // Unterminated tag: stop instead of restarting the scan from index 0.
                  break;
              }

              text.Append(' ');
              position = tagEnd + 1;
          }

          return text.ToString();
      }

      /// <summary>
      /// Finds the first opening tag with the given name (case-insensitive, attributes
      /// allowed) at or after <paramref name="startIndex"/>.
      /// </summary>
      /// <param name="html">The document text to search; must not be null.</param>
      /// <param name="tagName">The tag name without angle brackets, e.g. "body".</param>
      /// <param name="startIndex">The index at which the search begins.</param>
      /// <returns>
      /// The index of the first character after the tag's closing '&gt;', or -1 when no
      /// complete opening tag is found.
      /// </returns>
      private static int FindOpeningTagEnd(string html, string tagName, int startIndex)
      {
          string opener = "<" + tagName;
          int searchFrom = startIndex;

          while (searchFrom < html.Length)
          {
              int tagStart = html.IndexOf(opener, searchFrom, StringComparison.OrdinalIgnoreCase);
              if (tagStart == -1)
              {
                  return -1;
              }

              int afterName = tagStart + opener.Length;
              if (afterName >= html.Length)
              {
                  return -1;
              }

              // Accept only a real tag boundary so a longer tag name that merely starts
              // with the requested name is not mistaken for it.
              char next = html[afterName];
              if (next == '>' || next == '/' || char.IsWhiteSpace(next))
              {
                  int tagEnd = html.IndexOf('>', afterName);
                  return tagEnd == -1 ? -1 : tagEnd + 1;
              }

              searchFrom = afterName;
          }

          return -1;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement