Share Pastebin
Guest
Public paste!

Mihai Nadas

By: a guest | May 7th, 2009 | Syntax: C# | Size: 1.79 KB | Hits: 361 | Expires: Never
Copy text to clipboard
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Net;
  6. using System.Text.RegularExpressions;
  7. using System.IO;
  8.  
  namespace WikiParser
  {
      /// <summary>
      /// Console utility that downloads a Wikipedia list page and stores the link
      /// text of every list item matching a regular expression in a text file,
      /// one entry per line.
      /// </summary>
      class Program
      {
          /// <summary>
          /// Entry point. Asks for confirmation before replacing an existing output
          /// file, downloads the page as UTF-8, applies the link pattern and writes
          /// capture group 2 of every match to the output file while reporting
          /// progress on the console.
          /// </summary>
          /// <param name="args">Command-line arguments; not used.</param>
          static void Main(string[] args)
          {
              // NOTE(review): the "ā" in "Romānia" may be a mis-encoded "â" - confirm that this URL resolves.
              var wikiPageUrl = "http://ro.wikipedia.org/wiki/Lista_universităţilor_din_Romānia";
              // Group 1 is the link prefix, group 2 is the visible link text that gets exported.
              var regEx = "<li><a href=\"(/wiki/|/w/index\\.php).+\".*title=\".+\">(.+)</a></li>";
              var outputFilePath = @"d:\temp\s2b_universities.txt";

              if (File.Exists(outputFilePath))
              {
                  Console.Write("The file {0} already exists on the system. Would you like to overwrite it? (Y/N) ",
                      outputFilePath);

                  // ReadLine returns null once standard input is closed; string.Equals
                  // copes with that and compares without depending on the current culture.
                  var answer = Console.ReadLine();
                  if (!string.Equals(answer, "y", StringComparison.OrdinalIgnoreCase))
                  {
                      // Anything other than "y"/"Y" keeps the existing file untouched.
                      return;
                  }

                  File.Delete(outputFilePath);
              }

              Console.WriteLine("Downloading data from {0}.", wikiPageUrl);

              string wikiString;
              // WebClient is disposable; release it as soon as the download has finished.
              using (var webClient = new WebClient())
              {
                  webClient.Encoding = Encoding.UTF8;
                  webClient.UseDefaultCredentials = true;
                  wikiString = webClient.DownloadString(wikiPageUrl);
              }

              Console.WriteLine("Download completed.");
              Console.WriteLine("Applying {0} regex pattern on the data", regEx);

              // Open the output once instead of reopening the file for every match.
              // Append mode on the (now absent) file keeps the UTF-8 output identical
              // to repeated File.AppendAllText calls.
              using (var writer = new StreamWriter(outputFilePath, true, Encoding.UTF8))
              {
                  foreach (Match m in Regex.Matches(wikiString, regEx))
                  {
                      var linkText = m.Groups[2].Value;
                      writer.WriteLine(linkText);
                      Console.WriteLine("Writing {0} done.", linkText);
                  }
              }

              Console.WriteLine("Done. The results are available in {0}. Press any key to continue.", outputFilePath);
              // Keeps the console window open until the user presses Enter.
              Console.ReadLine();
          }
      }
  }