using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace WikiParser
{
/// <summary>
/// Console utility that downloads a Wikipedia page, applies a regular
/// expression to the HTML and writes the captured link texts, one per line,
/// to a UTF-8 text file.
/// </summary>
class Program
{
    /// <summary>
    /// Program entry point. If the output file already exists the user is asked
    /// whether to overwrite it; then the page is downloaded, the regex is applied
    /// and each captured entry is written to the output file and echoed to the
    /// console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // NOTE(review): the spelling "Romānia" in the URL looks like an encoding
        // artefact -- confirm that this address still resolves to the intended page.
        var wikiPageUrl = "http://ro.wikipedia.org/wiki/Lista_universităţilor_din_Romānia";

        // Group 1 is the link prefix (/wiki/ or /w/index.php); group 2 is the text
        // between the opening <a ...> tag and the closing </a></li>.
        var regEx = "<li><a href=\"(/wiki/|/w/index\\.php).+\".*title=\".+\">(.+)</a></li>";
        var outputFilePath = @"d:\temp\s2b_universities.txt";

        if (File.Exists(outputFilePath))
        {
            if (!ConfirmOverwrite(outputFilePath))
            {
                // Returning from Main ends the process with exit code 0.
                return;
            }

            File.Delete(outputFilePath);
        }

        Console.WriteLine("Downloading data from {0}.", wikiPageUrl);
        var wikiString = DownloadPage(wikiPageUrl);
        Console.WriteLine("Download completed.");

        Console.WriteLine("Applying {0} regex pattern on the data", regEx);
        var matches = Regex.Matches(wikiString, regEx);

        // Open the file only when there is something to write, so that no file is
        // created for an empty result; one writer replaces a reopen per entry.
        if (matches.Count > 0)
        {
            using (var writer = new StreamWriter(outputFilePath, false, Encoding.UTF8))
            {
                foreach (Match m in matches)
                {
                    var entry = m.Groups[2].Value;
                    writer.WriteLine(entry);
                    Console.WriteLine("Writing {0} done.", entry);
                }
            }
        }

        Console.WriteLine("Done. The results are available in {0}. Press any key to continue.", outputFilePath);

        // Waits for the user to press Enter before the console window closes.
        Console.ReadLine();
    }

    /// <summary>
    /// Asks the user whether the existing output file may be overwritten.
    /// </summary>
    /// <param name="path">Path of the existing file, shown in the prompt.</param>
    /// <returns>
    /// <c>true</c> when the answer is "y" (case-insensitive, surrounding
    /// whitespace ignored); <c>false</c> for any other answer or when no input
    /// is available.
    /// </returns>
    private static bool ConfirmOverwrite(string path)
    {
        Console.Write("The file {0} already exists on the system. Would you like to overwrite it? (Y/N) ",
            path);

        // ReadLine returns null when standard input has reached end-of-stream;
        // treat that as "no" instead of failing with a NullReferenceException.
        var answer = Console.ReadLine();
        if (answer == null)
        {
            return false;
        }

        return string.Equals(answer.Trim(), "y", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> as a UTF-8 string.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The response body decoded as UTF-8 text.</returns>
    private static string DownloadPage(string url)
    {
        // WebClient is IDisposable; release it as soon as the download completes.
        using (var webClient = new WebClient())
        {
            webClient.Encoding = Encoding.UTF8;
            webClient.UseDefaultCredentials = true;
            return webClient.DownloadString(url);
        }
    }
}
}