Posted by Mihai Nadas on Thu 7 May 08:32
report abuse | download | new post
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Net;
- using System.Text.RegularExpressions;
- using System.IO;
namespace WikiParser
{
    /// <summary>
    /// Console utility that downloads a Wikipedia list page, extracts the link
    /// text of every matching list item with a regular expression, and appends
    /// each extracted title as a UTF-8 line to a text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Asks before overwriting an existing output file,
        /// downloads the page, applies the pattern and writes one line per match.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // NOTE(review): "Romānia" (a-macron) looks like a mis-encoded "România";
            // confirm the real page title before relying on this URL.
            var wikiPageUrl = "http://ro.wikipedia.org/wiki/Lista_universităţilor_din_Romānia";

            // Matches <li><a href="/wiki/..." ... title="...">TEXT</a></li> items
            // (or /w/index.php links); capture group 2 is the link text.
            var regEx = "<li><a href=\"(/wiki/|/w/index\\.php).+\".*title=\".+\">(.+)</a></li>";
            var outputFilePath = @"d:\temp\s2b_universities.txt";

            if (File.Exists(outputFilePath))
            {
                Console.Write("The file {0} already exists on the system. Would you like to overwrite it? (Y/N) ",
                    outputFilePath);

                // Console.ReadLine returns null when input is exhausted (e.g. redirected
                // stdin at EOF); treat that the same as any non-"y" answer.
                var answer = Console.ReadLine();
                if (answer == null || !string.Equals(answer.Trim(), "y", StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }

                File.Delete(outputFilePath);
            }

            Console.WriteLine("Downloading data from {0}.", wikiPageUrl);

            string wikiString;
            // The client was never declared in the original listing; create it here
            // and dispose it as soon as the download has finished.
            using (var webClient = new WebClient())
            {
                webClient.Encoding = Encoding.UTF8;
                webClient.UseDefaultCredentials = true;
                wikiString = webClient.DownloadString(wikiPageUrl);
            }

            Console.WriteLine("Download completed.");
            Console.WriteLine("Applying {0} regex pattern on the data", regEx);

            foreach (Match m in Regex.Matches(wikiString, regEx))
            {
                var title = m.Groups[2].Value;

                // AppendAllText creates the file on the first match and appends afterwards.
                File.AppendAllText(outputFilePath, title + Environment.NewLine, Encoding.UTF8);
                Console.WriteLine("Writing {0} done.", title);
            }

            Console.WriteLine("Done. The results are available in {0}. Press any key to continue.", outputFilePath);

            // Keeps the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.