pastebin - collaborative debugging

pastebin is a collaborative debugging tool allowing you to share and modify code snippets while chatting on IRC, IM or a message board.

This site is developed to XHTML and CSS2 W3C standards. If you see this paragraph, your browser does not support those standards and you need to upgrade. Visit WaSP for a variety of options.

C# pastebin - collaborative debugging tool View Help


Posted by Mihai Nadas on Thu 7 May 08:32
report abuse | download | new post

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Net;
  6. using System.Text.RegularExpressions;
  7. using System.IO;
  8.  
  namespace WikiParser
  {
      /// <summary>
      /// Console utility that downloads a Wikipedia list page, extracts the link text of every
      /// list item matching <see cref="LinkPattern"/>, and writes one entry per line to
      /// <see cref="OutputFilePath"/>.
      /// </summary>
      class Program
      {
          // Page to scrape. The title contains non-ASCII letters ('România' is spelled with
          // 'â', a-circumflex), so this source file must be saved as UTF-8.
          private const string WikiPageUrl = "http://ro.wikipedia.org/wiki/Lista_universităţilor_din_România";

          // Matches a list item whose only content is a wiki link; group 2 captures the link text.
          // '.' does not match newlines here, so each match stays within a single line of HTML.
          private const string LinkPattern = "<li><a href=\"(/wiki/|/w/index\\.php).+\".*title=\".+\">(.+)</a></li>";

          // Destination file; one extracted link text per line, UTF-8 encoded.
          private const string OutputFilePath = @"d:\temp\s2b_universities.txt";

          /// <summary>
          /// Program entry point: optionally asks for permission to overwrite an existing output
          /// file, downloads the page, and writes every captured link text to the output file.
          /// </summary>
          /// <param name="args">Command-line arguments (unused).</param>
          static void Main(string[] args)
          {
              if (File.Exists(OutputFilePath) && !ConfirmOverwrite(OutputFilePath))
              {
                  return;
              }

              Console.WriteLine("Downloading data from {0}.", WikiPageUrl);
              string wikiString;
              try
              {
                  wikiString = DownloadPage(WikiPageUrl);
              }
              catch (WebException ex)
              {
                  // Report the failure and signal it through the exit code instead of crashing
                  // with an unhandled exception. Any existing output file is still intact here.
                  Console.Error.WriteLine("Download failed: {0}", ex.Message);
                  Environment.ExitCode = 1;
                  return;
              }
              Console.WriteLine("Download completed.");

              Console.WriteLine("Applying {0} regex pattern on the data", LinkPattern);

              // Open the output once (append: false truncates any previous content) rather than
              // reopening the file for every match. The file is created even when nothing matches.
              using (var writer = new StreamWriter(OutputFilePath, false, Encoding.UTF8))
              {
                  foreach (Match match in Regex.Matches(wikiString, LinkPattern))
                  {
                      var title = match.Groups[2].Value;
                      writer.WriteLine(title);
                      Console.WriteLine("Writing {0} done.", title);
                  }
              }

              Console.WriteLine("Done. The results are available in {0}. Press any key to continue.", OutputFilePath);
              Console.ReadLine();
          }

          /// <summary>
          /// Asks the user whether the existing file at <paramref name="path"/> may be overwritten.
          /// </summary>
          /// <param name="path">Path of the file that already exists.</param>
          /// <returns>
          /// <c>true</c> only when the user answers "y" (any casing, surrounding whitespace ignored);
          /// <c>false</c> for any other answer or when no input is available.
          /// </returns>
          private static bool ConfirmOverwrite(string path)
          {
              Console.Write("The file {0} already exists on the system. Would you like to overwrite it? (Y/N) ",
                  path);

              // Console.ReadLine returns null when standard input has no more lines; treat as "no".
              var answer = Console.ReadLine();
              return answer != null
                  && string.Equals(answer.Trim(), "y", StringComparison.OrdinalIgnoreCase);
          }

          /// <summary>
          /// Downloads the resource at <paramref name="url"/> and returns it decoded as UTF-8 text.
          /// </summary>
          /// <param name="url">Address of the page to download.</param>
          /// <returns>The response body as a string.</returns>
          /// <exception cref="WebException">The download failed.</exception>
          private static string DownloadPage(string url)
          {
              // WebClient is IDisposable; the using block releases it even if the download throws.
              using (var webClient = new WebClient())
              {
                  webClient.Encoding = Encoding.UTF8;
                  webClient.UseDefaultCredentials = true;
                  return webClient.DownloadString(url);
              }
          }
      }
  }

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with @@


Remember me so that I can delete my post