Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Dec 22nd, 2012  |  syntax: None  |  size: 0.91 KB  |  views: 20  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  /// <summary>
  /// Replaces every non-ASCII character (code unit above U+007F) in
  /// <paramref name="src"/> with a single space and leaves all ASCII
  /// characters, including a literal '?', untouched.
  /// </summary>
  /// <param name="src">Text to sanitize. Must not be null.</param>
  /// <returns>
  /// A string of the same length as <paramref name="src"/> in which each
  /// non-ASCII UTF-16 code unit has been replaced by ' '.
  /// </returns>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="src"/> is null.
  /// </exception>
  public static string fix_encoding(string src)
  {
      if (src == null)
      {
          throw new System.ArgumentNullException("src");
      }

      // Inspect the characters directly instead of encoding to ASCII first:
      // the ASCII encoder substitutes '?' for anything it cannot represent,
      // which is indistinguishable from a '?' that was really in the text,
      // so blanking byte 63 afterwards would also erase genuine question marks.
      StringBuilder cleaned = new StringBuilder(src.Length);
      foreach (char c in src)
      {
          cleaned.Append(c > '\u007F' ? ' ' : c);
      }

      return cleaned.ToString();
  }
  21.        
  // Extract the text of the current page, buffer it (followed by a line
  // terminator) in a StringWriter, then run the buffered text through
  // fix_encoding and store the result in currentText.
  // NOTE(review): reader, page and currentText are declared outside this
  // fragment; presumably this runs once per PDF page - confirm against caller.
  StringWriter output = new StringWriter();
  var strategy = new SimpleTextExtractionStrategy();
  var pageText = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
  output.WriteLine(pageText);
  currentText = fix_encoding(output.ToString());
  25.        
  /// <summary>
  /// Replaces every character outside the ASCII range (U+0000 to U+007F) in
  /// <paramref name="src"/> with a single space.
  /// </summary>
  /// <param name="src">Text to sanitize.</param>
  /// <returns>
  /// <paramref name="src"/> with each non-ASCII UTF-16 code unit replaced by ' '.
  /// </returns>
  public static string remove_non_ascii(string src)
  {
      // The backslashes are required: without them the class is made of the
      // literal characters 'u', '0', '7', 'F' and the range '0'-'u', which
      // would blank out spaces, punctuation and the letters v-z instead.
      return Regex.Replace(src, @"[^\u0000-\u007F]", " ");
  }