Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Returns a copy of <paramref name="src"/> in which every character outside
/// the 7-bit ASCII range (U+0000..U+007F) is replaced by a single space.
/// </summary>
/// <remarks>
/// The previous implementation encoded the text with <c>Encoding.ASCII</c>
/// and then blanked every <c>'?'</c> byte. Because the ASCII encoder uses
/// <c>'?'</c> as its replacement for unencodable characters, genuine question
/// marks in the input were blanked as well. Characters are now inspected
/// directly, so real question marks are preserved.
/// </remarks>
/// <param name="src">The text to sanitize. Must not be null.</param>
/// <returns>A string of the same length as <paramref name="src"/> containing only ASCII characters.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="src"/> is null.</exception>
public static string fix_encoding(string src)
{
    if (src == null)
    {
        throw new System.ArgumentNullException("src");
    }

    // Output has exactly one character per input character, so presize the buffer.
    StringBuilder cleaned = new StringBuilder(src.Length);
    foreach (char c in src)
    {
        // Anything above U+007F cannot be represented in ASCII; blank it out.
        cleaned.Append(c <= '\u007F' ? c : ' ');
    }

    return cleaned.ToString();
}
// Pull the text of the current page out of the PDF reader, append a line
// terminator via StringWriter.WriteLine, and pass the result through
// fix_encoding before storing it in currentText.
var pageText = PdfTextExtractor.GetTextFromPage(reader, page, new SimpleTextExtractionStrategy());
var pageBuffer = new StringWriter();
pageBuffer.WriteLine(pageText);
currentText = fix_encoding(pageBuffer.ToString());
/// <summary>
/// Replaces every character outside the 7-bit ASCII range (U+0000..U+007F)
/// in <paramref name="src"/> with a single space.
/// </summary>
/// <remarks>
/// The character class needs the <c>\u</c> escapes. Without the backslashes
/// the pattern is read as a set of literal characters plus the range
/// <c>'0'-'u'</c>, which blanks spaces, most punctuation and the letters
/// <c>v</c> through <c>z</c> instead of non-ASCII characters.
/// </remarks>
/// <param name="src">The text to sanitize.</param>
/// <returns><paramref name="src"/> with each non-ASCII character replaced by a space.</returns>
public static string remove_non_ascii(string src)
{
    return Regex.Replace(src, @"[^\u0000-\u007F]", " ");
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement