Advertisement
uwekeim

"Port" of WordPress "wptexturize" function to .NET/C#

Sep 25th, 2017
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 3.61 KB | None | 0 0
  1. namespace ZetaProducer.RuntimeBusinessLogic.Rendering.Helper
  2. {
  3.     using AngleSharp.Dom;
  4.     using AngleSharp.Parser.Html;
  5.     using System.Linq;
  6.     using System.Net;
  7.     using System.Text.RegularExpressions;
  8.  
  9.     public static class Texturizer
  10.     {
  11.         /// <summary>
  12.         /// Angelehnt an die WordPress-Funktion "wptexturize".
  13.         /// https://developer.wordpress.org/reference/functions/wptexturize
  14.         /// https://codex.wordpress.org/Function_Reference/wptexturize
  15.         /// Ersetzt bestimmte Zeichen.
  16.         ///
  17.         /// Ist aber noch lange nicht so umfangreich wie die WordPress-Funktion.
  18.         ///
  19.         /// Quelltext zu "wptexturize":
  20.         /// https://github.com/WordPress/WordPress/blob/master/wp-includes/formatting.php#L51
  21.         /// </summary>
  22.         public static string Texturize(string text)
  23.         {
  24.             if (string.IsNullOrWhiteSpace(text)) return text;
  25.  
  26.             var parser = new HtmlParser();
  27.             var document = parser.Parse(text);
  28.  
  29.             var it = document.CreateNodeIterator(document);
  30.  
  31.             INode node;
  32.             var didFindBody = false;
  33.  
  34.             while ((node = it.Next()) != null)
  35.             {
  36.                 if ((didFindBody || node.NodeName.ToLowerInvariant() == @"body") && !NoTexturizeTags.Contains(node.NodeName.ToLower()))
  37.                 {
  38.                     didFindBody = true;
  39.  
  40.                     var before = node.NodeValue; // TODO: Ist 'NodeValue' die korrekte Property?
  41.                     if (!string.IsNullOrEmpty(before))
  42.                     {
  43.                         var after = processOneNode(before);
  44.  
  45.                         if (after != before) node.NodeValue = after; // TODO: Ist 'NodeValue' die korrekte Property?
  46.                     }
  47.                 }
  48.             }
  49.  
  50.             return document.DocumentElement.GetElementsByTagName(@"body").First().InnerHtml;
  51.         }
  52.  
  53.         private static string processOneNode(string content)
  54.         {
  55.             var result = content;
  56.  
  57.             result = Regex.Replace(result, @"(^|\s+)-(\s+|$)", $@"$1{EmDash}$2", RegexOptions.Singleline);
  58.             result = Regex.Replace(result, @"(^|\b)\.\.\.(\b|$)", $@"$1{Ellipsis}$2", RegexOptions.Singleline);
  59.             result = Regex.Replace(result, @"(^|\s+)""(\w+)", $@"$1{OpeningDoubleQuote}$2", RegexOptions.Singleline);
  60.             result = Regex.Replace(result, @"(\w+)""(\s+|$|[!.,;])", $@"$1{ClosingDoubleQuote}$2", RegexOptions.Singleline);
  61.             result = Regex.Replace(result, @"(^|\s+)'(\w+)", $@"$1{OpeningSingleQuote}$2", RegexOptions.Singleline);
  62.             result = Regex.Replace(result, @"(\w+)'(\s+|$)", $@"$1{ClosingSingleQuote}$2", RegexOptions.Singleline);
  63.  
  64.             // TODO: Noch mehr (alle?) Regeln von WordPress übernehmen.
  65.  
  66.             // https://github.com/AngleSharp/AngleSharp/issues/361#issuecomment-230155588
  67.             var c = WebUtility.HtmlDecode(result);
  68.  
  69.             return c;
  70.         }
  71.  
  72.         private static readonly string[] NoTexturizeTags =
  73.         {
  74.             @"pre",
  75.             @"code",
  76.             @"kbd",
  77.             @"style",
  78.             @"script",
  79.             @"tt"
  80.         };
  81.  
  82.         private const string OpeningDoubleQuote = @"&bdquo;";
  83.         private const string ClosingDoubleQuote = @"&ldquo;";
  84.         private const string Apostrophe = @"&apos;";
  85.         private const string OpeningSingleQuote = @"&sbquo;";
  86.         private const string ClosingSingleQuote = @"&lsquo;";
  87.         private const string EmDash = @"&mdash;";
  88.         private const string EnDash = @"&ndash;";
  89.         private const string Ellipsis = @"&hellip;";
  90.     }
  91. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement