Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
namespace ZetaProducer.RuntimeBusinessLogic.Rendering.Helper
{
    using AngleSharp.Dom;
    using AngleSharp.Parser.Html;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Net;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Typographic post-processing for HTML fragments: replaces plain ASCII
    /// dashes, ellipses and quotes in text content with their typographic
    /// counterparts.
    /// </summary>
    public static class Texturizer
    {
        /// <summary>
        /// Modeled on the WordPress function "wptexturize".
        /// https://developer.wordpress.org/reference/functions/wptexturize
        /// https://codex.wordpress.org/Function_Reference/wptexturize
        /// Replaces certain characters.
        ///
        /// It is, however, far from as comprehensive as the WordPress function.
        ///
        /// Source code of "wptexturize":
        /// https://github.com/WordPress/WordPress/blob/master/wp-includes/formatting.php#L51
        /// </summary>
        /// <param name="text">
        /// HTML markup to process. <c>null</c>, empty and whitespace-only
        /// input is returned unchanged.
        /// </param>
        /// <returns>
        /// The inner HTML of the parsed document's body, with the text nodes
        /// outside of <see cref="NoTexturizeTags"/> elements texturized.
        /// </returns>
        public static string Texturize(string text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                return text;
            }

            var parser = new HtmlParser();
            var document = parser.Parse(text);
            var it = document.CreateNodeIterator(document);

            INode node;
            while ((node = it.Next()) != null)
            {
                // Only text nodes carry visible text. Elements have no
                // NodeValue, and comments must stay untouched.
                if (node.NodeType != NodeType.Text || !IsTexturizable(node))
                {
                    continue;
                }

                var before = node.NodeValue;
                if (string.IsNullOrEmpty(before))
                {
                    continue;
                }

                var after = processOneNode(before);
                if (after != before)
                {
                    node.NodeValue = after;
                }
            }

            return document.DocumentElement.GetElementsByTagName(@"body").First().InnerHtml;
        }

        /// <summary>
        /// Decides whether a text node may be rewritten: it must be located
        /// inside the body element and must not be nested (at any depth)
        /// inside one of the <see cref="NoTexturizeTags"/> elements.
        /// </summary>
        /// <remarks>
        /// The check has to look at the ancestors. The name of a text node
        /// itself is always "#text", so testing the node's own name can never
        /// match an element name such as "pre" or "script".
        /// </remarks>
        private static bool IsTexturizable(INode textNode)
        {
            var insideBody = false;

            for (var ancestor = textNode.Parent; ancestor != null; ancestor = ancestor.Parent)
            {
                var name = ancestor.NodeName;

                if (NoTexturizeTags.Contains(name))
                {
                    return false;
                }

                if (string.Equals(name, @"body", StringComparison.OrdinalIgnoreCase))
                {
                    insideBody = true;
                }
            }

            return insideBody;
        }

        /// <summary>
        /// Applies the replacement rules to the content of a single text node.
        /// </summary>
        /// <param name="content">Decoded text of one text node.</param>
        /// <returns>The text with dashes, ellipses and quotes replaced.</returns>
        private static string processOneNode(string content)
        {
            var result = content;

            // The rules run in this fixed order; each one works on the output
            // of the previous one.
            result = Regex.Replace(result, @"(^|\s+)-(\s+|$)", $@"$1{EmDash}$2", RegexOptions.Singleline);
            result = Regex.Replace(result, @"(^|\b)\.\.\.(\b|$)", $@"$1{Ellipsis}$2", RegexOptions.Singleline);
            result = Regex.Replace(result, @"(^|\s+)""(\w+)", $@"$1{OpeningDoubleQuote}$2", RegexOptions.Singleline);
            result = Regex.Replace(result, @"(\w+)""(\s+|$|[!.,;])", $@"$1{ClosingDoubleQuote}$2", RegexOptions.Singleline);
            result = Regex.Replace(result, @"(^|\s+)'(\w+)", $@"$1{OpeningSingleQuote}$2", RegexOptions.Singleline);
            result = Regex.Replace(result, @"(\w+)'(\s+|$)", $@"$1{ClosingSingleQuote}$2", RegexOptions.Singleline);

            // TODO: Adopt more (all?) of the WordPress rules.

            // No HTML decoding here: the node value is already decoded text and
            // the replacements above are literal characters, not entities.
            // Decoding again would turn literal text such as "&lt;" (written
            // as "&amp;lt;" in the markup) into "<" and thereby alter content.
            // Background: https://github.com/AngleSharp/AngleSharp/issues/361#issuecomment-230155588
            return result;
        }

        /// <summary>
        /// Elements whose text content must never be rewritten. Matching is
        /// ordinal and case-insensitive, so it is independent of the casing
        /// of the node name and of the current culture.
        /// </summary>
        private static readonly HashSet<string> NoTexturizeTags =
            new HashSet<string>(StringComparer.OrdinalIgnoreCase)
            {
                @"pre",
                @"code",
                @"kbd",
                @"style",
                @"script",
                @"tt"
            };

        private const string OpeningDoubleQuote = @"„";
        private const string ClosingDoubleQuote = @"“";
        private const string Apostrophe = @"'";
        private const string OpeningSingleQuote = @"‚";
        private const string ClosingSingleQuote = @"‘";
        private const string EmDash = @"—";
        private const string EnDash = @"–";
        private const string Ellipsis = @"…";
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement