Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- function buildTrie(hash) {
- "use strict";
- // A very simple function to build a Trie
- // we could compress this later, but simplicity
- // is better for this example. If we don't
- // perform well, we'll try to optimize this a bit
- // there is a room for optimization here.
- var p, result = {}, leaf, i;
- for (p in hash) {
- if (hash.hasOwnProperty(p)) {
- leaf = result;
- i = 0;
- do {
- if (p[i] in leaf) {
- leaf = leaf[p[i]];
- } else {
- leaf = leaf[p[i]] = {};
- }
- i += 1;
- } while (i < p.length);
- // since, obviously, no character
- // equals to empty character, we'll
- // use it to store the reference to the
- // original value
- leaf[""] = hash[p];
- }
- }
- return result;
- }
- function prefixReplaceHtml(html, trie) {
- "use strict";
- var i, len = html.length, result = [], lastMatch = 0,
- current, leaf, match, matched, replacement;
- for (i = 0; i < len; i += 1) {
- current = html[i];
- if (current === "<") {
- // don't check for out of bounds access
- // assume we never face a situation, when
- // "<" is the last character in an HTML
- if (match) {
- result.push(
- html.substring(lastMatch, i - matched.length),
- "<a href=\"", match, "\">", replacement, "</a>");
- lastMatch = i - matched.length + replacement.length;
- i = lastMatch - 1;
- } else {
- if (matched) {
- // go back to the second character of the
- // matched string and try again
- i = i - matched.length;
- }
- }
- matched = match = replacement = leaf = "";
- if (html[i + 1] === "a") {
- // we want to skip replacing inside
- // anchor tags. We also assume they
- // are never nested, as valid HTML is
- // against that idea
- if (html[i + 2] in
- { " " : 1, "\t" : 1, "\r" : 1, "\n" : 1 }) {
- // this is certainly an anchor
- i = html.indexOf("</a", i + 3) + 3;
- continue;
- }
- }
- // if we got here, it's a regular tag, just look
- // for terminating ">"
- i = html.indexOf(">", i + 1);
- continue;
- }
- // if we got here, we need to start checking
- // for the match in the trie
- if (!leaf) {
- leaf = trie;
- }
- leaf = leaf[current];
- // we prefer longest possible match, just like POSIX
- // regular expressions do
- if (leaf && ("" in leaf)) {
- match = leaf[""];
- replacement = html.substring(
- i - (matched ? matched.length : 0), i + 1);
- }
- if (!leaf) {
- // newby-style inline (all hand work!) pay extra
- // attention, this code is duplicated few lines above
- if (match) {
- result.push(
- html.substring(lastMatch, i - matched.length),
- "<a href=\"", match, "\">", replacement, "</a>");
- lastMatch = i - matched.length + replacement.length;
- i = lastMatch - 1;
- } else {
- if (matched) {
- // go back to the second character of the
- // matched string and try again
- i = i - matched.length;
- }
- }
- matched = match = replacement = "";
- } else if (matched) {
- // perhaps a bit premature, but we'll try to avoid
- // string concatenation, when we can.
- matched = html.substring(i - matched.length, i + 1);
- } else {
- matched = current;
- }
- }
- return result.join("");
- }
- function testPrefixReplace() {
- "use strict";
- var trie = buildTrie(
- { "x" : "www.xxx.com", "yyy" : "www.y.com",
- "xy" : "www.xy.com", "yy" : "www.why.com" });
- // return trie["y"]["y"]["y"];
- return prefixReplaceHtml(
- "<html><head>x</head><body><a >yyy</a><p>" +
- "xyyy yy x xy</p><abrval><yy>xxy</yy>", trie);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement