function buildTrie(hash) { "use strict"; // A very simple function to build a Trie // we could compress this later, but simplicity // is better for this example. If we don't // perform well, we'll try to optimize this a bit // there is a room for optimization here. var p, result = {}, leaf, i; for (p in hash) { if (hash.hasOwnProperty(p)) { leaf = result; i = 0; do { if (p[i] in leaf) { leaf = leaf[p[i]]; } else { leaf = leaf[p[i]] = {}; } i += 1; } while (i < p.length); // since, obviously, no character // equals to empty character, we'll // use it to store the reference to the // original value leaf[""] = hash[p]; } } return result; } function prefixReplaceHtml(html, trie) { "use strict"; var i, len = html.length, result = [], lastMatch = 0, current, leaf, match, matched, replacement; for (i = 0; i < len; i += 1) { current = html[i]; if (current === "<") { // don't check for out of bounds access // assume we never face a situation, when // "<" is the last character in an HTML if (match) { result.push( html.substring(lastMatch, i - matched.length), "", replacement, ""); lastMatch = i - matched.length + replacement.length; i = lastMatch - 1; } else { if (matched) { // go back to the second character of the // matched string and try again i = i - matched.length; } } matched = match = replacement = leaf = ""; if (html[i + 1] === "a") { // we want to skip replacing inside // anchor tags. We also assume they // are never nested, as valid HTML is // against that idea if (html[i + 2] in { " " : 1, "\t" : 1, "\r" : 1, "\n" : 1 }) { // this is certainly an anchor i = html.indexOf("" i = html.indexOf(">", i + 1); continue; } // if we got here, we need to start checking // for the match in the trie if (!leaf) { leaf = trie; } leaf = leaf[current]; // we prefer longest possible match, just like POSIX // regular expressions do if (leaf && ("" in leaf)) { match = leaf[""]; replacement = html.substring( i - (matched ? matched.length : 0), i + 1); } if (!leaf) { // newby-style inline (all hand work!) pay extra // attention, this code is duplicated few lines above if (match) { result.push( html.substring(lastMatch, i - matched.length), "", replacement, ""); lastMatch = i - matched.length + replacement.length; i = lastMatch - 1; } else { if (matched) { // go back to the second character of the // matched string and try again i = i - matched.length; } } matched = match = replacement = ""; } else if (matched) { // perhaps a bit premature, but we'll try to avoid // string concatenation, when we can. matched = html.substring(i - matched.length, i + 1); } else { matched = current; } } return result.join(""); } function testPrefixReplace() { "use strict"; var trie = buildTrie( { "x" : "www.xxx.com", "yyy" : "www.y.com", "xy" : "www.xy.com", "yy" : "www.why.com" }); // return trie["y"]["y"]["y"]; return prefixReplaceHtml( "xyyy

" + "xyyy yy x xy

xxy", trie); }