Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Strip a set of characters from both ends of a string.
 *
 * The trailing run is removed first via rtrim, then the leading run via
 * ltrim; `chars` is forwarded unchanged to both helpers.
 *
 * @param {string} str - text to trim.
 * @param {string} [chars] - characters to strip, as understood by ltrim/rtrim.
 * @returns {string} the text with both ends stripped.
 */
function trim(str, chars) {
    var withoutTrailing = rtrim(str, chars);
    return ltrim(withoutTrailing, chars);
}
/**
 * Strip a leading run of characters from a string.
 *
 * @param {*} str - value to trim. null/undefined are treated as the empty
 *     string and any other non-string is converted with String(), so the
 *     function never throws on a missing value.
 * @param {string} [chars] - characters to strip. The value is inserted
 *     verbatim between "[" and "]" of a RegExp character class, so escapes
 *     such as "\\s" work and the caller must escape `]`, `\`, `^` and `-`
 *     when they are meant literally. Defaults to "\\s" when omitted or empty.
 * @returns {string} the text without the leading run of those characters.
 */
function ltrim(str, chars) {
    var text = str == null ? "" : String(str);
    var charClass = chars || "\\s";
    // The pattern is anchored at the start, so it can match at most once and
    // needs no global flag.
    return text.replace(new RegExp("^[" + charClass + "]+"), "");
}
/**
 * Strip a trailing run of characters from a string.
 *
 * @param {*} str - value to trim. null/undefined are treated as the empty
 *     string and any other non-string is converted with String(), so the
 *     function never throws on a missing value.
 * @param {string} [chars] - characters to strip. The value is inserted
 *     verbatim between "[" and "]" of a RegExp character class, so escapes
 *     such as "\\s" work and the caller must escape `]`, `\`, `^` and `-`
 *     when they are meant literally. Defaults to "\\s" when omitted or empty.
 * @returns {string} the text without the trailing run of those characters.
 */
function rtrim(str, chars) {
    var text = str == null ? "" : String(str);
    var charClass = chars || "\\s";
    // The pattern is anchored at the end, so it can match at most once and
    // needs no global flag.
    return text.replace(new RegExp("[" + charClass + "]+$"), "");
}
// Walks the DOM under document.body, gives every node a "readability" record
// (weight, child count, depth information) and then filters the collected
// records through the readability calculators. Runs immediately when the
// script is evaluated; needs a browser DOM and the trim() helper defined
// elsewhere in this file.
(function() {
    // Textual calculators. Each one receives (cleanText, regularText,
    // readability), merges its result into the readability record and
    // returns that record.
    var textualCalc = [
        // Simple space count on the cleaned text: one weight point per
        // space-separated piece, or a single point for a non-empty text
        // that contains no space.
        function spaceToWeight(cleanText, regularText, readability) {
            var trimmedSplit = cleanText.split(" ");
            if (trimmedSplit.length > 1) {
                readability.weight += trimmedSplit.length;
            } else if (cleanText.length > 0) {
                readability.weight++;
            }
            return readability;
        }
    ];

    // Readability calculators. Each one receives a readability record and is
    // expected to resolve to true (keep) or false (remove); the caller does
    // the maintenance of the stack.
    var readabilityCalc = [
        // Simple calculator based on the depth vs maxDepth vs weight.
        // TODO: not implemented. The empty body returns undefined, which the
        // filter loop below treats as "remove", so every record is dropped.
        function depthVsMaxDepthVsWeight(readability) {
        }
    ];

    // Readability records of all nodes whose weight is > 0.
    var acceptableStack = [];

    /**
     * Compute the readability record for a node and, recursively, for its
     * whole subtree.
     *
     * @param {Node} elem - node to weigh.
     * @param {number} depth - depth of elem below the starting node.
     * @returns {Object} record with weight, children, maxDepth, depth,
     *     depths (own depth followed by every descendant's depth) and element.
     */
    var weightNodes = function(elem, depth) {
        var readability = {
            weight: 0,
            children: 0,
            maxDepth: depth,
            depth: depth,
            depths: [depth],
            element: elem
        };
        var i;

        if (elem.nodeType === 3) {
            // Text node: apply the textual calculators. Fall back to "" so an
            // empty text node does not hand undefined to trim().
            var regularText = elem.textContent || elem.innerText || "";
            // NOTE(review): only runs of plain spaces are collapsed; interior
            // tabs/newlines stay and do not separate "words" - confirm that
            // this is the intended count.
            var cleanText = trim(regularText, " \r\n\t").replace(/ [ ]+/g, " ");
            for (i = 0; i < textualCalc.length; i++) {
                readability = textualCalc[i](cleanText, regularText, readability);
            }
        } else if (elem.childNodes && elem.childNodes.length) {
            var childNodes = elem.childNodes;
            // Keep a rolling tab of the number of children.
            readability.children += childNodes.length;
            // Index loop on purpose: for...in over a NodeList also yields
            // "length", "item" and other non-node keys.
            for (i = 0; i < childNodes.length; i++) {
                var childReadability = weightNodes(childNodes[i], depth + 1);
                // Merge what is possible into the record for this node.
                readability.weight += childReadability.weight;
                if (childReadability.maxDepth > readability.maxDepth) {
                    readability.maxDepth = childReadability.maxDepth;
                }
                readability.children += childReadability.children;
                // Track the actual depths of the subtree in a single array.
                for (var d = 0; d < childReadability.depths.length; d++) {
                    readability.depths.push(childReadability.depths[d]);
                }
            }
        }

        if (readability.weight > 0) {
            acceptableStack.push(readability);
            console.log(readability);
        }
        return readability;
    };

    console.clear();
    console.log("running..");
    weightNodes(document.body, 0);
    console.log("weighted");

    // Run all of the collected records through the final calculators.
    for (var c = 0; c < readabilityCalc.length; c++) {
        // Walk backwards from the last valid index so that splice() never
        // shifts an entry that has not been visited yet.
        for (var j = acceptableStack.length - 1; j >= 0; j--) {
            if (!readabilityCalc[c](acceptableStack[j])) {
                console.log("Removing element...");
                // splice() really removes the entry; delete would leave a hole.
                acceptableStack.splice(j, 1);
            }
        }
    }
    // The remaining elements can be pushed to the screen!
})();
Add Comment
Please, Sign In to add comment