Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Collects URLs that appear as plain text inside <body> (text that sits
// directly inside a blacklisted element such as <a> or <script> is ignored)
// and exposes them as `URLs` (array) and `result` (newline-joined string, or
// "No URL matches" when nothing was found).

// Tag names whose direct text is ignored, stored as a comma-prefixed list
// (",a,script").
var blacklist = "," + [
    "a",     // don't get text from anchor nodes
    "script" // or script elements
].join(",");

// One URL: http(s) scheme, at least two dot-separated host labels, then "/",
// whitespace or end of input, followed by any run of non-whitespace.
var urlRegex = /https?:\/\/([a-z0-9-]+\.)?([a-z0-9-]+\.)+[a-z0-9-]+(\/|\s|$)[^\s]*/i;
// NOTE(review): identical to urlRegex (no "g" flag despite the name) and not
// used by this snippet; kept unchanged in case other code refers to it.
var urlRegexg = /https?:\/\/([a-z0-9-]+\.)?([a-z0-9-]+\.)+[a-z0-9-]+(\/|\s|$)[^\s]*/i;
var spaceRegex = /\s+/;

/**
 * Returns the nearest ancestor of `e` that is an element node (nodeType 1),
 * or null when the ancestor chain ends without one.
 */
var parent = function(e) {
    while ((e = e.parentNode) && e.nodeType !== 1) {}
    return e;
};

/**
 * Tells whether `tagName` is listed in `blacklist` (case-insensitive).
 * The candidate is wrapped in commas on both sides so that only whole entries
 * match; a bare prefix test would treat <s> as blacklisted because ",s" is a
 * prefix of ",script".
 */
function isBlacklistedTag(tagName) {
    return (blacklist + ",").indexOf("," + tagName.toLowerCase() + ",") !== -1;
}

/**
 * Extracts every URL from a piece of text.
 * The text is split on whitespace first because urlRegex allows a whitespace
 * character right after the host and would otherwise swallow the next word.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} The matched URLs in order of appearance (may be empty).
 */
function extractUrlsFromText(text) {
    var tokens = text.split(spaceRegex), found = [], match, k;
    for (k = 0; k < tokens.length; k++) {
        match = tokens[k].match(urlRegex);
        if (match) {
            found.push(match[0]);
        }
    }
    return found;
}

/**
 * Scans the text nodes under <body> of `doc` and returns the URLs found in
 * nodes whose parent element is not blacklisted.
 *
 * @param {Document} doc - Object providing `evaluate` (DOM XPath).
 * @returns {string[]} The collected URLs.
 */
function collectUrls(doc) {
    // "://" is only a cheap pre-filter so that https:// and upper-case schemes
    // reach urlRegex too; the regex makes the real decision.
    // 6 === XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE
    var nodes = doc.evaluate("//body//text()[contains(., '://')]", doc, null, 6, null),
        collected = [], node, owner, n;
    for (n = 0; n < nodes.snapshotLength; n++) {
        node = nodes.snapshotItem(n);
        owner = parent(node);
        if (owner && !isBlacklistedTag(owner.tagName)) {
            collected.push.apply(collected, extractUrlsFromText(node.textContent));
        }
    }
    return collected;
}

// Outside a DOM environment there is nothing to scan, so the list stays empty.
var URLs = typeof document !== "undefined" ? collectUrls(document) : [];
var result = URLs.length > 0 ? URLs.join("\n") : "No URL matches";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement