Advertisement
Guest User

Untitled

a guest
May 22nd, 2014
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
D 0.84 KB | None | 0 0
  /**
   * Collects the HTML `<title>` text of every URL mentioned in `message`.
   *
   * Each http/https/ftp URL found in `message` is downloaded, the first
   * `<title ...>` element in the downloaded data is located, its text is
   * passed through `entitiesToUni`, and whitespace is normalized.
   *
   * Params:
   *     message = text to search for URLs
   *
   * Returns:
   *     The titles, in the order their URLs appear in `message`. A URL is
   *     skipped when its download or processing throws, when the downloaded
   *     data does not match the title pattern, or when the title is empty
   *     after normalization. No `Exception` propagates to the caller.
   */
  string[] scrapeTitles(M)(in M message)
  {
      import std.string : strip;

      static re_url = ctRegex!(r"(https?|ftp)://[^\s/$.?#].[^\s]*", "i");
      static re_title = ctRegex!(r"<title.*?>(.*?)<", "si");
      static re_ws = ctRegex!(r"(\s{2,}|\n|\t)");

      string[] titles;

      try
      {
          foreach (hit; matchAll(message, re_url))
          {
              // Failures are contained per URL: previously a single exception
              // anywhere in the pipeline threw away the titles of all URLs.
              try
              {
                  // just first 4k (per the original author's note on limitRange)
                  auto content = get(hit.captures[0], limitRange("0-4096"));

                  auto capture = matchFirst(cast(char[]) content, re_title);
                  if (capture.empty)
                      continue; // no <title> in the fetched data

                  // Runs of two or more whitespace characters, and any lone
                  // newline or tab, become one space; strip() then removes
                  // the leading/trailing whitespace that commonly surrounds
                  // a title in the markup.
                  auto title = capture[1].idup
                                         .entitiesToUni
                                         .replaceAll(re_ws, " ")
                                         .strip;

                  if (title.length)
                      titles ~= title;
              }
              catch (Exception)
              {
                  // Best effort: this URL contributes no title.
              }
          }
      }
      catch (Exception)
      {
          // Scanning `message` itself failed; return what was gathered so far.
      }

      return titles;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement