// Groovy shell script: downloads the HTML of every Lurkmore article.

import groovy.grape.Grape
import groovyx.net.http.*
// Resolve the http-builder module at runtime through Grape.
Grape.grab([group: 'org.codehaus.groovy.modules.http-builder', module: 'http-builder'])

// HTTP client rooted at the site; it is handed to every helper call below.
http = new HTTPBuilder('http://www.lurkmore.to/')


/**
 * Requests {@code tehUri} with GET and collects regex captures from the response body.
 *
 * The body is decoded as UTF-8 and scanned line by line. For every match of
 * {@code regex} on a line, capture group 1 is added to {@code collect}. A regex
 * without any capture group therefore collects nothing.
 *
 * @param http    HTTPBuilder instance used to issue the request
 * @param tehUri  URL to request
 * @param regex   pattern applied to each line of the body
 * @param collect collection that receives the captured strings (mutated in place)
 */
def getByRegex(http, tehUri, regex, collect) {
    println "Requesting: "+tehUri
    http.request(Method.GET, ContentType.TEXT) {
        uri = tehUri

        headers.'User-Agent' = 'Mozilla/5.0 Ubuntu/8.10 Firefox/3.0.4'
        headers.'Accept-Charset' = 'UTF-8'
        response.success = { resp ->
            // withReader closes the underlying stream once the closure returns,
            // even if scanning throws.
            resp.entity.content.withReader("UTF-8") { reader ->
                // eachLine visits every line, including the first one, and stops
                // at end of stream without handing a null line to the matcher.
                reader.eachLine { line ->
                    def matcher = (line =~ regex)
                    while (matcher.find()) {
                        if (matcher.groupCount() > 0) {
                            collect.add(matcher.group(1))
                        }
                    }
                }
            }
        }
    }
}

/**
 * Requests {@code tehUri} with GET and returns the response body as a String.
 *
 * NOTE(review): the body is decoded via InputStream.getText() without an
 * explicit charset, i.e. with the platform default, unlike getByRegex which
 * forces UTF-8 -- confirm this is intended before changing it.
 *
 * @param http   HTTPBuilder instance used to issue the request
 * @param tehUri URL to request
 * @return the text of the response body on a successful response
 */
def getContent(http, tehUri) {
    println("Requesting: " + tehUri)

    // Success handler: hand back the whole entity stream as text.
    def bodyAsText = { resp ->
        def bodyStream = resp.entity.content
        bodyStream.text
    }

    return http.request(Method.GET, ContentType.TEXT) {
        uri = tehUri
        headers['User-Agent'] = 'Mozilla/5.0 Ubuntu/8.10 Firefox/3.0.4'
        headers['Accept-Charset'] = 'UTF-8'

        response.success = bodyAsText
    }
}


// Step 1: scrape the AllPages listing for links to the paged index
// (from/to ranges). A set is used so repeated links are stored once.
indexesURLs = new HashSet<String>()
getByRegex(
    http,
    "http://www.lurkmore.to/\u0421\u043b\u0443\u0436\u0435\u0431\u043d\u0430\u044f:AllPages",
    /<a\s+href="(\/index.php\?title\=[^&]+&amp;from=[^&]+&amp;to=[^\"]+)\"/,
    indexesURLs)

// Freeze the set into a list before iterating over it.
indexesURLs = new java.util.ArrayList<String>(indexesURLs)

println "Found indexes URLs: " + indexesURLs.size()

// Step 2: visit every index page and gather the article links found in its
// table cells. The captured hrefs are HTML-escaped, hence the &amp; -> & fix-up.
pagesUrls = new HashSet<String>()
for (indexUrl in indexesURLs) {
    def indexPage = "http://www.lurkmore.to" + indexUrl.replace("&amp;", "&")
    getByRegex(http, indexPage, /<td\s+width="33%".*<a\s+href="(\/[^\"]+)\"\s+title=\"/, pagesUrls)
}

pagesUrls = new java.util.ArrayList<String>(pagesUrls)
println "Found pages URLs: " + pagesUrls.size()

// Step 3: download every collected article into the "lurk" directory.
new File("lurk").mkdir()

// Index of the first entry of pagesUrls to download; entries before it are
// skipped. Set to 0 to download everything.
startIndex = 595

// The counter starts at startIndex so the printed number is the entry's
// position in pagesUrls.
i = startIndex

// subList with a clamped lower bound yields an empty view (instead of throwing)
// when fewer than startIndex pages were found, and never indexes past the end.
pagesUrls.subList(Math.min(startIndex, pagesUrls.size()), pagesUrls.size()).each { pageUrl ->
    try {
        println "URL: " + i++
        // File name is the URL-decoded path with slashes and backslashes
        // replaced by underscores.
        file = new File("lurk/" + java.net.URLDecoder.decode(pageUrl, "UTF-8").replaceAll("[\\/\\\\]", "_") + ".html")
        content = getContent(http, "http://www.lurkmore.to" + pageUrl.replace("&amp;", "&"))
        file.createNewFile()
        file.append(content)
    } catch (Exception e) {
        // Best effort: report the failure and carry on with the next page.
        e.printStackTrace()
    }
}