Advertisement
Guest User

Untitled

a guest
Apr 20th, 2018
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  1. #!/bin/sh
  2. exec scala -deprecation -savecompiled "$0" "$@"
  3. !#
  4. import java.io.File;
  5. import java.io.FileOutputStream;
  6. import java.io.FileReader;
  7. import java.io.InputStream;
  8. import java.io.IOException;
  9. import java.net.URL;
  10. import java.util.regex.Pattern;
  11. import javax.swing.text.html.HTML;
  12. import javax.swing.text.html.HTMLEditorKit;
  13. import javax.swing.text.html.parser.ParserDelegator;
  14. import javax.swing.text.MutableAttributeSet;
  15.  
  /**
   * Extracts the links (`<a href=...>`) from an HTML document and downloads
   * every link whose target matches an optional pattern.
   *
   * The document is either a local file or an `http:`/`https:` URL, in which
   * case it is downloaded first. All files are saved in the current working
   * directory under the name computed by [[basename]].
   */
  object Extracter {
    /** Size in bytes of the buffer used while copying a download to disk. */
    private val BufferSize = 500000

    /**
     * Downloads `filename` (a URL) into the current working directory.
     *
     * Nothing is fetched when a file named `basename(filename)` already
     * exists. Progress is printed while copying when the server reports a
     * content length. If the transfer fails part-way, the partial file is
     * deleted so that a later run does not mistake it for a finished download.
     *
     * @param filename the URL to download
     * @throws IOException if the URL is malformed, the connection cannot be
     *                     opened, or the transfer fails
     */
    def download(filename: String): Unit = {
      val url = new URL(filename)
      val base = basename(filename)
      if (!exists(base)) {
        val connection = url.openConnection()
        val total = connection.getContentLength()
        val stream = connection.getInputStream()
        try {
          Console.println("Downloading " + filename)
          val target = new File(base)
          val writer = new FileOutputStream(target)
          var completed = false
          try {
            copyWithProgress(stream, writer, total)
            completed = true
          } finally {
            writer.close()
            // A truncated file would be skipped by the exists() check next time.
            if (!completed) target.delete()
          }
          Console.println("\nDone")
        } finally {
          stream.close()
        }
      }
    }

    /**
     * Copies `in` to `out` until end of stream, printing a progress line
     * before every read when `total` (the expected byte count) is positive.
     */
    private def copyWithProgress(in: InputStream, out: FileOutputStream, total: Int): Unit = {
      val buffer = new Array[Byte](BufferSize)
      var received: Long = 0
      var read = 0
      // InputStream.read returns -1 at end of stream.
      while (read != -1) {
        if (total > 0) {
          // Multiply first (in Long arithmetic) so the percentage is not
          // distorted by integer truncation of total / 100.
          val percent = received * 100 / total
          Console.print("Got " + received + " of " + total + " (" + percent + "%)\r")
        }
        read = in.read(buffer)
        if (read > 0) {
          out.write(buffer, 0, read)
          received += read
        }
      }
    }

    /**
     * Returns the local file name to use for `url`: the last path component,
     * or `"index.html"` when the URL ends with `/` or has no last component.
     *
     * @param url the URL (or path) to derive a file name from
     * @return the derived file name, never empty
     */
    def basename(url: String): String = {
      if (url.endsWith("/")) {
        "index.html"
      } else {
        new File(url).getName match {
          case ""   => "index.html"
          case name => name
        }
      }
    }

    /**
     * Tells whether a file or directory named `filename` exists.
     *
     * @param filename the path to check
     * @return `true` if the path exists
     */
    def exists(filename: String): Boolean = new File(filename).exists()

    /**
     * Dry-run stand-in for [[download]]: only prints what would be fetched.
     *
     * @param filename the URL that would have been downloaded
     */
    def fakedownload(filename: String): Unit = {
      Console.println("Downloaded " + filename)
    }

    /**
     * Parser callback that prints and downloads every anchor target that
     * matches `pattern` in full.
     *
     * @param pattern the regular expression an href must match entirely
     */
    class PrintLinks(pattern: Pattern) extends HTMLEditorKit.ParserCallback {
      /**
       * Handles an opening tag; only `<a>` tags that carry an href are
       * considered. Anchors without an href are skipped.
       */
      override def handleStartTag(t: HTML.Tag, a: MutableAttributeSet, pos: Int): Unit = {
        if (t == HTML.Tag.A) {
          // getAttribute yields null when the anchor has no href attribute.
          Option(a.getAttribute(HTML.Attribute.HREF)).foreach { src =>
            val href = src.toString
            if (pattern.matcher(href).matches()) {
              Console.println(src)
              // Swap in fakedownload(href) here for a dry run.
              download(href)
            }
          }
        }
      }
    }

    /** Tells whether `location` is a URL to fetch rather than a local file. */
    private def isRemote(location: String): Boolean =
      location.startsWith("http:") || location.startsWith("https:")

    /**
     * Script entry point.
     *
     * `args(0)` is the HTML file or URL to scan. When exactly two arguments
     * are given, `args(1)` is a regular expression that must occur somewhere
     * in a link for it to be downloaded; otherwise every link is downloaded.
     * Prints a usage line when no argument is given, and the exception
     * message when an I/O error occurs.
     *
     * @param args the command-line arguments
     */
    def main(args: Array[String]): Unit = {
      if (args.isEmpty) {
        Console.println("Usage: extract FILENAME [pattern]")
      } else {
        try {
          val source = args(0)
          val inputfile =
            if (isRemote(source)) {
              Console.println(basename(source))
              download(source)
              basename(source)
            } else {
              source
            }
          val suffix = if (args.length == 2) args(1) + ".*" else ""
          val pattern = Pattern.compile(".*" + suffix)
          val reader = new FileReader(inputfile)
          try {
            new ParserDelegator().parse(reader, new PrintLinks(pattern), true)
          } finally {
            reader.close()
          }
        } catch {
          case e: IOException => Console.println(e.getMessage())
        }
      }
    }
  }
  105.  
  // Script entry point: pass the script's command-line arguments to Extracter.main.
  106. Extracter.main(args)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement