SHARE
TWEET

Untitled

a guest Aug 12th, 2019 65 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ; ----------------------------------------------------------------------------
  2. ;
  3. ; AutoIt Version: 3.1.1.87
  4. ; Author:        AcidicChip <acidicchip@acidicchip.com>
  5. ;
  6. ; Script Name:  Web Media Spider
  7. ; Script Version: 0.21
  8. ;
  9. ; Script Function:
  10. ;   Spider the web and gather media file URLs
  11. ;
  12. ; ----------------------------------------------------------------------------
  13.  
  ; Script-wide options.
  ; GUIOnEventMode = 1: GUI notifications are delivered by calling the functions
  ; registered through GUISetOnEvent / GUICtrlSetOnEvent.
  AutoItSetOption("GUIOnEventMode", 1)
  ; TrayIconDebug = 1: the tray icon tooltip shows the script line being executed.
  AutoItSetOption("TrayIconDebug", 1)
  16.  
  17. #include <Array.au3>
  18. #include <GUIConstants.au3>
  19.  
  ; Shared spider state (script scope).
  Global $collected[1] ; history of URLs already seen; lookups start at index 1
  Global $urls[1]      ; only referenced from commented-out code
  Global $urlon = 0    ; number of the line last read from spider.urls.txt
  Global $urlnum = 0   ; queued URLs that have not been fetched yet
  Global $imagenum = 0 ; shown in the "Images" field
  Global $audionum = 0 ; shown in the "Audio" field
  Global $videonum = 0 ; flash/director files found; shown in the "Flash" field
  27.  
  #region "GUI"
  ; Main window: 600 x 100 client area. Every read-only field is an input
  ; control that is disabled right after creation.
  GUICreate("Media Spider", 600, 100)

  ; Row 1: current action.
  $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20)
  $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20)
  GUICtrlSetState($txtAction, $GUI_DISABLE)

  ; Row 2: URL currently being processed.
  $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20)
  $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20)
  GUICtrlSetState($txtURL, $GUI_DISABLE)

  ; Row 3: progress bar plus the same value as text.
  $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20)
  $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20)
  GUICtrlSetState($txtPercent, $GUI_DISABLE)

  ; Row 4: counters.
  $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20)
  $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20)
  GUICtrlSetState($txtURLs, $GUI_DISABLE)
  $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20)
  $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20)
  GUICtrlSetState($txtAudio, $GUI_DISABLE)
  $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20)
  $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20)
  GUICtrlSetState($txtImages, $GUI_DISABLE)
  ; The control variables say "Videos" but the label shown to the user is "Flash:".
  $lblVideos = GUICtrlCreateLabel("Flash:", 370, 63, 35, 20)
  $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20)
  GUICtrlSetState($txtVideos, $GUI_DISABLE)
  $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20)
  ; Width is 70 (not 75) so the field ends at x = 600, the window's right edge.
  $txtHistory = GUICtrlCreateInput("0", 530, 60, 70, 20)
  GUICtrlSetState($txtHistory, $GUI_DISABLE)

  ; Row 5: start URL and the Start/Stop toggle button.
  $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20)
  $txtStartURL = GUICtrlCreateInput("http://www.flashgames555.com", 55, 80, 490, 20)
  $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20)
  GUISetState(@SW_SHOW)

  ; Event wiring: window close -> GUIClose, button click -> GUIStartStop.
  GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose")
  GUICtrlSetOnEvent($btnStartStop, "GUIStartStop")
  #endregion "GUI"
  62.  
  ; Event handler registered for $GUI_EVENT_CLOSE: terminates the script.
  Func GUIClose()
      Exit 0
  EndFunc   ;==>GUIClose
  66.  
  ; Crawl control flags shared by the button handler and the main loop.
  Global $running = False ; True while the main loop should keep crawling
  Global $restart = False ; True when the next step must begin a fresh crawl

  ; Event handler for the Start/Stop button.
  ; The handler only flips flags and updates the button; the crawl itself runs
  ; from the main loop below. Doing the crawl inside this handler would keep
  ; the handler busy for the whole run, and a second click (Stop) or the window
  ; close event would not be dispatched until it returned.
  Func GUIStartStop()
      If GUICtrlRead($btnStartStop) == "Start" Then
          GUICtrlSetData($btnStartStop, "Stop")
          GUICtrlSetState($txtStartURL, $GUI_DISABLE)
          $restart = True
          $running = True
      Else
          $running = False
          GUICtrlSetData($btnStartStop, "Start")
          GUICtrlSetState($txtStartURL, $GUI_ENABLE)
      EndIf
  EndFunc   ;==>GUIStartStop

  ; Performs one unit of crawl work: either the start URL of a fresh crawl or
  ; the next queued line of spider.urls.txt. When the queue is exhausted the
  ; crawl is switched off and the button returns to "Start".
  Func _SpiderStep()
      Local $next

      If $restart Then
          $restart = False
          ; Reset the queue file together with the counters that index it,
          ; and clear the history so a repeated crawl is not filtered out.
          FileDelete("spider.urls.txt")
          $urlon = 0
          $urlnum = 0
          ReDim $collected[1]
          GetURLs(GUICtrlRead($txtStartURL))
          Return
      EndIf

      If $urlnum > 0 Then
          $urlon = $urlon + 1
          $next = FileReadLine("spider.urls.txt", $urlon)
          If @error Then
              ; Queue file is missing or shorter than the counter claims.
              $urlnum = 0
          Else
              $urlnum = $urlnum - 1
              GetURLs($next)
          EndIf
          Return
      EndIf

      ; Nothing left to fetch.
      $running = False
      GUICtrlSetData($btnStartStop, "Start")
      GUICtrlSetState($txtStartURL, $GUI_ENABLE)
  EndFunc   ;==>_SpiderStep

  ; Main loop: crawl one page per iteration while running, otherwise idle.
  While 1
      If $running Then
          _SpiderStep()
      Else
          Sleep(250)
      EndIf
  WEnd
  91.  
  ; Refreshes the status area of the window.
  ;   $action  - text for the "Action" field
  ;   $url     - text for the "URL" field; the field is left alone when this is ""
  ;   $percent - value for the progress bar, also shown as "<value>%"
  ; The counter fields are refreshed from the global counters on every call.
  Func Status($action, $url, $percent)
      ; Counters.
      GUICtrlSetData($txtURLs, $urlnum)
      GUICtrlSetData($txtAudio, $audionum)
      GUICtrlSetData($txtImages, $imagenum)
      GUICtrlSetData($txtVideos, $videonum)
      GUICtrlSetData($txtHistory, UBound($collected))

      ; Progress.
      GUICtrlSetData($prgPercent, $percent)
      GUICtrlSetData($txtPercent, $percent & "%")

      ; Action and URL.
      GUICtrlSetData($txtAction, $action)
      If $url <> "" Then
          GUICtrlSetData($txtURL, $url)
      EndIf
  EndFunc   ;==>Status
  105.  
  ; Collects every piece of text found between $before and $after in $str.
  ;   $str     - text to search
  ;   $before  - opening delimiter, inserted into the pattern as-is
  ;   $after   - closing delimiter, inserted into the pattern as-is
  ; Returns the StringRegExp result for flag 3 (array of all captured groups).
  ; When nothing matches, StringRegExp sets @error and the result is not an
  ; array, so callers should test it with IsArray/UBound before indexing.
  ; Matching is case-insensitive (?i) and lets "." cross line breaks (?s), so
  ; a tag or attribute value that is wrapped over several lines is still found.
  Func _ArrayParse($str, $before, $after)
      Local $pattern = "(?is)" & $before & "(.*?)" & $after
      Return StringRegExp($str, $pattern, 3)
  EndFunc   ;==>_ArrayParse
  109.  
  ; Queues $url for crawling unless WasCollected reports it as already seen.
  ; A new URL is appended to the history array and to spider.urls.txt, and the
  ; pending counter $urlnum is increased by one.
  Func AddURL($url)
      If WasCollected($url) Then Return

      _ArrayAdd($collected, $url)
      FileWriteLine("spider.urls.txt", $url)
      $urlnum += 1
  EndFunc   ;==>AddURL
  118.  
  ; Reports whether $url is present in the history array $collected.
  ; The comparison is exact (==) and the scan starts at index 1.
  ; Side effect: when the URL is not found and the array already holds 1024 or
  ; more elements, the element at index 1 is removed.
  ; Returns True when found, False otherwise.
  Func WasCollected($url)
      Local $last = UBound($collected) - 1
      For $idx = 1 To $last
          If $collected[$idx] == $url Then Return True
      Next

      If UBound($collected) >= 1024 Then _ArrayDelete($collected, 1)
      Return False
  EndFunc   ;==>WasCollected
  130.  
  ; Derives the base ("directory") address of $url, always ending in "/".
  ; Everything up to "://" is kept as the scheme. The rest is split on "/": the
  ; first segment is always kept, and following segments are kept until one is
  ; empty or contains a "." (treated as a file name).
  ; When the URL carries a "?" query, the query is dropped first and the result
  ; is probed with InetGetSize; if that yields nothing, the base is rebuilt
  ; without considering the final path segment.
  Func GetURI($url)
      Local $scheme = StringLeft($url, StringInStr($url, "://")) & "//"
      Local $rest = StringMid($url, StringLen($scheme) + 1)

      If Not StringInStr($rest, "?") Then
          Return __GetURI_Join($scheme, $rest, 0)
      EndIf

      ; Query present: keep only the part in front of the first "?".
      Local $pieces = StringSplit($rest, "?")
      $rest = $pieces[1]

      Local $base = __GetURI_Join($scheme, $rest, 0)
      If Not InetGetSize(StringLeft($base, StringLen($base) - 1)) Then
          $base = __GetURI_Join($scheme, $rest, 1)
      EndIf
      Return $base
  EndFunc   ;==>GetURI

  ; Helper for GetURI: appends the leading directory segments of $path to
  ; $prefix. $skip is the number of trailing segments that are never examined.
  Func __GetURI_Join($prefix, $path, $skip)
      Local $seg = StringSplit($path, "/")
      Local $out = $prefix & $seg[1] & "/"
      Local $stop = UBound($seg) - 1 - $skip
      For $n = 2 To $stop
          If StringLen($seg[$n]) = 0 Or StringInStr($seg[$n], ".") Then ExitLoop
          $out = $out & $seg[$n] & "/"
      Next
      Return $out
  EndFunc   ;==>__GetURI_Join
  165.  
  ; Downloads $url and hands every <a>, <param> and <embed> tag found in it to
  ; CheckURL, updating the status display along the way.
  ;   $url - address of the page to fetch; an empty value is ignored
  ; The page is saved to spider.html.txt, read back and deleted again.
  ; Nothing is returned.
  Func GetURLs($url)
      If StringLen($url) = 0 Then Return

      Local $uri = GetURI($url)
      Local $file = "spider.html.txt"

      Status("Downloading", $url, 0)
      InetGet($url, $file, 1, 0)
      Local $html = FileRead($file, FileGetSize($file))
      FileDelete($file)
      ; A failed or empty download leaves nothing to parse.
      If StringLen($html) = 0 Then Return

      Status("Parsing URLs", $url, 0)
      Local $names = StringSplit("a|param|embed", "|")
      Local $tags, $count, $label
      For $t = 1 To $names[0]
          $tags = _ArrayParse($html, "<" & $names[$t], ">")
          ; _ArrayParse does not yield an array when the tag never occurs.
          If IsArray($tags) Then
              $count = UBound($tags)
              $label = "Checking <" & StringUpper($names[$t]) & "> Tags for URLs"
              For $i = 0 To $count - 1
                  ; ($i + 1) / $count cannot divide by zero, unlike
                  ; $i / ($count - 1) when exactly one tag was found.
                  Status($label, $url, Round((($i + 1) / $count) * 100))
                  CheckURL($uri, $tags[$i], $url)
              Next
          EndIf
      Next
  EndFunc   ;==>GetURLs
  195.  
  ; Extracts the href= and src= targets from one tag and passes each usable
  ; address to CheckType.
  ;   $uri - base address of the page the tag came from (ends in "/")
  ;   $str - the text between "<tag" and ">"
  ;   $ref - address of the referring page, forwarded to CheckType
  Func CheckURL($uri, $str, $ref)
      Local $attrs = StringSplit("href=|src=", "|")
      Local $target
      For $a = 1 To $attrs[0]
          If StringInStr($str, $attrs[$a]) Then
              $target = __CheckURL_Resolve($uri, GetAttr($str, $attrs[$a]))
              If StringLen($target) Then CheckType($target, $ref)
          EndIf
      Next
  EndFunc   ;==>CheckURL

  ; Helper for CheckURL: turns an attribute value into an absolute address.
  ; Returns "" for values that cannot be fetched as a page or file.
  Func __CheckURL_Resolve($uri, $link)
      ; A value that is not a string means the attribute could not be extracted.
      If Not IsString($link) Then Return ""
      $link = StringStripWS($link, 3)
      If StringLen($link) = 0 Then Return ""

      ; In-page anchors and script/mail pseudo-links are not crawlable.
      If StringLeft($link, 1) == "#" Then Return ""
      If StringLeft($link, 11) = "javascript:" Then Return ""
      If StringLeft($link, 7) = "mailto:" Then Return ""

      ; Already absolute.
      If StringInStr(StringLeft($link, 10), "://") Then Return $link

      Local $schemeEnd = StringInStr($uri, "://")

      ; Protocol-relative ("//host/path"): reuse the scheme of the page.
      If StringLeft($link, 2) == "//" Then
          Return StringLeft($uri, $schemeEnd) & $link
      EndIf

      ; Root-relative ("/path"): resolve against scheme and host only, not
      ; against the directory of the current page.
      If StringLeft($link, 1) == "/" Then
          Local $slash = 0
          If $schemeEnd Then $slash = StringInStr(StringMid($uri, $schemeEnd + 3), "/")
          If $slash Then
              Return StringLeft($uri, $schemeEnd + 2 + $slash) & StringMid($link, 2)
          EndIf
          Return $uri & StringMid($link, 2)
      EndIf

      ; Plain relative path.
      Return $uri & $link
  EndFunc   ;==>__CheckURL_Resolve
  220.  
  ; Reads the value of an attribute from the text of one tag.
  ;   $str  - tag text
  ;   $attr - attribute name including the "=" sign, e.g. "href="
  ; Handles double-quoted, single-quoted and unquoted values (an unquoted value
  ; ends at the first space). When the attribute occurs more than once, the
  ; first value is used.
  ; Returns the value, or "" when none can be extracted.
  Func GetAttr($str, $attr)
      Local $hits, $rest, $cut

      If StringInStr($str, $attr & '"') Then
          $hits = _ArrayParse($str, $attr & '"', '"')
          If IsArray($hits) And UBound($hits) >= 1 Then Return $hits[0]
      ElseIf StringInStr($str, $attr & "'") Then
          $hits = _ArrayParse($str, $attr & "'", "'")
          If IsArray($hits) And UBound($hits) >= 1 Then Return $hits[0]
      ElseIf StringInStr($str, $attr) Then
          $rest = StringMid($str, StringInStr($str, $attr) + StringLen($attr))
          $cut = StringInStr($rest, " ")
          If $cut Then $rest = StringLeft($rest, $cut - 1)
          Return $rest
      EndIf

      ; Explicit result for "nothing found" instead of falling off the end.
      Return ""
  EndFunc   ;==>GetAttr
  236.  
  ; Classifies a discovered address and records it.
  ;   $url - absolute address
  ;   $ref - address of the referring page (currently unused)
  ; Addresses whose path ends in .swf or .dcr (any letter case, query string
  ; and fragment ignored) are logged through _addflash and counted in
  ; $videonum once per address. Every address is also passed to AddURL.
  Func CheckType($url, $ref)
      ; Look at the path only, so "game.swf?id=1" is still recognised.
      Local $path = $url
      Local $cut = StringInStr($path, "?")
      If $cut Then $path = StringLeft($path, $cut - 1)
      $cut = StringInStr($path, "#")
      If $cut Then $path = StringLeft($path, $cut - 1)

      Local $ext = StringRight($path, 4)
      ; "=" compares strings without regard to case, so ".SWF" matches too.
      If $ext = ".swf" Or $ext = ".dcr" Then
          ; Skip addresses already in the history, so the log file and the
          ; counter are not inflated by repeated links.
          If WasCollected($url) Then Return
          _addflash($url)
          ; NOTE(review): AddURL also queues the media file for crawling, as
          ; before; confirm whether media files should be fetched as pages.
          AddURL($url)
          $videonum += 1
      Else
          AddURL($url)
      EndIf
  EndFunc   ;==>CheckType
  249.  
  ; Appends $url as one line to flash.swf.txt in the script's own directory.
  Func _addflash($url)
      Local $listFile = @ScriptDir & "\flash.swf.txt"
      FileWriteLine($listFile, $url)
  EndFunc   ;==>_addflash
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top