Advertisement
Guest User

Untitled

a guest
Aug 12th, 2019
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
AutoIt 8.34 KB | None | 0 0
  1. ; ----------------------------------------------------------------------------
  2. ;
  3. ; AutoIt Version: 3.1.1.87
  4. ; Author:        AcidicChip <acidicchip@acidicchip.com>
  5. ;
  6. ; Script Name:  Web Media Spider
  7. ; Script Version: 0.21
  8. ;
  9. ; Script Function:
  10. ;   Spider the web and gather media file URLs
  11. ;
  12. ; ----------------------------------------------------------------------------
  13.  
  ; --- Runtime options ---------------------------------------------------------
  ; GUI events are delivered to registered callback functions (OnEvent mode).
  Opt("GUIOnEventMode", 1)
  ; Show the currently executing script line in the tray icon tooltip.
  Opt("TrayIconDebug", 1)

  #include <Array.au3>
  #include <GUIConstants.au3>

  ; --- Shared spider state -----------------------------------------------------
  ; History of URLs already queued; index 0 is never used (searches start at 1).
  Global $collected[1]
  ; Only referenced from commented-out code in this script.
  Global $urls[1]
  ; Line number in spider.urls.txt of the URL most recently taken from the queue.
  Global $urlon = 0
  ; Number of queued URLs not yet processed.
  Global $urlnum = 0
  ; Media counters shown in the status row.
  Global $imagenum = 0
  Global $audionum = 0
  Global $videonum = 0
  27.  
  #region "GUI"
  ; Main window: 600 px wide, five 20 px rows.
  GUICreate("Media Spider", 600, 100)

  ; Row 1: current action (read-only).
  $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20)
  $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20)
  GUICtrlSetState($txtAction, $GUI_DISABLE)

  ; Row 2: URL currently being processed (read-only).
  $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20)
  $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20)
  GUICtrlSetState($txtURL, $GUI_DISABLE)

  ; Row 3: progress bar plus its numeric value.
  $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20)
  $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20)
  GUICtrlSetState($txtPercent, $GUI_DISABLE)

  ; Row 4: counters (all read-only).
  $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20)
  $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20)
  GUICtrlSetState($txtURLs, $GUI_DISABLE)
  $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20)
  $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20)
  GUICtrlSetState($txtAudio, $GUI_DISABLE)
  $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20)
  $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20)
  GUICtrlSetState($txtImages, $GUI_DISABLE)
  $lblVideos = GUICtrlCreateLabel("Flash:", 370, 63, 35, 20)
  $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20)
  GUICtrlSetState($txtVideos, $GUI_DISABLE)
  $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20)
  ; Width is 70 (not 75) so the field ends at x = 600, the window's right edge,
  ; instead of being clipped by 5 px.
  $txtHistory = GUICtrlCreateInput("0", 530, 60, 70, 20)
  GUICtrlSetState($txtHistory, $GUI_DISABLE)

  ; Row 5: editable start URL and the Start/Stop toggle button.
  $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20)
  $txtStartURL = GUICtrlCreateInput("http://www.flashgames555.com", 55, 80, 490, 20)
  $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20)
  GUISetState(@SW_SHOW)

  ; Event wiring (OnEvent mode).
  GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose")
  GUICtrlSetOnEvent($btnStartStop, "GUIStartStop")
  #endregion "GUI"
  62.  
  ; Window-close handler: terminates the script with exit code 0.
  Func GUIClose()
      Exit 0
  EndFunc  ;==>GUIClose
  66.  
  ; Crawl state shared between the Start/Stop handler and the main loop.
  ; The handler only flips these flags; the crawl itself runs in the main loop.
  ; If the crawl ran inside the handler, no further GUI event (Stop click, window
  ; close) could be dispatched until the handler returned.
  Global $spiderRunning = False  ; True while the main loop should keep crawling
  Global $spiderRestart = False  ; True when Start was pressed and the queue must be re-seeded

  ; Start/Stop button handler.
  ; "Start": locks the start URL field and asks the main loop to begin a fresh crawl.
  ; "Stop":  asks the main loop to stop after the page currently being processed.
  Func GUIStartStop()
      If GUICtrlRead($btnStartStop) == "Start" Then
          GUICtrlSetData($btnStartStop, "Stop")
          GUICtrlSetState($txtStartURL, $GUI_DISABLE)
          $spiderRestart = True
          $spiderRunning = True
      Else
          _SpiderStop()
      EndIf
  EndFunc  ;==>GUIStartStop

  ; Puts the GUI and the crawl flags back into the idle state.
  Func _SpiderStop()
      $spiderRunning = False
      $spiderRestart = False
      GUICtrlSetData($btnStartStop, "Start")
      GUICtrlSetState($txtStartURL, $GUI_ENABLE)
  EndFunc  ;==>_SpiderStop

  ; Performs one unit of crawl work: either seeds the queue from the start URL
  ; or processes the next queued URL. Stops the crawl when the queue is empty.
  Func _SpiderStep()
      Local $next

      If $spiderRestart Then
          $spiderRestart = False
          ; Fresh run: the queue file is recreated, so the read position, the
          ; pending counter and the history must start over with it.
          $urlon = 0
          $urlnum = 0
          ReDim $collected[1]
          FileDelete("spider.urls.txt")
          GetURLs(GUICtrlRead($txtStartURL))
          Return
      EndIf

      ; Nothing left to visit: finish instead of reading past the end of the file.
      If $urlnum <= 0 Then
          _SpiderStop()
          Return
      EndIf

      $urlon = $urlon + 1
      $urlnum = $urlnum - 1
      $next = FileReadLine("spider.urls.txt", $urlon)
      If @error Then
          ; Queue file missing or shorter than expected.
          _SpiderStop()
          Return
      EndIf
      GetURLs($next)
  EndFunc  ;==>_SpiderStep

  ; Main loop: crawl while running, otherwise idle so GUI events can be served.
  While 1
      If $spiderRunning Then
          _SpiderStep()
      Else
          Sleep(250)
      EndIf
  WEnd
  91.  
  ; Refreshes every status control in the window.
  ;   $action  - text for the Action field
  ;   $url     - text for the URL field; an empty string leaves the field untouched
  ;   $percent - value for the progress bar, also shown as "<value>%"
  ; The counter fields are filled from the global spider state.
  Func Status($action, $url, $percent)
      Local $percentText = $percent & "%"

      ; Counters
      GUICtrlSetData($txtURLs, $urlnum)
      GUICtrlSetData($txtAudio, $audionum)
      GUICtrlSetData($txtImages, $imagenum)
      GUICtrlSetData($txtVideos, $videonum)
      GUICtrlSetData($txtHistory, UBound($collected))

      ; Progress
      GUICtrlSetData($prgPercent, $percent)
      GUICtrlSetData($txtPercent, $percentText)

      ; Action / URL
      GUICtrlSetData($txtAction, $action)
      If Not ($url = "") Then
          GUICtrlSetData($txtURL, $url)
      EndIf
  EndFunc  ;==>Status
  105.  
  ; Returns every piece of text found between $before and $after in $str.
  ; Matching is case-insensitive and non-greedy; $before and $after are inserted
  ; into the regular expression as given (they are not escaped).
  ; The result is whatever StringRegExp yields for flag 3 (array of all matches).
  Func _ArrayParse($str, $before, $after)
      Local $pattern = "(?i)" & $before & "(.*?)" & $after
      Return StringRegExp($str, $pattern, 3)
  EndFunc  ;==>_ArrayParse
  109.  
  ; Queues $url for crawling unless it is already in the history.
  ; A new URL is appended to the history array and to spider.urls.txt, and the
  ; pending-URL counter is incremented.
  Func AddURL($url)
      If WasCollected($url) Then Return

      _ArrayAdd($collected, $url)
      FileWriteLine("spider.urls.txt", $url)
      $urlnum += 1
  EndFunc  ;==>AddURL
  118.  
  ; Reports whether $url is already in the history array (exact, case-sensitive
  ; comparison, starting at index 1).
  ; Side effect: when the URL is NOT found and the history already holds 1024 or
  ; more slots, the entry at index 1 is removed.
  Func WasCollected($url)
      For $idx = 1 To UBound($collected) - 1
          If $collected[$idx] == $url Then Return True
      Next

      ; Not found: trim the history if it has reached its size limit.
      If UBound($collected) >= 1024 Then
          _ArrayDelete($collected, 1)
      EndIf
      Return False
  EndFunc  ;==>WasCollected
  130.  
  ; Derives the base ("directory") URI of $url, always ending in "/".
  ; The scheme and host are kept, then path segments are appended until one is
  ; empty or contains a "." (treated as a file name).
  ; For URLs with a query string, the candidate base (without its trailing "/")
  ; is probed with InetGetSize; when that yields 0 the base is rebuilt with the
  ; last path segment left out.
  Func GetURI($url)
      Local $prefix = StringMid($url, 1, StringInStr($url, "://")) & "//"
      Local $rest = StringMid($url, StringLen($prefix) + 1)

      ; No query string: plain segment join, no network access.
      If Not StringInStr($rest, "?") Then Return _GetURI_Join($prefix, $rest, 1)

      ; Query string present: work on the part in front of the first "?".
      Local $parts = StringSplit($rest, "?")
      Local $path = $parts[1]
      Local $base = _GetURI_Join($prefix, $path, 1)
      If Not InetGetSize(StringLeft($base, StringLen($base) - 1)) Then
          $base = _GetURI_Join($prefix, $path, 2)
      EndIf
      Return $base
  EndFunc  ;==>GetURI

  ; Joins $prefix, the host and the leading directory segments of $path.
  ; $skip is subtracted from the split array's size to get the last index that
  ; is considered (1 = all segments, 2 = ignore the final segment).
  Func _GetURI_Join($prefix, $path, $skip)
      Local $segments = StringSplit($path, "/")
      Local $joined = $prefix & $segments[1] & "/"
      For $n = 2 To UBound($segments) - $skip
          If StringInStr($segments[$n], ".") Or Not StringLen($segments[$n]) Then ExitLoop
          $joined = $joined & $segments[$n] & "/"
      Next
      Return $joined
  EndFunc  ;==>_GetURI_Join
  165.  
  ; Downloads $url, extracts every <a>, <param> and <embed> tag from the page and
  ; hands each tag to CheckURL so that href/src targets get queued.
  ; The page is stored temporarily in spider.html.txt, which is deleted again
  ; before parsing. A failed download simply yields no tags.
  Func GetURLs($url)
      Local $uri = GetURI($url)
      Local $file = "spider.html.txt"
      Local $html = ""

      Status("Downloading", $url, 0)
      ; Option 1 = force a reload, background 0 = wait for completion.
      If InetGet($url, $file, 1, 0) Then
          $html = FileRead($file, FileGetSize($file))
      EndIf
      FileDelete($file)

      Status("Parsing URLs", $url, 0)
      _GetURLs_ScanTags($html, "<a", "Checking <A> Tags for URLs", $uri, $url)
      _GetURLs_ScanTags($html, "<param", "Checking <PARAM> Tags for URLs", $uri, $url)
      _GetURLs_ScanTags($html, "<embed", "Checking <EMBED> Tags for URLs", $uri, $url)
  EndFunc  ;==>GetURLs

  ; Runs CheckURL on every tag in $html that starts with $open, reporting
  ; progress under $label.
  Func _GetURLs_ScanTags($html, $open, $label, $uri, $url)
      Local $tags = _ArrayParse($html, $open, ">")
      Local $count = UBound($tags)  ; 0 when nothing matched, so the loop is skipped

      For $i = 0 To $count - 1
          ; ($i + 1) / $count cannot divide by zero, including when $count is 1.
          Status($label, $url, Round((($i + 1) / $count) * 100))
          CheckURL($uri, $tags[$i], $url)
      Next
  EndFunc  ;==>_GetURLs_ScanTags
  195.  
  ; Looks for href= and src= attributes in the tag text $str, turns each value
  ; into an absolute URL and passes it to CheckType.
  ;   $uri - base URI of the page the tag came from (ends in "/")
  ;   $str - tag text
  ;   $ref - URL of the referring page, forwarded to CheckType
  Func CheckURL($uri, $str, $ref)
      If StringInStr($str, "href=") Then _CheckURL_Attr($uri, $str, "href=", $ref)
      If StringInStr($str, "src=") Then _CheckURL_Attr($uri, $str, "src=", $ref)
  EndFunc  ;==>CheckURL

  ; Resolves one attribute value of $str against $uri and forwards it to CheckType.
  Func _CheckURL_Attr($uri, $str, $attr, $ref)
      Local $turl = GetAttr($str, $attr)
      Local $root

      ; No usable value: skip it rather than building a junk URL from it.
      If Not IsString($turl) Or StringLen($turl) = 0 Then Return
      ; In-page anchors and non-fetchable schemes are not crawl targets.
      If StringLeft($turl, 1) == "#" Then Return
      If StringRegExp($turl, "(?i)^(javascript|mailto):") Then Return

      If Not StringInStr(StringLeft($turl, 10), "://") Then
          If StringLeft($turl, 2) == "//" Then
              ; Protocol-relative link: reuse the scheme of the current page.
              $turl = StringLeft($uri, StringInStr($uri, "://")) & $turl
          ElseIf StringLeft($turl, 1) == "/" Then
              ; Root-relative link: resolve against scheme://host/, i.e. everything
              ; up to the third "/" of the base URI, not against the page directory.
              $root = StringLeft($uri, StringInStr($uri, "/", 0, 3))
              If $root == "" Then $root = $uri
              $turl = $root & StringMid($turl, 2)
          Else
              ; Relative link: resolve against the page directory.
              $turl = $uri & $turl
          EndIf
      EndIf
      CheckType($turl, $ref)
  EndFunc  ;==>_CheckURL_Attr
  220.  
  ; Extracts the value of attribute $attr (given with its trailing "=", e.g.
  ; "href=") from the tag text $str.
  ; Handles double-quoted, single-quoted and unquoted values; an unquoted value
  ; ends at the first space. When several quoted occurrences exist the first one
  ; is returned.
  ; Returns "" when no value can be extracted.
  Func GetAttr($str, $attr)
      Local $found, $value

      If StringInStr($str, $attr & '"') Then
          $found = _ArrayParse($str, $attr & '"', '"')
          If UBound($found) >= 1 Then Return $found[0]
      ElseIf StringInStr($str, $attr & "'") Then
          $found = _ArrayParse($str, $attr & "'", "'")
          If UBound($found) >= 1 Then Return $found[0]
      ElseIf StringInStr($str, $attr) Then
          $value = StringMid($str, StringInStr($str, $attr) + StringLen($attr))
          If StringInStr($value, " ") Then
              $value = StringMid($value, 1, StringInStr($value, " ") - 1)
          EndIf
          Return $value
      EndIf

      ; Attribute missing or its closing quote was not found.
      Return ""
  EndFunc  ;==>GetAttr
  236.  
  ; Queues $url for crawling and, when it names a Flash/Shockwave file
  ; (.swf or .dcr, any letter case), also records it in the flash list and bumps
  ; the flash counter.
  ; A media URL is recorded only when AddURL actually queued it, so a file linked
  ; from several pages is not logged and counted repeatedly while it is still in
  ; the history.
  ;   $ref - URL of the referring page (currently unused)
  Func CheckType($url, $ref)
      Local $ext = StringRight($url, 4)
      ; "=" compares strings case-insensitively, so ".SWF" matches too.
      Local $isFlash = ($ext = ".swf") Or ($ext = ".dcr")
      Local $pendingBefore = $urlnum

      AddURL($url)

      ; AddURL increments $urlnum only for URLs it has not seen before.
      If $isFlash And $urlnum > $pendingBefore Then
          _addflash($url)
          $videonum += 1
      EndIf
  EndFunc  ;==>CheckType
  249.  
  ; Appends $url as one line to flash.swf.txt in the script's own directory.
  Func _addflash($url)
      Local $listFile = @ScriptDir & "\flash.swf.txt"
      FileWriteLine($listFile, $url)
  EndFunc  ;==>_addflash
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement