Advertisement
Guest User

SWF-rip.au3

a guest
Aug 12th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ; ----------------------------------------------------------------------------
  2. ;
  3. ; AutoIt Version: 3.1.1.87
  4. ; Author:        AcidicChip <acidicchip@acidicchip.com>
  5. ;
  6. ; Script Name:  Web Media Spider
  7. ; Script Version: 0.21
  8. ;
  9. ; Script Function:
  10. ;   Spider the web and gather media file URLs
  11. ;
  12. ; ----------------------------------------------------------------------------
  13.  
  14. Opt("GUIOnEventMode", 1)
  15. Opt("TrayIconDebug", 1)
  16.  
  17. #include <Array.au3>
  18. #include <GUIConstants.au3>
  19. #include <GUIConstantsEx.au3>
  20. #include <IE.au3>
  21. #include <WindowsConstants.au3>
  22.  
  23. Dim $collected[1]
  24. Dim $urls[1]
  25. Dim $urlon = 0
  26. Dim $urlnum = 0
  27. Dim $imagenum = 0
  28. Dim $audionum = 0
  29. Dim $videonum = 0
  30.  
  31. #Region "GUI"
  32. Local $oIE = _IECreate()
  33. GUICreate("Media Spider", 600, 100)
  34. $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20)
  35. $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20)
  36. GUICtrlSetState($txtAction, $GUI_DISABLE)
  37. $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20)
  38. $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20)
  39. GUICtrlSetState($txtURL, $GUI_DISABLE)
  40. $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20)
  41. $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20)
  42. GUICtrlSetState($txtPercent, $GUI_DISABLE)
  43. $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20)
  44. $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20)
  45. GUICtrlSetState($txtURLs, $GUI_DISABLE)
  46. $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20)
  47. $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20)
  48. GUICtrlSetState($txtAudio, $GUI_DISABLE)
  49. $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20)
  50. $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20)
  51. GUICtrlSetState($txtImages, $GUI_DISABLE)
  52. $lblVideos = GUICtrlCreateLabel("Videos:", 370, 63, 35, 20)
  53. $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20)
  54. GUICtrlSetState($txtVideos, $GUI_DISABLE)
  55. $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20)
  56. $txtHistory = GUICtrlCreateInput("0", 530, 60, 75, 20)
  57. GUICtrlSetState($txtHistory, $GUI_DISABLE)
  58. $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20)
  59. $txtStartURL = GUICtrlCreateInput("http://www.flashgames555.com", 55, 80, 490, 20)
  60. $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20)
  61. GUISetState(@SW_SHOW)
  62.  
  63. GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose")
  64. GUICtrlSetOnEvent($btnStartStop, "GUIStartStop")
  65. #EndRegion "GUI"
  66.  
  67. Func GUIClose()
  68.     Exit
  69. EndFunc   ;==>GUIClose
  70.  
  71. Func GUIStartStop()
  72.     If GUICtrlRead($btnStartStop) == "Start" Then
  73.         GUICtrlSetData($btnStartStop, "Stop")
  74.         GUICtrlSetState($txtStartURL, $GUI_DISABLE)
  75.         FileDelete("spider.urls.txt")
  76.         GetURLs(GUICtrlRead($txtStartURL))
  77.         Do
  78.             ;$url = $urls[1]
  79.             $urlon = $urlon + 1
  80.             $url = FileReadLine("spider.urls.txt", $urlon)
  81.             ;_ArrayDelete($urls, 1)
  82.             $urlnum = $urlnum - 1
  83.             GetURLs($url)
  84.         Until $urlnum <= 0 Or GUICtrlRead($btnStartStop) == "Start"
  85.         ;Until UBound($urls) <= 1 Or GUICtrlRead($btnStartStop) == "Start"
  86.     Else
  87.         GUICtrlSetData($btnStartStop, "Start")
  88.         GUICtrlSetState($txtStartURL, $GUI_ENABLE)
  89.     EndIf
  90. EndFunc   ;==>GUIStartStop
  91.  
  92. While 1
  93.     Sleep(250)
  94. WEnd
  95.  
  96. Func Status($action, $url, $percent)
  97.     GUICtrlSetData($txtAction, $action)
  98.     If $url <> "" Then GUICtrlSetData($txtURL, $url)
  99.     GUICtrlSetData($prgPercent, $percent)
  100.     GUICtrlSetData($txtPercent, $percent & "%")
  101.  
  102.     GUICtrlSetData($txtURLs, $urlnum)
  103.     ;GUICtrlSetData($txtURLs, UBound($urls))
  104.     GUICtrlSetData($txtAudio, $audionum)
  105.     GUICtrlSetData($txtImages, $imagenum)
  106.     GUICtrlSetData($txtVideos, $videonum)
  107.     GUICtrlSetData($txtHistory, UBound($collected))
  108. EndFunc   ;==>Status
  109.  
  110. Func _ArrayParse($str, $before, $after)
  111.     Return StringRegExp($str, "(?i)" & $before & "(.*?)" & $after, 3)
  112. EndFunc   ;==>_ArrayParse
  113.  
  114. Func AddURL($url)
  115.     If Not WasCollected($url) Then
  116.         _ArrayAdd($collected, $url)
  117.         ;_ArrayAdd($urls, $url)
  118.         FileWriteLine("spider.urls.txt", $url)
  119.         $urlnum = $urlnum + 1
  120.     EndIf
  121. EndFunc   ;==>AddURL
  122.  
  123. Func WasCollected($url)
  124.     $return = False
  125.     For $i = 1 To UBound($collected) - 1 Step 1
  126.         If $collected[$i] == $url Then
  127.             $return = True
  128.             ExitLoop
  129.         EndIf
  130.     Next
  131.     If Not $return And UBound($collected) >= 1024 Then _ArrayDelete($collected, 1)
  132.     Return $return
  133. EndFunc   ;==>WasCollected
  134.  
  135. Func GetURI($url)
  136.     $uri = StringMid($url, 1, StringInStr($url, "://")) & "//"
  137.     $turl = StringMid($url, StringLen($uri) + 1)
  138.     If StringInStr($turl, "?") Then
  139.         $temp = StringSplit($turl, "?")
  140.         $turl = $temp[1]
  141.         $temp = StringSplit($turl, "/")
  142.         $uri = $uri & $temp[1] & "/"
  143.         For $i = 2 To UBound($temp) - 1 Step 1
  144.             If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop
  145.             $uri = $uri & $temp[$i] & "/"
  146.         Next
  147.         If Not InetGetSize(StringLeft($uri, StringLen($uri) - 1)) Then
  148.             $uri = StringMid($url, 1, StringInStr($url, "://")) & "//"
  149.             $temp = StringSplit($turl, "?")
  150.             $turl = $temp[1]
  151.             $temp = StringSplit($turl, "/")
  152.             $uri = $uri & $temp[1] & "/"
  153.             For $i = 2 To UBound($temp) - 2 Step 1
  154.                 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop
  155.                 $uri = $uri & $temp[$i] & "/"
  156.             Next
  157.         EndIf
  158.     Else
  159.         $temp = StringSplit($turl, "/")
  160.         $uri = $uri & $temp[1] & "/"
  161.         For $i = 2 To UBound($temp) - 1 Step 1
  162.             If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop
  163.             $uri = $uri & $temp[$i] & "/"
  164.         Next
  165.     EndIf
  166.  
  167.     Return $uri
  168. EndFunc   ;==>GetURI
  169.  
  170. Func GetURLs($url)
  171.     $uri = GetURI($url)
  172.  
  173.     $file = "spider.html.txt"
  174.     Status("Downloading", $url, 0)
  175.     $filesize = InetGetSize($url)
  176.     $lastsize = 0
  177.     $strikes = 0
  178.     InetGet($url, $file, 0, 0)
  179.     $html = FileRead($file, FileGetSize($file))
  180.     FileDelete($file)
  181.  
  182.     Status("Parsing URLs", $url, 0)
  183.     $tags = _ArrayParse($html, "<a", ">")
  184.     For $i = 0 To UBound($tags) - 1 Step 1
  185.         Status("Checking <A> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100))
  186.         CheckURL($uri, $tags[$i], $url)
  187.     Next
  188.     $tags = _ArrayParse($html, "<img", ">")
  189.     For $i = 0 To UBound($tags) - 1 Step 1
  190.         Status("Checking <IMG> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100))
  191.         CheckURL($uri, $tags[$i], $url)
  192.     Next
  193.     $tags = _ArrayParse($html, "<embed", ">")
  194.     For $i = 0 To UBound($tags) - 1 Step 1
  195.         Status("Checking <EMBED> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100))
  196.         CheckURL($uri, $tags[$i], $url)
  197.     Next
  198. EndFunc   ;==>GetURLs
  199.  
  200. Func CheckURL($uri, $str, $ref)
  201.     If StringInStr($str, "href=") Then
  202.         $turl = GetAttr($str, "href=")
  203.         If Not StringInStr(StringLeft($turl, 10), "://") Then
  204.             If StringLeft($turl, 1) == "/" Then
  205.                 $turl = $uri & StringMid($turl, 2)
  206.             Else
  207.                 $turl = $uri & $turl
  208.             EndIf
  209.         EndIf
  210.         CheckType($turl, $ref)
  211.     EndIf
  212.     If StringInStr($str, "src=") Then
  213.         $turl = GetAttr($str, "src=")
  214.         If Not StringInStr(StringLeft($turl, 10), "://") Then
  215.             If StringLeft($turl, 1) == "/" Then
  216.                 $turl = $uri & StringMid($turl, 2)
  217.             Else
  218.                 $turl = $uri & $turl
  219.             EndIf
  220.         EndIf
  221.         CheckType($turl, $ref)
  222.     EndIf
  223. EndFunc   ;==>CheckURL
  224.  
  225. Func GetAttr($str, $attr)
  226.     If StringInStr($str, $attr & '"') Then
  227.         $temp = _ArrayParse($str, $attr & '"', '"')
  228.         If UBound($temp) == 1 Then Return $temp[0]
  229.     ElseIf StringInStr($str, $attr & "'") Then
  230.         $temp = _ArrayParse($str, $attr & "'", "'")
  231.         If UBound($temp) == 1 Then Return $temp[0]
  232.     ElseIf StringInStr($str, $attr) Then
  233.         $temp = StringMid($str, StringInStr($str, $attr) + StringLen($attr))
  234.         If StringInStr($temp, " ") Then
  235.             $temp = StringMid($temp, 1, StringInStr($temp, " ") - 1)
  236.         EndIf
  237.         Return $temp
  238.     EndIf
  239. EndFunc   ;==>GetAttr
  240.  
  241. Func CheckType($url, $ref)
  242.     $repeat = StringSplit($url, "/")
  243.     ;$url = StringRegExpReplace($url, "("&$repeat[2]&"/"&$repeat[2]&")", $repeat[2])\
  244.     $url = StringRegExpReplace($url, "("&$repeat[4]&"/"&$repeat[4]&")", $repeat[4])
  245.  
  246.     If StringRight($url, 5) == ".html" Or StringRight($url, 4) == ".htm" Then
  247.         _IENavigate($oIE, $url)
  248.         ConsoleWrite($url & @CRLF)
  249.         _IELoadWait($oIE)
  250.         $link = _IETagNameGetCollection($oIE, "embed")
  251.         For $oElement In $link
  252.             $filename = StringSplit($oElement.src, "/")
  253.             If (FileExists(@ScriptDir & "\swf\" & $filename[$filename[0]]) == 0) Then
  254.                 ConsoleWrite("Downloading " & $oElement.src & " to " & @ScriptDir & "\swf\" & $filename[$filename[0]] & @CRLF)
  255.                 If $oElement.src Then InetGet($oElement.src, @ScriptDir & "\swf\" & $filename[$filename[0]], 0)
  256.                 $videonum += 1
  257.             EndIf
  258.  
  259.         Next
  260.     EndIf
  261.     AddURL(GetURI($url))
  262.  
  263. EndFunc   ;==>CheckType
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement