Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ; ----------------------------------------------------------------------------
- ;
- ; AutoIt Version: 3.1.1.87
- ; Author: AcidicChip <acidicchip@acidicchip.com>
- ;
- ; Script Name: Web Media Spider
- ; Script Version: 0.21
- ;
- ; Script Function:
- ; Spider the web and gather media file URLs
- ;
- ; ----------------------------------------------------------------------------
- ; Deliver GUI events through registered callback functions (OnEvent mode)
- ; instead of polling for messages in a loop.
- Opt("GUIOnEventMode", 1)
- ; Debugging aid: show the currently executing script line in the tray icon tooltip.
- Opt("TrayIconDebug", 1)
- #include <Array.au3>
- #include <GUIConstants.au3>
- #include <GUIConstantsEx.au3>
- #include <IE.au3>
- #include <WindowsConstants.au3>
- ; ---- Crawl state shared by every function in this script ----
- ; History of URLs that were already queued. Slot 0 is the initial empty
- ; element and is never compared; real entries start at index 1.
- Global $collected[1]
- ; Former in-memory queue; only referenced from commented-out code now that
- ; the queue is kept in spider.urls.txt.
- Global $urls[1]
- ; Line number of the queue file that was read most recently.
- Global $urlon = 0
- ; Number of queued URLs that have not been fetched yet.
- Global $urlnum = 0
- ; Per-media counters displayed by Status().
- Global $imagenum = 0
- Global $audionum = 0
- Global $videonum = 0
- #region "GUI"
- ; Internet Explorer instance that CheckType() navigates to inspect pages.
- Local $oIE = _IECreate()
- ; Main window: 600 x 100 pixels, laid out as five 20-pixel rows.
- GUICreate("Media Spider", 600, 100)
- ; Row 1: current action. The input is disabled, so it acts as a read-only
- ; display that Status() writes to.
- $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20)
- $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20)
- GUICtrlSetState($txtAction, $GUI_DISABLE)
- ; Row 2: URL currently being processed (read-only display).
- $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20)
- $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20)
- GUICtrlSetState($txtURL, $GUI_DISABLE)
- ; Row 3: progress bar plus the same value shown as text.
- $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20)
- $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20)
- GUICtrlSetState($txtPercent, $GUI_DISABLE)
- ; Row 4: counters (pending URLs, audio, images, videos, history size),
- ; all read-only displays updated by Status().
- $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20)
- $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20)
- GUICtrlSetState($txtURLs, $GUI_DISABLE)
- $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20)
- $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20)
- GUICtrlSetState($txtAudio, $GUI_DISABLE)
- $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20)
- $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20)
- GUICtrlSetState($txtImages, $GUI_DISABLE)
- $lblVideos = GUICtrlCreateLabel("Videos:", 370, 63, 35, 20)
- $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20)
- GUICtrlSetState($txtVideos, $GUI_DISABLE)
- ; NOTE(review): x = 530 plus width 75 ends at 605, past the 600-pixel
- ; window width requested above - confirm whether the clipping is intended.
- $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20)
- $txtHistory = GUICtrlCreateInput("0", 530, 60, 75, 20)
- GUICtrlSetState($txtHistory, $GUI_DISABLE)
- ; Row 5: editable start URL and the Start/Stop toggle button.
- $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20)
- $txtStartURL = GUICtrlCreateInput("http://www.flashgames555.com", 55, 80, 490, 20)
- $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20)
- GUISetState(@SW_SHOW)
- ; Event wiring: window close -> GUIClose(), button click -> GUIStartStop().
- GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose")
- GUICtrlSetOnEvent($btnStartStop, "GUIStartStop")
- #endregion "GUI"
- ; Handler for $GUI_EVENT_CLOSE on the main window.
- ; Closes the Internet Explorer instance created at startup before ending the
- ; script; without this the browser window stays open after the spider exits.
- Func GUIClose()
-     ; $oIE is only an object when _IECreate() succeeded.
-     If IsObj($oIE) Then _IEQuit($oIE)
-     Exit
- EndFunc ;==>GUIClose
- ; Crawl state consumed by the idle loop at the end of this block:
- ;   0 = idle
- ;   1 = start requested, the start URL has not been fetched yet
- ;   2 = working through the URLs queued in spider.urls.txt
- Global $crawlState = 0
- ; Click handler for the Start/Stop button.
- ; The handler only flips state and returns immediately; the crawl itself runs
- ; in the idle loop. Running the crawl inside the handler (as before) keeps the
- ; event function busy, and further OnEvent callbacks - including the Stop
- ; click and the window close - are queued until it returns.
- Func GUIStartStop()
-     If GUICtrlRead($btnStartStop) == "Start" Then
-         GUICtrlSetData($btnStartStop, "Stop")
-         GUICtrlSetState($txtStartURL, $GUI_DISABLE)
-         ; Start from a clean slate: the queue file is recreated, so the line
-         ; cursor, the pending counter and the history must restart with it.
-         FileDelete("spider.urls.txt")
-         $urlon = 0
-         $urlnum = 0
-         ReDim $collected[1]
-         $crawlState = 1
-     Else
-         CrawlStop()
-     EndIf
- EndFunc ;==>GUIStartStop
- ; Return the GUI and the state flag to the idle configuration.
- ; Used both for a user-requested stop and when the queue runs dry.
- Func CrawlStop()
-     $crawlState = 0
-     GUICtrlSetData($btnStartStop, "Start")
-     GUICtrlSetState($txtStartURL, $GUI_ENABLE)
- EndFunc ;==>CrawlStop
- ; Idle loop: processes at most one URL per iteration so that GUI events can be
- ; served between pages; sleeps while there is nothing to do.
- While 1
-     If $crawlState = 1 Then
-         ; Advance the state before fetching so that a Stop click arriving
-         ; during the fetch is not overwritten afterwards.
-         $crawlState = 2
-         GetURLs(GUICtrlRead($txtStartURL))
-     ElseIf $crawlState = 2 Then
-         If $urlnum <= 0 Then
-             ; Queue exhausted: the crawl is finished.
-             CrawlStop()
-         Else
-             $urlon = $urlon + 1
-             $urlnum = $urlnum - 1
-             $crawlNext = FileReadLine("spider.urls.txt", $urlon)
-             If @error Then
-                 ; Queue file missing or shorter than the counter claims.
-                 CrawlStop()
-             Else
-                 GetURLs($crawlNext)
-             EndIf
-         EndIf
-     Else
-         Sleep(250)
-     EndIf
- WEnd
- ; Refresh every status widget of the main window.
- ;   $action  - text for the "Action" field
- ;   $url     - URL being processed; the URL field keeps its previous
- ;              content when this compares equal to ""
- ;   $percent - progress value, shown on the bar and as "<value>%"
- ; The counters are taken from the global crawl state.
- Func Status($action, $url, $percent)
-     Local $percentText = $percent & "%"
-     GUICtrlSetData($txtAction, $action)
-     If $url <> "" Then
-         GUICtrlSetData($txtURL, $url)
-     EndIf
-     ; Progress bar and its textual twin.
-     GUICtrlSetData($prgPercent, $percent)
-     GUICtrlSetData($txtPercent, $percentText)
-     ; Counters.
-     GUICtrlSetData($txtURLs, $urlnum)
-     GUICtrlSetData($txtAudio, $audionum)
-     GUICtrlSetData($txtImages, $imagenum)
-     GUICtrlSetData($txtVideos, $videonum)
-     GUICtrlSetData($txtHistory, UBound($collected))
- EndFunc ;==>Status
- ; Collect every piece of $str that sits between $before and $after.
- ; Matching is case-insensitive and non-greedy. Both delimiters are spliced
- ; into the regular expression as-is, so regex metacharacters in them are
- ; interpreted, not matched literally.
- ; Returns the result of StringRegExp() in "array of global matches" mode.
- Func _ArrayParse($str, $before, $after)
-     Local $pattern = "(?i)" & $before & "(.*?)" & $after
-     Local $found = StringRegExp($str, $pattern, 3)
-     Return $found
- EndFunc ;==>_ArrayParse
- ; Queue $url for crawling unless it is already in the history.
- ; A new URL is remembered in $collected, appended to spider.urls.txt and
- ; counted in $urlnum.
- Func AddURL($url)
-     ; Known URL: nothing to do.
-     If WasCollected($url) Then Return
-     _ArrayAdd($collected, $url)
-     FileWriteLine("spider.urls.txt", $url)
-     $urlnum = $urlnum + 1
- EndFunc ;==>AddURL
- ; Tell whether $url is already present in the history ($collected).
- ; The comparison is an exact (case-sensitive) string match and skips slot 0.
- ; Side effect: when $url is NOT found and the history already holds 1024 or
- ; more slots, the entry at index 1 is dropped.
- ; Returns True when found, False otherwise.
- Func WasCollected($url)
-     Local $slots = UBound($collected)
-     For $idx = 1 To $slots - 1
-         If $collected[$idx] == $url Then Return True
-     Next
-     ; Unknown URL: trim the history if it has reached its size limit.
-     If $slots >= 1024 Then _ArrayDelete($collected, 1)
-     Return False
- EndFunc ;==>WasCollected
- ; Reduce $url to a base URI that ends with "/".
- ; The scheme and the first path component (the host) are always kept. The
- ; following components are appended until one is empty or contains a ".".
- ; When $url carries a query string, the query is dropped first and the
- ; resulting URI (without its trailing "/") is probed with InetGetSize(); if
- ; that reports nothing, the URI is rebuilt without considering the last
- ; component.
- Func GetURI($url)
-     Local $scheme = StringMid($url, 1, StringInStr($url, "://")) & "//"
-     Local $remainder = StringMid($url, StringLen($scheme) + 1)
-     Local $hasQuery = StringInStr($remainder, "?")
-     Local $pieces
-     If $hasQuery Then
-         ; Keep only what precedes the first "?".
-         $pieces = StringSplit($remainder, "?")
-         $remainder = $pieces[1]
-     EndIf
-     $pieces = StringSplit($remainder, "/")
-     Local $base = __GetURI_Join($scheme, $pieces, UBound($pieces) - 1)
-     If $hasQuery Then
-         If Not InetGetSize(StringLeft($base, StringLen($base) - 1)) Then
-             $base = __GetURI_Join($scheme, $pieces, UBound($pieces) - 2)
-         EndIf
-     EndIf
-     Return $base
- EndFunc ;==>GetURI
- ; Helper for GetURI(): $prefix plus the host ($pieces[1]) plus the components
- ; 2..$last, stopping early at the first empty or dotted component.
- Func __GetURI_Join($prefix, $pieces, $last)
-     Local $joined = $prefix & $pieces[1] & "/"
-     For $k = 2 To $last
-         If Not StringLen($pieces[$k]) Then ExitLoop
-         If StringInStr($pieces[$k], ".") Then ExitLoop
-         $joined = $joined & $pieces[$k] & "/"
-     Next
-     Return $joined
- EndFunc ;==>__GetURI_Join
- ; Download $url and feed every <a>, <img> and <embed> tag found in it to
- ; CheckURL(), reporting progress through Status().
- ;   $url - address of the page to fetch; blank values are ignored
- ; The page is stored temporarily in spider.html.txt and removed after reading.
- Func GetURLs($url)
-     ; A blank URL (for example from a failed queue read) cannot be fetched.
-     If StringStripWS($url, 3) = "" Then Return
-     Local $uri = GetURI($url)
-     Local $file = "spider.html.txt"
-     Status("Downloading", $url, 0)
-     ; Blocking download; an unreachable page simply yields empty HTML below.
-     InetGet($url, $file, 0, 0)
-     Local $html = FileRead($file)
-     FileDelete($file)
-     Status("Parsing URLs", $url, 0)
-     __GetURLs_ScanTags($html, "a", $uri, $url)
-     __GetURLs_ScanTags($html, "img", $uri, $url)
-     __GetURLs_ScanTags($html, "embed", $uri, $url)
- EndFunc ;==>GetURLs
- ; Helper for GetURLs(): run CheckURL() on every "<$tag ...>" occurrence in
- ; $html, updating the progress display for each one.
- Func __GetURLs_ScanTags($html, $tag, $uri, $url)
-     Local $tags = _ArrayParse($html, "<" & $tag, ">")
-     Local $count = UBound($tags) ; 0 when nothing matched
-     Local $action = "Checking <" & StringUpper($tag) & "> Tags for URLs"
-     Local $percent
-     For $i = 0 To $count - 1
-         ; With a single tag the old formula divided by zero.
-         $percent = 100
-         If $count > 1 Then $percent = Round(($i / ($count - 1)) * 100)
-         Status($action, $url, $percent)
-         CheckURL($uri, $tags[$i], $url)
-     Next
- EndFunc ;==>__GetURLs_ScanTags
- ; Extract the href= and src= targets from one tag and pass each of them, as
- ; an absolute URL, to CheckType().
- ;   $uri - base URI of the page the tag came from (ends with "/")
- ;   $str - the tag text between "<name" and ">"
- ;   $ref - URL of the referring page, forwarded to CheckType()
- ; Relative targets are resolved as follows:
- ;   "//host/x" -> scheme of $uri + target
- ;   "/x"       -> scheme and host of $uri + "x"
- ;   "x"        -> $uri + "x"
- ; Attributes without a usable value are skipped.
- Func CheckURL($uri, $str, $ref)
-     Local $attrs[2] = ["href=", "src="]
-     ; "scheme://host/" part of $uri; stays equal to $uri if it has no such part.
-     Local $root = StringRegExpReplace($uri, "^([^:/]+://[^/]*/).*$", "$1")
-     ; Scheme including the colon, e.g. "http:".
-     Local $scheme = StringLeft($uri, StringInStr($uri, "://"))
-     Local $turl
-     For $n = 0 To UBound($attrs) - 1
-         If Not StringInStr($str, $attrs[$n]) Then ContinueLoop
-         $turl = GetAttr($str, $attrs[$n])
-         ; GetAttr() may come back without a string value; nothing to resolve then.
-         If Not IsString($turl) Then ContinueLoop
-         If $turl == "" Then ContinueLoop
-         If Not StringInStr(StringLeft($turl, 10), "://") Then
-             If StringLeft($turl, 2) == "//" Then
-                 $turl = $scheme & $turl
-             ElseIf StringLeft($turl, 1) == "/" Then
-                 $turl = $root & StringMid($turl, 2)
-             Else
-                 $turl = $uri & $turl
-             EndIf
-         EndIf
-         CheckType($turl, $ref)
-     Next
- EndFunc ;==>CheckURL
- ; Return the value of an attribute inside a tag.
- ;   $str  - tag text to search
- ;   $attr - attribute name including the "=", e.g. "href="
- ; Double-quoted values are tried first, then single-quoted ones, then
- ; unquoted ones (which end at the first whitespace character).
- ; Always returns a string; "" means the attribute is missing or its quoted
- ; value could not be parsed. When the attribute occurs several times the
- ; first value wins.
- Func GetAttr($str, $attr)
-     Local $quotes[2] = ['"', "'"]
-     Local $hits
-     For $q = 0 To UBound($quotes) - 1
-         If StringInStr($str, $attr & $quotes[$q]) Then
-             $hits = _ArrayParse($str, $attr & $quotes[$q], $quotes[$q])
-             If UBound($hits) >= 1 Then Return $hits[0]
-             ; Opening quote without a closing one.
-             Return ""
-         EndIf
-     Next
-     Local $pos = StringInStr($str, $attr)
-     If Not $pos Then Return ""
-     Local $value = StringMid($str, $pos + StringLen($attr))
-     ; Unquoted value: drop everything from the first whitespace character on.
-     Return StringRegExpReplace($value, "(?s)\s.*", "")
- EndFunc ;==>GetAttr
- ; Process one discovered URL.
- ;   $url - absolute URL to examine
- ;   $ref - referring page; not used by this function
- ; URLs ending in ".html" or ".htm" are opened in the shared Internet Explorer
- ; instance and the src of every <embed> element on the page is downloaded to
- ; the "swf" folder next to the script, unless a file of that name is already
- ; there. In all cases the base URI of $url is handed to AddURL().
- Func CheckType($url, $ref)
-     If StringRight($url, 5) == ".html" Or StringRight($url, 4) == ".htm" Then
-         _IENavigate($oIE, $url)
-         ConsoleWrite($url & @CRLF)
-         _IELoadWait($oIE)
-         Local $embeds = _IETagNameGetCollection($oIE, "embed")
-         ; The collection is only an object when the lookup succeeded.
-         If IsObj($embeds) Then
-             Local $dir = @ScriptDir & "\swf\"
-             ; InetGet() cannot save into a folder that does not exist.
-             DirCreate($dir)
-             Local $src, $parts, $target
-             For $oElement In $embeds
-                 $src = $oElement.src
-                 ; Skip <embed> elements that carry no source.
-                 If StringLen($src) = 0 Then ContinueLoop
-                 $parts = StringSplit($src, "/")
-                 ; The last path component serves as the local file name.
-                 $target = $dir & $parts[$parts[0]]
-                 If Not FileExists($target) Then
-                     ConsoleWrite("Downloading " & $src & " to " & $target & @CRLF)
-                     InetGet($src, $target, 0)
-                 EndIf
-             Next
-         EndIf
-     EndIf
-     AddURL(GetURI($url))
- EndFunc ;==>CheckType
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement