View difference between Paste ID: 3wZ2HsEw and cNza7DNW
SHOW: | | - or go back to the newest paste.
1
; ----------------------------------------------------------------------------
2
;
3
; AutoIt Version: 3.1.1.87
4
; Author:        AcidicChip <[email protected]>
5
;
6
; Script Name:  Web Media Spider
7
; Script Version: 0.21
8
;
9
; Script Function:
10
;   Spider the web and gather media file URLs
11
;
12
; ----------------------------------------------------------------------------
13
14
Opt("GUIOnEventMode", 1)
15
Opt("TrayIconDebug", 1)
16
17
#include <Array.au3>
18
#include <GUIConstants.au3>
19
#include <GUIConstantsEx.au3>
20
#include <IE.au3>
21
#include <WindowsConstants.au3>
22
23
; ---- Crawl state shared by every function in this script --------------------
; History of URLs that were already queued. Index 0 is a placeholder that is
; never compared: WasCollected() starts scanning at index 1.
Global $collected[1]
; Only referenced by commented-out code in this script; kept for compatibility.
Global $urls[1]
; $urlon  - line number of the last URL read back from spider.urls.txt
; $urlnum - number of queued URLs that have not been processed yet
Global $urlon = 0, $urlnum = 0
; Counters shown in the status row of the GUI.
Global $imagenum = 0, $audionum = 0, $videonum = 0
30
31-
#Region "GUI"
; Internet Explorer instance that CheckType() drives to load pages and read
; their <embed> elements.
Global $oIE = _IECreate()

; Main window. Every control is placed with absolute coordinates; the read-only
; fields are disabled right after creation (-1 = the control created last).
GUICreate("Media Spider", 600, 100)

; Row 1: what the spider is doing right now.
Global $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20)
Global $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20)
GUICtrlSetState(-1, $GUI_DISABLE)

; Row 2: the URL being processed.
Global $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20)
Global $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20)
GUICtrlSetState(-1, $GUI_DISABLE)

; Row 3: progress bar plus the same value as text.
Global $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20)
Global $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20)
GUICtrlSetState(-1, $GUI_DISABLE)

; Row 4: counters.
Global $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20)
Global $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20)
GUICtrlSetState(-1, $GUI_DISABLE)
Global $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20)
Global $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20)
GUICtrlSetState(-1, $GUI_DISABLE)
Global $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20)
Global $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20)
GUICtrlSetState(-1, $GUI_DISABLE)
Global $lblVideos = GUICtrlCreateLabel("Videos:", 370, 63, 35, 20)
Global $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20)
GUICtrlSetState(-1, $GUI_DISABLE)
Global $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20)
Global $txtHistory = GUICtrlCreateInput("0", 530, 60, 75, 20)
GUICtrlSetState(-1, $GUI_DISABLE)

; Row 5: start URL (editable) and the Start/Stop toggle button.
Global $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20)
Global $txtStartURL = GUICtrlCreateInput("http://www.flashgames555.com", 55, 80, 490, 20)
Global $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20)

GUISetState(@SW_SHOW)

; OnEvent wiring: window close and the Start/Stop button.
GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose")
GUICtrlSetOnEvent($btnStartStop, "GUIStartStop")
#EndRegion "GUI"
66
67
; Handler for $GUI_EVENT_CLOSE: ends the script.
Func GUIClose()
	Exit 0
EndFunc   ;==>GUIClose
70
71
; ---- Crawl control -----------------------------------------------------------
; The crawl itself runs from the idle loop at the bottom of this section, not
; from inside the button handler. AutoIt does not start a second OnEvent
; function while one is still running, so a handler that loops over the whole
; queue can never see a later "Stop" click. The handler therefore only flips
; the flags below and returns immediately.
Global $g_bCrawling = False ; True while the idle loop should keep crawling
Global $g_sSeedURL = "" ; start URL that still has to be fetched ("" once consumed)

; Start/Stop button handler.
; "Start": resets the queue state, remembers the start URL and enables crawling.
; "Stop" : disables crawling and restores the idle UI.
Func GUIStartStop()
	If GUICtrlRead($btnStartStop) == "Start" Then
		GUICtrlSetData($btnStartStop, "Stop")
		GUICtrlSetState($txtStartURL, $GUI_DISABLE)

		; Every run starts with an empty queue file, so the read position, the
		; pending counter and the de-duplication history have to start over too.
		FileDelete("spider.urls.txt")
		$urlon = 0
		$urlnum = 0
		ReDim $collected[1]

		$g_sSeedURL = GUICtrlRead($txtStartURL)
		$g_bCrawling = True
	Else
		_CrawlStop()
	EndIf
EndFunc   ;==>GUIStartStop

; Leaves crawl mode and puts the Start button / start URL field back to idle.
Func _CrawlStop()
	$g_bCrawling = False
	$g_sSeedURL = ""
	GUICtrlSetData($btnStartStop, "Start")
	GUICtrlSetState($txtStartURL, $GUI_ENABLE)
EndFunc   ;==>_CrawlStop

; Processes exactly one page: the start URL first, afterwards the next line of
; spider.urls.txt. Stops the crawl when nothing is left to read.
Func _CrawlStep()
	If $g_sSeedURL <> "" Then
		Local $sSeed = $g_sSeedURL
		$g_sSeedURL = ""
		GetURLs($sSeed)
		Return
	EndIf

	; Nothing pending (also covers an empty start URL).
	If $urlnum <= 0 Then
		_CrawlStop()
		Return
	EndIf

	$urlon += 1
	Local $sNext = FileReadLine("spider.urls.txt", $urlon)
	If @error Then
		; Queue file missing or read past its end: the counter was out of sync.
		_CrawlStop()
		Return
	EndIf

	$urlnum -= 1
	GetURLs($sNext)
EndFunc   ;==>_CrawlStep

; Idle loop: crawl one page per iteration while enabled, otherwise just wait
; for GUI events.
While 1
	If $g_bCrawling Then
		_CrawlStep()
	Else
		Sleep(250)
	EndIf
WEnd
95
96
; Refreshes every status field of the window.
;   $action  - text for the "Action" field
;   $url     - text for the "URL" field; left untouched when empty
;   $percent - value for the progress bar and its text twin
Func Status($action, $url, $percent)
	GUICtrlSetData($txtAction, $action)
	If $url <> "" Then
		GUICtrlSetData($txtURL, $url)
	EndIf

	; Progress bar and its textual copy.
	Local $sPercentText = $percent & "%"
	GUICtrlSetData($prgPercent, $percent)
	GUICtrlSetData($txtPercent, $sPercentText)

	; Counters come straight from the script-wide state.
	GUICtrlSetData($txtURLs, $urlnum)
	GUICtrlSetData($txtAudio, $audionum)
	GUICtrlSetData($txtImages, $imagenum)
	GUICtrlSetData($txtVideos, $videonum)
	Local $iHistorySize = UBound($collected)
	GUICtrlSetData($txtHistory, $iHistorySize)
EndFunc   ;==>Status
109
110
; Returns every piece of $str that sits between $before and $after.
; Both delimiters are inserted into the regular expression as-is, matching is
; case-insensitive and non-greedy, and all matches are returned (flag 3).
Func _ArrayParse($str, $before, $after)
	Local $sPattern = "(?i)" & $before & "(.*?)" & $after
	Local $vMatches = StringRegExp($str, $sPattern, 3)
	Return $vMatches
EndFunc   ;==>_ArrayParse
113
114
; Queues $url unless WasCollected() reports it as already seen: the URL is
; remembered in $collected, appended to spider.urls.txt and counted as pending.
Func AddURL($url)
	If WasCollected($url) Then Return

	_ArrayAdd($collected, $url)
	FileWriteLine("spider.urls.txt", $url)
	$urlnum += 1
EndFunc   ;==>AddURL
122
123
; Reports whether $url is already present in the $collected history
; (case-sensitive comparison, index 0 is skipped).
; Side effect: when the URL is new and the history holds 1024 entries or more,
; the oldest real entry (index 1) is dropped.
Func WasCollected($url)
	Local $iLast = UBound($collected) - 1
	For $iPos = 1 To $iLast
		If $collected[$iPos] == $url Then Return True
	Next

	; Not found: keep the history bounded before the caller appends to it.
	If UBound($collected) >= 1024 Then _ArrayDelete($collected, 1)
	Return False
EndFunc   ;==>WasCollected
134
135
; Derives the base ("directory") URI of $url: scheme, host and every leading
; path segment up to - but not including - the first segment that is empty or
; contains a dot. The result always ends with "/".
; For URLs with a query string the query is cut off first; if InetGetSize()
; reports nothing for the resulting base (without its trailing slash), the base
; is rebuilt with the last path segment left out.
Func GetURI($url)
	Local $sScheme = StringMid($url, 1, StringInStr($url, "://")) & "//"
	Local $sRest = StringMid($url, StringLen($sScheme) + 1)

	; No query string: one pass over all segments is enough.
	If Not StringInStr($sRest, "?") Then Return _GetURI_Join($sScheme, $sRest, 1)

	Local $aQuerySplit = StringSplit($sRest, "?")
	Local $sPath = $aQuerySplit[1]

	Local $sBase = _GetURI_Join($sScheme, $sPath, 1)
	If InetGetSize(StringLeft($sBase, StringLen($sBase) - 1)) Then Return $sBase

	; Probe failed: retry while ignoring the final segment.
	Return _GetURI_Join($sScheme, $sPath, 2)
EndFunc   ;==>GetURI

; Appends the host and the leading "plain" segments of $sPath to $sScheme.
; Segments 2 .. UBound - $iSkipTail are considered; the walk ends at the first
; segment that is empty or contains a dot.
Func _GetURI_Join($sScheme, $sPath, $iSkipTail)
	Local $aSeg = StringSplit($sPath, "/")
	Local $sJoined = $sScheme & $aSeg[1] & "/"
	For $n = 2 To UBound($aSeg) - $iSkipTail
		If StringInStr($aSeg[$n], ".") Or StringLen($aSeg[$n]) = 0 Then ExitLoop
		$sJoined &= $aSeg[$n] & "/"
	Next
	Return $sJoined
EndFunc   ;==>_GetURI_Join
169
170
; Downloads the page at $url into a temporary file, then hands every <a>,
; <img> and <embed> tag found in it to CheckURL().
;   $url - page to fetch; also passed on as the referrer
; The former InetGetSize() call was dropped: its result was never used and it
; cost one extra request per page.
Func GetURLs($url)
	Local $uri = GetURI($url)
	Local $file = "spider.html.txt"

	Status("Downloading", $url, 0)
	InetGet($url, $file, 0, 0)
	; Nothing on disk means the download failed; there is nothing to parse.
	If Not FileExists($file) Then Return
	Local $html = FileRead($file)
	FileDelete($file)

	Status("Parsing URLs", $url, 0)
	_GetURLs_Scan($html, "a", $uri, $url)
	_GetURLs_Scan($html, "img", $uri, $url)
	_GetURLs_Scan($html, "embed", $uri, $url)
EndFunc   ;==>GetURLs

; Runs CheckURL() on every "<$tag ...>" occurrence in $html and reports progress.
Func _GetURLs_Scan($html, $tag, $uri, $url)
	Local $tags = _ArrayParse($html, "<" & $tag, ">")
	Local $count = UBound($tags) ; 0 when nothing matched
	Local $label = "Checking <" & StringUpper($tag) & "> Tags for URLs"
	Local $percent

	For $i = 0 To $count - 1
		; With a single tag ($count - 1) is 0; report 100% instead of dividing by zero.
		$percent = 100
		If $count > 1 Then $percent = Round(($i / ($count - 1)) * 100)
		Status($label, $url, $percent)
		CheckURL($uri, $tags[$i], $url)
	Next
EndFunc   ;==>_GetURLs_Scan
199
200
; Extracts the href= and src= targets from one tag and passes each, resolved
; to an absolute URL, to CheckType().
;   $uri - base URI of the page the tag came from (ends with "/")
;   $str - text between the tag name and the closing ">"
;   $ref - URL of the page the tag came from, forwarded to CheckType()
Func CheckURL($uri, $str, $ref)
	If StringInStr($str, "href=") Then _CheckURL_Attr($uri, $str, "href=", $ref)
	If StringInStr($str, "src=") Then _CheckURL_Attr($uri, $str, "src=", $ref)
EndFunc   ;==>CheckURL

; Reads one attribute, skips values that cannot be fetched, resolves the rest.
Func _CheckURL_Attr($uri, $str, $attr, $ref)
	Local $turl = GetAttr($str, $attr)

	; GetAttr() may come back without a usable string (numeric 0 or "").
	If Not IsString($turl) Then Return
	If $turl == "" Then Return
	; Fragments and non-HTTP pseudo links never lead to a downloadable page.
	If StringLeft($turl, 1) == "#" Then Return
	If StringLeft($turl, 11) = "javascript:" Then Return
	If StringLeft($turl, 7) = "mailto:" Then Return

	CheckType(_CheckURL_Resolve($uri, $turl), $ref)
EndFunc   ;==>_CheckURL_Attr

; Turns a link found on a page whose base is $uri into an absolute URL.
Func _CheckURL_Resolve($uri, $turl)
	; Already absolute (scheme within the first 10 characters).
	If StringInStr(StringLeft($turl, 10), "://") Then Return $turl

	; Protocol-relative ("//host/path"): reuse only the scheme of the base.
	If StringLeft($turl, 2) == "//" Then
		Return StringLeft($uri, StringInStr($uri, "://")) & $turl
	EndIf

	; Root-relative ("/path"): resolve against scheme://host/, i.e. everything
	; up to the third slash of the base, not against the page's directory.
	If StringLeft($turl, 1) == "/" Then
		Local $iHostEnd = StringInStr($uri, "/", 0, 3)
		If $iHostEnd Then Return StringLeft($uri, $iHostEnd) & StringMid($turl, 2)
		Return $uri & StringMid($turl, 2)
	EndIf

	; Plain relative link: append to the page's directory.
	Return $uri & $turl
EndFunc   ;==>_CheckURL_Resolve
224
225
; Returns the value of attribute $attr (given including the "=", e.g. "href=")
; from the tag text $str. Double-quoted, single-quoted and unquoted values are
; supported; an unquoted value ends at the first space.
; When the attribute text occurs more than once (e.g. src= and data-src=) the
; first value wins. Returns "" when no value can be extracted.
Func GetAttr($str, $attr)
	Local $aHit, $sTail, $iSpace

	If StringInStr($str, $attr & '"') Then
		$aHit = _ArrayParse($str, $attr & '"', '"')
		If UBound($aHit) >= 1 Then Return $aHit[0]
	ElseIf StringInStr($str, $attr & "'") Then
		$aHit = _ArrayParse($str, $attr & "'", "'")
		If UBound($aHit) >= 1 Then Return $aHit[0]
	ElseIf StringInStr($str, $attr) Then
		; Unquoted: take everything after the attribute name up to the next space.
		$sTail = StringMid($str, StringInStr($str, $attr) + StringLen($attr))
		$iSpace = StringInStr($sTail, " ")
		If $iSpace Then $sTail = StringMid($sTail, 1, $iSpace - 1)
		Return $sTail
	EndIf

	; Attribute missing or its closing quote was never found.
	Return ""
EndFunc   ;==>GetAttr
240
241
; Inspects one discovered URL. For .html/.htm pages the page is loaded in the
; shared IE instance and the src of every <embed> element is downloaded into
; <script dir>\swf\ (files that already exist are skipped). In all cases the
; base URI of $url is handed to AddURL() for later crawling.
;   $url - absolute URL to inspect
;   $ref - referring page; not used by this function
Func CheckType($url, $ref)
	; Collapse an accidentally doubled first path segment ("seg/seg" -> "seg").
	; Element 4 of the split is that segment ("http:", "", host, segment, ...).
	; It is only touched when it exists and is non-empty: indexing it blindly is
	; a fatal subscript error for "http://host", and an empty segment would make
	; the replacement strip every "/" from the URL. The replacement is literal
	; and case-sensitive, so characters such as "." or "(" in the segment cannot
	; be misread as regular-expression syntax.
	Local $repeat = StringSplit($url, "/")
	If $repeat[0] >= 4 Then
		If StringLen($repeat[4]) > 0 Then
			$url = StringReplace($url, $repeat[4] & "/" & $repeat[4], $repeat[4], 0, 1)
		EndIf
	EndIf

	If StringRight($url, 5) == ".html" Or StringRight($url, 4) == ".htm" Then
		_IENavigate($oIE, $url)
		ConsoleWrite($url & @CRLF)
		_IELoadWait($oIE)

		Local $link = _IETagNameGetCollection($oIE, "embed")
		; The lookup does not yield an object when it fails; iterate only a real collection.
		If IsObj($link) Then
			Local $dir = @ScriptDir & "\swf\"
			Local $src, $filename, $target
			; Downloads cannot be saved into a folder that does not exist yet.
			If Not FileExists($dir) Then DirCreate($dir)

			; NOTE(review): the loop header was not visible in the reviewed
			; listing; it is reconstructed from the matching Next and the uses
			; of $oElement - confirm against the real script.
			For $oElement In $link
				$src = $oElement.src
				; No source, nothing to download or count.
				If Not IsString($src) Then ContinueLoop
				If StringLen($src) = 0 Then ContinueLoop

				; The last "/"-separated piece of the source is the local file name.
				$filename = StringSplit($src, "/")
				$target = $dir & $filename[$filename[0]]
				If Not FileExists($target) Then
					ConsoleWrite("Downloading " & $src & " to " & $target & @CRLF)
					InetGet($src, $target, 0)
					$videonum += 1
				EndIf
			Next
		EndIf
	EndIf

	AddURL(GetURI($url))
EndFunc   ;==>CheckType