Advertisement
Guest User

Untitled

a guest
Jun 4th, 2019
436
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
AutoIt 4.08 KB | None | 0 0
  ; Standard UDFs used by rlslog_webCrawler():
  ;   Array.au3 - _ArrayAdd, _ArraySearch, _ArrayToClip, _ArrayDisplay, $ARRAYFILL_FORCE_SINGLEITEM
  ;   Date.au3  - _DateDayOfWeek, $DMW_SHORTNAME
  ;   File.au3  - _FileWriteFromArray
  #include <Array.au3>
  #include <Date.au3>
  #include <File.au3>
  ; Third-party Firefox automation UDF providing the _FF* functions; it must be
  ; available in the AutoIt include directory or next to this script.
  #include <FF.au3>

  ; Script entry point: crawl the rlslog e-book category listing and collect every
  ; link containing "userscloud.com"; per-page result files are written to e:\tmp\.
  rlslog_webCrawler("http://www.rlslog.net/category/ebooks/ebook/page/", "html/body/div[2]/div[2]/div[*]/h3/a", "html/body/div[2]/div[2]/div[1]", "userscloud.com", "e:\tmp\")
  2.  
  ; Crawls a range of numbered listing pages in Firefox (via the FF.au3 UDF), opens
  ; every entry found on each listing page and collects the links on the entry page
  ; that contain $search_term.
  ;
  ; Parameters:
  ;   $url                       - listing URL prefix; the page number is appended to it
  ;   $header_xpath              - XPath selecting the entry links on a listing page
  ;   $header_xpath_nextPageText - XPath of the element on an entry page whose "value"
  ;                                is stored as a text section ahead of the entry's links
  ;   $search_term               - substring a link must contain to be collected
  ;   $save_urls_dir             - output directory; a trailing "\" is appended if missing
  ;
  ; The page range is asked for interactively with two InputBox prompts.
  ;
  ; Side effects:
  ;   - one "<timestamp>_str_<page>.txt" file per listing page in $save_urls_dir
  ;   - load/open failures are appended to "000errors.txt" in $save_urls_dir
  ;   - the de-duplicated link list is copied to the clipboard and displayed at the end
  ;
  ; Returns 0 (no meaningful value). @error is set to 1 when a prompt is cancelled
  ; and to 2 when _FFConnect() fails.
  Func rlslog_webCrawler($url, $header_xpath, $header_xpath_nextPageText, $search_term, $save_urls_dir)
     Local $pageLoadingTime, $headers, $countingPages, $countingHeaders
     Local $aLinks, $aDownloadURLs[1], $hFileOpen, $header_name, $header_url, $section_text, $sLink
     Local $errors_flag = 0

     ; InputBox returns a string and sets @error when the dialog is cancelled.
     Local $start = InputBox("Start page", "Enter start page number", "1")
     If @error Then Return SetError(1, 0, 0)
     Local $finish = InputBox("End page", "Enter end page number", "2654")
     If @error Then Return SetError(1, 0, 0)

     ; Convert explicitly so the loop bounds and the arithmetic below are numeric.
     $start = Int($start)
     $finish = Int($finish)
     $countingPages = $finish - $start + 1

     ; File names are built by plain concatenation, so the directory needs its separator.
     If $save_urls_dir <> "" And StringRight($save_urls_dir, 1) <> "\" Then $save_urls_dir &= "\"

     If Not _FFConnect() Then Return SetError(2, 0, 0)

     For $k = $start To $finish
        ; Fresh per-page buffers. Element 0 is a placeholder: both arrays are
        ; written to disk starting from index 1.
        Local $tmp_array[1]
        Local $errors_array[1]
        $errors_flag = 0

        $countingPages -= 1
        _FFOpenURL($url & $k, False)
        $pageLoadingTime = _FFLoadWait(15000, 30000)
        If $pageLoadingTime = 0 Then
           ; A result of 0 is treated as a load timeout. The failure is recorded,
           ; but the crawl still tries to read whatever the browser is showing.
           $errors_flag = 1
           MsgBox(0, "Error:", "Page load time limit broken" & @CRLF & $url & $k, 3)
           _ArrayAdd($errors_array, __rlslog_stamp($k) & "_pageLoadTimeError" & @TAB & $url & $k, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)
        EndIf

        $headers = _FFXPath($header_xpath, "", 7)
        If IsArray($headers) Then
           $countingHeaders = UBound($headers) - 1
           ; Entries are read from index 1 onward.
           For $i = 1 To UBound($headers) - 1
              $header_url = $headers[$i]
              $header_name = __rlslog_headerName($header_url)
              $countingHeaders -= 1
              ToolTip("Current page " & $k & " / " & $finish & @CRLF & "Remaining: " & $countingPages & " pages" & @CRLF & @CRLF & _
                    "Loading headers " & $i & " / " & UBound($headers) - 1 & @CRLF & "Remaining: " & $countingHeaders & " headers", 0, 0)

              _FFOpenURL($header_url, False)
              $pageLoadingTime = _FFLoadWait(15000, 30000)
              If $pageLoadingTime = 0 Then
                 $errors_flag = 1
                 MsgBox(0, "Error:", "Header page load time limit broken" & @CRLF & $header_url, 3)
                 _ArrayAdd($errors_array, __rlslog_stamp($k) & "_headerPageLoadTimeError" & @TAB & $header_url, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)
              EndIf

              ; Text section of the entry page, padded with blank lines, stored ahead of its links.
              $section_text = @CRLF & _FFXPath($header_xpath_nextPageText, "value", 2) & @CRLF & @CRLF
              _ArrayAdd($tmp_array, $section_text, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)

              $aLinks = _FFLinksGetAll()
              If IsArray($aLinks) Then
                 For $j = 0 To UBound($aLinks) - 1
                    $sLink = $aLinks[$j][0]
                    If StringInStr($sLink, $search_term) Then
                       ; _ArraySearch sets @error when the value is not found,
                       ; i.e. the link has not been collected yet.
                       _ArraySearch($aDownloadURLs, $sLink)
                       If @error Then
                          _ArrayAdd($tmp_array, $header_name & @TAB & $sLink & @TAB & $header_url, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)
                          _ArrayAdd($aDownloadURLs, $sLink, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)
                       EndIf
                    EndIf
                 Next
              EndIf
              ToolTip("")
           Next
        EndIf

        ; Mode 10 = overwrite (2) + create missing directories (8).
        $hFileOpen = FileOpen($save_urls_dir & __rlslog_stamp($k) & ".txt", 10)
        If $hFileOpen = -1 Then
           $errors_flag = 1
           _ArrayAdd($errors_array, __rlslog_stamp($k) & "_fileOpenError" & @TAB & $save_urls_dir, Default, Default, Default, $ARRAYFILL_FORCE_SINGLEITEM)
        Else
           _FileWriteFromArray($hFileOpen, $tmp_array, 1)
           FileClose($hFileOpen)
        EndIf

        If $errors_flag = 1 Then
           ; Mode 9 = append (1) + create missing directories (8).
           $hFileOpen = FileOpen($save_urls_dir & "000errors.txt", 9)
           If $hFileOpen = -1 Then
              MsgBox(0, "Error:", "Cannot open error log" & @CRLF & $save_urls_dir & "000errors.txt", 3)
           Else
              _FileWriteFromArray($hFileOpen, $errors_array, 1)
              FileClose($hFileOpen)
           EndIf
        EndIf
     Next

     _ArrayToClip($aDownloadURLs, @CRLF, 1)
     _ArrayDisplay($aDownloadURLs, "$aDonwloadURLs")
     _FFOpenURL($url & $finish, False)
  EndFunc

  ; Builds the "<YYYYMMDD>_<weekday>_<HH-MM-SS-ms>_str_<page>" prefix shared by the
  ; per-page result file names and the error-log entries.
  Func __rlslog_stamp($iPage)
     Return @YEAR & @MON & @MDAY & "_" & _DateDayOfWeek(@WDAY, $DMW_SHORTNAME) & "_" & @HOUR & "-" & @MIN & "-" & @SEC & "-" & @MSEC & "_str_" & $iPage
  EndFunc

  ; Returns the 4th "/"-separated piece of $sUrl, used as the entry name.
  ; Falls back to the whole URL when it has fewer than four pieces, so a short or
  ; unexpected URL cannot trigger an array subscript error.
  Func __rlslog_headerName($sUrl)
     Local $aParts = StringSplit($sUrl, "/")
     ; StringSplit stores the number of pieces in element 0.
     If $aParts[0] >= 4 Then Return $aParts[4]
     Return $sUrl
  EndFunc
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement