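<#
    Simple crawler for jobs.bg: for every keyword read from techsData.txt it
    runs a job search, follows the result links (and any additional result
    pages), extracts the job description blocks and appends them to a dated
    HTML report, which is opened in Chrome at the end.

    Assumption (inferred from the code below, not stated in the original paste):
    techsData.txt holds one search keyword per line, e.g.

        PowerShell
        Java
#>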
function CrawlPages ($pagesObj, $jobsURL, $file)
{
    # Follow every pagination link and scrape the job descriptions on each page.
    for ($p = 0; $p -lt $pagesObj.Length; $p++)
    {
        $pageNext = $jobsURL + $pagesObj[$p].href.ToString()
        $reqNew = Invoke-WebRequest $pageNext
        $jobsLinks = $reqNew.Links | where Class -Match "MainLinkBold" | select -Property href

        for ($l = 1; $l -lt $jobsLinks.Length; $l++)
        {
            $nextJob = $jobsURL + $jobsLinks[$l].href.ToString()
            $reqsNew = Invoke-WebRequest $nextJob
            #Start-Sleep -Seconds 1

            # Grab the job description block and append it to the output file.
            $jobNew = $reqsNew.AllElements | where Class -Match "jobDataView" | select -ExpandProperty innerHTML
            Add-Content $file $jobNew

            # Release the per-iteration variables.
            $reqsNew = $null
            $nextJob = $null
            $jobNew = $null
        }
    }
}
function CrawlJobLinks ($pageLink, $currKeyword, $jobsURL, $file)
{
    # Run the search for the current keyword and load the first page of results.
    $pageLink = $pageLink + $currKeyword
    $req = Invoke-WebRequest $pageLink
    #Start-Sleep -Seconds 1

    $pages = $req.Links | where Class -Match "pathlink" | select -Property href
    $jobLinks = $req.Links | where Class -Match "MainLinkBold" | select -Property href

    # Check that we have at least one page with jobs.
    if ($null -ne $jobLinks)
    {
        for ($i = 1; $i -lt $jobLinks.Length; $i++)
        {
            $nextLink = $jobsURL + $jobLinks[$i].href.ToString()
            $reqs = Invoke-WebRequest $nextLink
            #Start-Sleep -Seconds 1

            # Grab the job description block and append it to the output file.
            $job = $reqs.AllElements | where Class -Match "jobDataView" | select -ExpandProperty innerHTML
            Add-Content $file $job

            # Release the per-iteration variables.
            $reqs = $null
            $nextLink = $null
            $job = $null
        }

        # Check whether there is more than one page with jobs.
        if ($null -ne $pages)
        {
            # Could be done via extension methods or recursion; add a page object model later.
            CrawlPages -pagesObj $pages -jobsURL $jobsURL -file $file
        }
    }
}
# The output report is stamped with today's date.
$date = Get-Date -Format 'dd_MM_yyyy'
$file = 'C:\Users\evgeni.kostadinov\Desktop\webCrawlers\1-JobsTech\all\requs' + $date + '.html'

# Search keywords, one per line.
$db = Get-Content 'C:\Users\evgeni.kostadinov\Desktop\webCrawlers\techsData.txt'
$jobsURL = 'http://www.jobs.bg/'

# Initialize the output file with a title line.
Set-Content $file "Jobs requirements <br/>"

for ($k = 0; $k -lt $db.Length; $k++)
{
    $url = "http://www.jobs.bg/front_job_search.php?first=1&str_regions=&str_locations=&tab=jobs&old_country=&country=-1&region=0&l_category%5B%5D=0&keyword="
    $searchKeyword = $db[$k]

    # Section heading for this keyword in the report.
    $h1 = "<h1 style=""text-align:center;background-color:red;color:white;""> " + $searchKeyword + "</h1>"
    Add-Content $file $h1

    CrawlJobLinks -pageLink $url -currKeyword $searchKeyword -jobsURL $jobsURL -file $file

    # Release the per-iteration variable.
    $url = $null
}

# Open the generated report in Chrome.
start chrome $file