Phylum

Scraper Attempt #2

Sep 17th, 2017
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Scrape the sales listing at $URL into $Properties.
  #
  # Walks every element of the fetched page, remembers the most recent
  # city/county heading, and turns each element whose innerHTML splits into
  # exactly 8 CRLF-separated lines into one [pscustomobject] record with:
  #   CityCounty, JurisdictionFileNumber, SaleDate, SaleTime, PropertyAddress,
  #   City, Zip, OriginalLoanAmount, MapLink, SearchLink
  #
  # NOTE(review): .AllElements is presumably only populated by the HTML
  # parsing in Windows PowerShell's Invoke-WebRequest - confirm before running
  # this on PowerShell 6+.

  # Site to grab
  $URL = 'http://www.bww-law.com/sales.asp'

  # Grab content
  $Result = Invoke-WebRequest $URL

  # Collection to hold the bits (an empty array literal, not a hashtable)
  [System.Collections.ArrayList]$Properties = @()

  # Most recent city/county heading seen; reset so a previous run in the same
  # session cannot leak a stale value into the first rows.
  $CityCounty = $null

  # Loop through every element to grab the table rows
  foreach ($Element in $Result.AllElements)
  {
      $Html = $Element.innerHTML

      # Get rid of all the junk entries. IsNullOrWhiteSpace already covers
      # null, empty and whitespace-only content, so one check is enough.
      if ([string]::IsNullOrWhiteSpace($Html)) { continue }

      # Anything containing the column header text is not a data row
      if ($Html -like '*Jurisdiction*') { continue }

      # A sub-heading row names the city/county for the rows that follow it.
      # No 'continue' here: as before, the element still goes through the
      # 8-line check below.
      if ($Html -like '<TD class=subhead colSpan=8><A name="*') { $CityCounty = $Element.innerText.Trim() }

      # The innerHTML has all the details of interest, one cell per line
      $Cells = $Html -split "`r`n"

      # If the collection doesn't contain 8 lines, it's not a valid data row
      if ($Cells.Count -ne 8) { continue }

      # Strip the known opening/closing TD markup from each cell once, so the
      # values can be reused below when building the links.
      $FileNumber = $Cells[1].Replace('<TD class=bodytext width=80>', '').Replace('</TD>', '')
      $SaleDate   = $Cells[2].Replace('<TD class=bodytext width=65 align=center>', '').Replace('</TD>', '')
      $SaleTime   = $Cells[3].Replace('<TD class=bodytext width=80 align=center>', '').Replace('</TD>', '')
      $Address    = $Cells[4].Replace('<TD class=bodytext width=180>', '').Replace('</TD>', '')
      $City       = $Cells[5].Replace('<TD class=bodytext width=120>', '').Replace('</TD>', '')
      $Zip        = $Cells[6].Replace('<TD class=bodytext width=90>', '').Replace('</TD>', '')
      $LoanAmount = $Cells[7].Replace('<TD class=bodytext width=60 align=right>', '').Replace('</TD>', '')

      # Query string shared by both links. Previously $City and $Zip were only
      # hashtable keys, never variables, so they expanded to nothing and every
      # link ended in "++VA+". Escaping protects characters such as '#' or '&'
      # in an address; '%20' is turned back into '+' to keep the original
      # separator style.
      $Query = [uri]::EscapeDataString("$Address $City VA $Zip").Replace('%20', '+')

      # Add to the collection. ArrayList.Add returns the new index, which is
      # discarded so it does not leak into the script's output; unlike '+=',
      # Add does not rebuild the whole list on every append.
      [void]$Properties.Add([pscustomobject]@{
          CityCounty             = $CityCounty
          JurisdictionFileNumber = $FileNumber
          SaleDate               = $SaleDate
          SaleTime               = $SaleTime
          PropertyAddress        = $Address
          City                   = $City
          Zip                    = $Zip
          OriginalLoanAmount     = $LoanAmount
          MapLink                = "https://maps.google.com/maps?q=$Query"
          SearchLink             = "https://google.com/search?q=$Query"
      })
  }
Add Comment
Please, Sign In to add comment