Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Public Sub GetDetailedBizBuySellInfo()
- Dim http As Object, urls() As Variant
- Dim html As MSHTML.HTMLDocument 'VBE > Tools > References > Microsoft HTML Object Library
- urls = Array("https://www.bizbuysell.com/Business-Opportunity/covid-friendly-commercial-cleaning-est-30-years-100k-net/1753433/?d=L2Zsb3JpZGEvaGlsbHNib3JvdWdoLWNvdW50eS1idXNpbmVzc2VzLWZvci1zYWxlLzI/cT1hVEk5T0RFc01qQXNNekFzTnpnbWJtRndQV1UlM0Q=", _
- "https://www.bizbuysell.com/Business-Opportunity/Established-Cleaning-Business-Tampa-St-Pete/1849521/?utm_source=bizbuysell&utm_medium=emailsite&utm_campaign=shtmlbot&utm_content=headline")
- Set http = CreateObject("MSXML2.XMLHTTP")
- Set html = New MSHTML.HTMLDocument
- Dim url As Long, results() As Variant
- ReDim results(1 To UBound(urls) + 1, 1 To 19) 'size the final output array. _
- There will be the number of urls as row count, the number of labels as column count + 1 to store the url itself. You need to update the list of labels below. See GetBlankDetailedInformationDictionary
- With http
- For url = LBound(urls) To UBound(urls) 'loop url list
- .Open "Get", urls(url), False
- .setRequestHeader "User-Agent", "Mozilla/5.0"
- .send
- html.body.innerHTML = .responseText
- Dim currentDetailedInformation As Scripting.Dictionary 'VBE > Tools > References > Microsoft Scripting Runtime
- Set currentDetailedInformation = GetCurrentDetailedInfo(html) 'use retrieved html to return a dictionary with key as dt > strong e.g.Location; value as dd e.g. Tampa, FL
- AddCurrentDetailedInfoToResults results, currentDetailedInformation, url, urls(url) 'url + 1 (zero indexed) will keep track of current row number to add to results
- Next
- End With
- With ActiveSheet 'better to update with explicit sheet/be careful not to overwrite data already in a sheet
- .Cells(1, 1).Resize(1, UBound(results, 2)) = currentDetailedInformation.keys ' write out headers
- .Cells(1, UBound(results, 2)) = "Url"
- .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results ' write out results
- End With
- End Sub
- Public Sub AddCurrentDetailedInfoToResults(ByRef results As Variant, ByVal currentDetailedInformation As Scripting.Dictionary, ByVal url As Long, ByVal currentUrl As String)
- Dim key As Variant, currentColumn As Long
- For Each key In currentDetailedInformation.keys
- currentColumn = currentColumn + 1 'increase column count to update results array with
- results(url + 1, currentColumn) = currentDetailedInformation(key)
- Next
- results(url + 1, currentColumn + 1) = currentUrl
- End Sub
- Public Function GetCurrentDetailedInfo(ByVal html As MSHTML.HTMLDocument) As Scripting.Dictionary
- ' Gathers a list of all the relevant dd, dt nodes within the passed in HTMLDocument.
- ' Requests a new blank dictionary whose keys are the labels (child strong element of dt tag)
- 'Updates blank dictionary, per key, where present, with dd value in a loop of step 2 as list is strong, dd, strong, dd etc.....
- Dim updatedDictionary As Scripting.Dictionary, listOfLabelsAndValues As MSHTML.IHTMLDOMChildrenCollection
- Set updatedDictionary = GetBlankDetailedInformationDictionary
- 'Css pattern to match the appropriate nodes
- Set listOfLabelsAndValues = html.querySelectorAll("#ctl00_ctl00_Content_ContentPlaceHolder1_wideProfile_listingDetails_dlDetailedInformation dt > strong, #ctl00_ctl00_Content_ContentPlaceHolder1_wideProfile_listingDetails_dlDetailedInformation dd")
- Dim currentIndex As Long
- For currentIndex = 0 To listOfLabelsAndValues.length - 2 Step 2 'nodeList is 0 index based
- 'On Error Resume Next 'key (label) may not be present for current html document _
- i.e. url so ignore errors when attempting to update blank dictionary via dt > strong matching on key. If label not found then value = vbNullString
- Dim key As String, value As String
- key = Trim$(listOfLabelsAndValues.Item(currentIndex).innerText)
- value = Trim$(listOfLabelsAndValues.Item(currentIndex + 1).innerText) 'as we are looping every 2 indices 0,2,4 ....
- If updatedDictionary.Exists(key) Then updatedDictionary(key) = value
- 'On Error GoTo 0
- Next
- Set GetCurrentDetailedInfo = updatedDictionary ' return updated dictionary
- End Function
- Public Function GetBlankDetailedInformationDictionary() As Scripting.Dictionary
- Dim blankDictionary As Scripting.Dictionary, keys() As Variant, key As Long
- Set blankDictionary = New Scripting.Dictionary
- '' TODO Note: you would add in all 18 labels into array below.
- keys = Array("Location:", "Type:", "Inventory:", "Real Estate:", "Building SF:", _
- "Building Status:", "Lease Expiration:", "Employees:", "Furniture, Fixtures, & Equipment (FF&E):", _
- "Facilities:", "Competition:", "Growth & Expansion:", "Financing:", "Support & Training:", _
- "Reason for Selling:", "Franchise:", "Home-Based:", "Business Website:")
- For key = LBound(keys) To UBound(keys)
- blankDictionary(keys(key)) = vbNullString 'add blank entry to dictionary for each label
- Next
- Set GetBlankDetailedInformationDictionary = blankDictionary
- End Function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement