Advertisement
alphaservice

VBA_Module_2

Jun 27th, 2021
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.54 KB | None | 0 0
  1. Public Sub GetDetailedBizBuySellInfo()
  2. Dim http As Object, urls() As Variant
  3. Dim html As MSHTML.HTMLDocument 'VBE > Tools > References > Microsoft HTML Object Library
  4.  
  5. urls = Array("https://www.bizbuysell.com/Business-Opportunity/covid-friendly-commercial-cleaning-est-30-years-100k-net/1753433/?d=L2Zsb3JpZGEvaGlsbHNib3JvdWdoLWNvdW50eS1idXNpbmVzc2VzLWZvci1zYWxlLzI/cT1hVEk5T0RFc01qQXNNekFzTnpnbWJtRndQV1UlM0Q=", _
  6. "https://www.bizbuysell.com/Business-Opportunity/Established-Cleaning-Business-Tampa-St-Pete/1849521/?utm_source=bizbuysell&utm_medium=emailsite&utm_campaign=shtmlbot&utm_content=headline")
  7.  
  8. Set http = CreateObject("MSXML2.XMLHTTP")
  9. Set html = New MSHTML.HTMLDocument
  10.  
  11. Dim url As Long, results() As Variant
  12.  
  13. ReDim results(1 To UBound(urls) + 1, 1 To 19) 'size the final output array. _
  14. There will be the number of urls as row count, the number of labels as column count + 1 to store the url itself. You need to update the list of labels below. See GetBlankDetailedInformationDictionary
  15. With http
  16.  
  17. For url = LBound(urls) To UBound(urls) 'loop url list
  18.  
  19. .Open "Get", urls(url), False
  20. .setRequestHeader "User-Agent", "Mozilla/5.0"
  21. .send
  22.  
  23. html.body.innerHTML = .responseText
  24.  
  25. Dim currentDetailedInformation As Scripting.Dictionary 'VBE > Tools > References > Microsoft Scripting Runtime
  26.  
  27. Set currentDetailedInformation = GetCurrentDetailedInfo(html) 'use retrieved html to return a dictionary with key as dt > strong e.g.Location; value as dd e.g. Tampa, FL
  28.  
  29. AddCurrentDetailedInfoToResults results, currentDetailedInformation, url, urls(url) 'url + 1 (zero indexed) will keep track of current row number to add to results
  30. Next
  31.  
  32. End With
  33.  
  34. With ActiveSheet 'better to update with explicit sheet/be careful not to overwrite data already in a sheet
  35. .Cells(1, 1).Resize(1, UBound(results, 2)) = currentDetailedInformation.keys ' write out headers
  36. .Cells(1, UBound(results, 2)) = "Url"
  37. .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results ' write out results
  38. End With
  39. End Sub
  40.  
  41. Public Sub AddCurrentDetailedInfoToResults(ByRef results As Variant, ByVal currentDetailedInformation As Scripting.Dictionary, ByVal url As Long, ByVal currentUrl As String)
  42.  
  43. Dim key As Variant, currentColumn As Long
  44.  
  45. For Each key In currentDetailedInformation.keys
  46. currentColumn = currentColumn + 1 'increase column count to update results array with
  47. results(url + 1, currentColumn) = currentDetailedInformation(key)
  48. Next
  49. results(url + 1, currentColumn + 1) = currentUrl
  50. End Sub
  51.  
  52. Public Function GetCurrentDetailedInfo(ByVal html As MSHTML.HTMLDocument) As Scripting.Dictionary
  53. ' Gathers a list of all the relevant dd, dt nodes within the passed in HTMLDocument.
  54. ' Requests a new blank dictionary whose keys are the labels (child strong element of dt tag)
  55. 'Updates blank dictionary, per key, where present, with dd value in a loop of step 2 as list is strong, dd, strong, dd etc.....
  56.  
  57. Dim updatedDictionary As Scripting.Dictionary, listOfLabelsAndValues As MSHTML.IHTMLDOMChildrenCollection
  58.  
  59. Set updatedDictionary = GetBlankDetailedInformationDictionary
  60. 'Css pattern to match the appropriate nodes
  61. Set listOfLabelsAndValues = html.querySelectorAll("#ctl00_ctl00_Content_ContentPlaceHolder1_wideProfile_listingDetails_dlDetailedInformation dt > strong, #ctl00_ctl00_Content_ContentPlaceHolder1_wideProfile_listingDetails_dlDetailedInformation dd")
  62.  
  63. Dim currentIndex As Long
  64.  
  65. For currentIndex = 0 To listOfLabelsAndValues.length - 2 Step 2 'nodeList is 0 index based
  66.  
  67. 'On Error Resume Next 'key (label) may not be present for current html document _
  68. i.e. url so ignore errors when attempting to update blank dictionary via dt > strong matching on key. If label not found then value = vbNullString
  69. Dim key As String, value As String
  70.  
  71. key = Trim$(listOfLabelsAndValues.Item(currentIndex).innerText)
  72. value = Trim$(listOfLabelsAndValues.Item(currentIndex + 1).innerText) 'as we are looping every 2 indices 0,2,4 ....
  73. If updatedDictionary.Exists(key) Then updatedDictionary(key) = value
  74.  
  75. 'On Error GoTo 0
  76. Next
  77.  
  78. Set GetCurrentDetailedInfo = updatedDictionary ' return updated dictionary
  79.  
  80. End Function
  81.  
  82. Public Function GetBlankDetailedInformationDictionary() As Scripting.Dictionary
  83.  
  84. Dim blankDictionary As Scripting.Dictionary, keys() As Variant, key As Long
  85.  
  86. Set blankDictionary = New Scripting.Dictionary
  87.  
  88. '' TODO Note: you would add in all 18 labels into array below.
  89. keys = Array("Location:", "Type:", "Inventory:", "Real Estate:", "Building SF:", _
  90. "Building Status:", "Lease Expiration:", "Employees:", "Furniture, Fixtures, & Equipment (FF&E):", _
  91. "Facilities:", "Competition:", "Growth & Expansion:", "Financing:", "Support & Training:", _
  92. "Reason for Selling:", "Franchise:", "Home-Based:", "Business Website:")
  93.  
  94. For key = LBound(keys) To UBound(keys)
  95. blankDictionary(keys(key)) = vbNullString 'add blank entry to dictionary for each label
  96. Next
  97.  
  98. Set GetBlankDetailedInformationDictionary = blankDictionary
  99. End Function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement