Guest User

ISBN & webscraping macro

a guest
Nov 21st, 2019
310
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. 'README: Hi, thanks for helping me with my code. I know it's a little messy; some lines are commented out, and I promise I'll delete them as soon as I'm certain I won't end up needing them again. Some of them are incomplete code, some are versions of lines that threw errors at me, and some are leftovers from other code I found online to build this project.
  2.  
  3.  
  4. Option Explicit
  5.  Private Const xlUp As Long = -4162
  6.  
  7. Public Function GetRegex(url As String, reg As String) As String
  8.     Dim http As Object, html As Object, objResult As Object, Regex As Object
  9.     Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")
  10. '    Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
  11. '    XMLHTTP.Open "GET", url, False
  12. '    XMLHTTP.setRequestHeader "Content-Type", "text/xml"
  13. '    XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
  14. '    XMLHTTP.Send
  15.        If Len(url) < 1 Then
  16. '            continue = False
  17.            Exit Function
  18.         End If
  19.     http.Open "GET", url, False
  20.     http.setRequestHeader "Content-Type", "text/xml"
  21.     http.Send
  22.     Set html = CreateObject("htmlfile")
  23.     html.Body.innerHTML = http.responseText
  24.     Set Regex = CreateObject("VBScript.RegExp")
  25.     Regex.Pattern = reg
  26.     Regex.Global = True
  27.     If Regex.Test(http.responseText) Then
  28. '        Set matches = Regex.Execute(http.responseText)
  29. '        GetRegex = matches(0).SubMatches(0)
  30.         GetRegex = Regex.Execute(http.responseText).Value
  31.         Exit Function
  32.     End If
  33. '    GetRegex = ""
  34. End Function
  35.  
  36. Sub Extract_Desk_Copy_ISBNs()
  37.  Dim objOL As Outlook.Application
  38. ' Dim objItems As Outlook.Items
  39.    Dim ObjItems As Selection
  40.  Dim objFolder As Outlook.MAPIFolder
  41.  Dim olItem As Outlook.MailItem
  42. ' Dim currentExplorer As Explorer
  43. ' Dim Selection As Selection
  44. Dim xlApp As Object
  45.  Dim xlWB As Object
  46.  Dim xlSheet As Object
  47. 'Dim vtext As Variant
  48.   Dim vtext As String
  49.  Dim sText As String
  50.  Dim rCount As Long
  51.  Dim initRow As Long
  52.  Dim bXStarted As Boolean
  53.  Dim enviro As String
  54.  Dim strPath As String
  55.  Dim Reg1 As Object
  56.  Dim M1 As Object
  57.  Dim M As Object
  58.  Dim website As String
  59.  Dim Rng As Range
  60.  
  61.            
  62. enviro = CStr(Environ("USERPROFILE"))
  63. 'the path of the workbook
  64. strPath = "C:\foo\bar.xlsx"
  65.      On Error Resume Next
  66.      Set xlApp = GetObject(, "Excel.Application")
  67.      If Err <> 0 Then
  68.          Application.StatusBar = "Please wait while Excel source is opened ... "
  69.          Set xlApp = CreateObject("Excel.Application")
  70.          bXStarted = True
  71.      End If
  72.      On Error GoTo 0
  73.      'Open the workbook to input the data
  74.     Set xlWB = xlApp.Workbooks.Open(strPath)
  75.      Set xlSheet = xlWB.Sheets("Sheet1")
  76.  
  77.     'Find the next empty line of the worksheet
  78.     initRow = xlSheet.Range("E" & xlSheet.Rows.Count).End(xlUp).row
  79.      rCount = initRow + 1
  80.      
  81.     Set objOL = Outlook.Application
  82. '    Set objFolder = objOL.ActiveExplorer.CurrentFolder
  83. '    Set objItems = objFolder.Items
  84.    Set ObjItems = objOL.ActiveExplorer.Selection
  85.     For Each olItem In ObjItems
  86.  
  87.      On Error Resume Next
  88.  
  89.      With olItem
  90.       sText = olItem.Body
  91.  
  92.         Set Reg1 = CreateObject("VBScript.RegExp")
  93.         With Reg1
  94.             .Global = True
  95.             .Pattern = "([0-9]{13})|(97[0-9]\-[0-9\-]{13})"
  96.         End With
  97. '        If Reg1.Test(sText) Then
  98.            Set M1 = Reg1.Execute(sText)
  99.             For Each M In M1
  100. '               vtext = Trim(M.SubMatches(1))
  101.               With xlSheet.Range("E" & initRow, "E" & rCount)
  102.                     Set Rng = .Find(What:=vtext, _
  103.                                After:=.Cells(.Cells.Count), _
  104.                                LookIn:=xlValues, _
  105.                                LookAt:=xlWhole, _
  106.                                SearchOrder:=xlByRows, _
  107.                                SearchDirection:=xlNext, _
  108.                                MatchCase:=False)
  109.                     If Rng Is Nothing Then
  110.                       website = "http://isbnsearch.org/isbn/" & M.Value
  111.                       xlSheet.Range("E" & rCount) = vtext
  112.                       xlSheet.Range("C" & rCount) = GetRegex("website", "<h1>(.*)</h1>")
  113.                       xlSheet.Range("D" & rCount) = GetRegex("website", "<p><strong>Author:</strong> (.*)</p>")
  114.                       xlSheet.Range("G" & rCount) = GetRegex("website", ">*<p><strong>Publisher:</strong> (.*)</p>")
  115.                     Else
  116.                     End If
  117.                End With
  118.             Next
  119.             rCount = rCount + 1
  120. '        Else
  121. '            If MsgBox("This email cound not be read. Please enter data manually.", vbOKCancel, "Error") = vbOK Then
  122.               'If ObjItems.Count = 1 Then
  123.                    'blah blah code to Open xlSheet
  124.               'Else
  125.               'End If
  126. '            Else
  127. '               Exit Sub
  128. '            End If
  129. '        End If
  130.     End With
  131.     Next
  132.    
  133. '''TODO: Popup dialog, "Please manually check the data for errors." Options: "Okay" [opens xlSheet] / "Later" [closes everything]
  134. '    If MsgBox("Please manually check the data for errors. Would you like to launch Excel now?", vbYesNo, "Scan complete") = vbYes Then
  135.        'blah blah Open xlSheet
  136. '    Else
  137. '    End If
  138.     xlWB.Close 1
  139.      If bXStarted Then
  140.          xlApp.Quit
  141.      End If
  142.      Set M = Nothing
  143.      Set M1 = Nothing
  144.      Set Reg1 = Nothing
  145.      Set xlApp = Nothing
  146.      Set xlWB = Nothing
  147.      Set xlSheet = Nothing
  148.      
  149.     Set ObjItems = Nothing
  150.     Set objFolder = Nothing
  151.     Set objOL = Nothing
  152.  
  153. End Sub
Advertisement
Add Comment
Please, Sign In to add comment