Excel VBA Pulling Web Data From a List of Hyperlinks

1.4k Views Asked by At

I have a list of hyperlinks in column C on sheet 1, and I want to pull data from each link and put the data for each link in separate worksheets which have already been created. All of the hyperlinks are to the same website...pro football reference... but each link is for a different NFL player. I want to pull the same data table for each player. I have been able to pull data from the first link and put it in sheet 2 as it should be, but I am very new to VBA and can't figure out how to create a loop to do this for each link in my list and to put it in the other sheets. Below is the code I currently have to get data from the first link:

Sub passingStats()
Dim x As Long, y As Long
Dim htm As Object

Set htm = CreateObject("htmlFile")

With CreateObject("msxml2.xmlhttp")
    .Open "GET", Range("C2"), False
    .send
    htm.body.innerhtml = .responsetext
End With

With htm.getelementbyid("passing")
    For x = 0 To .Rows.Length - 1
        For y = 0 To .Rows(x).Cells.Length - 1
            Sheets(2).Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innertext
        Next y
        Next x
End With

End Sub

Any help would be greatly appreciated.

1

There are 1 best solutions below

8
On BEST ANSWER

The following shows using a loop.

N.B.

  1. There is a logic flaw in your table write which I have written a patch for.
  2. Some strings where being converted incorrectly in your script. I have prefixed with ' to stop this.

Code:

Option Explicit
Public Sub GetInfo()
    Dim html As New HTMLDocument, links(), link As Long, wsSourceSheet As Worksheet
    Dim hTable As HTMLTable, ws As Worksheet, playerName As String
    Set wsSourceSheet = ThisWorkbook.Worksheets("Sheet1") '<change to sheet containing links
    Application.ScreenUpdating = False
    With wsSourceSheet
        links = .Range("C2:C" & .Cells(.Rows.Count, "C").End(xlUp).Row).Value
    End With
    For link = LBound(links, 1) To UBound(links, 1)
        If InStr(links(link, 1), "https://") > 0 Then
            Set html = GetHTMLDoc(links(link, 1))
            Set hTable = html.getElementById("passing")
            If Not hTable Is Nothing Then
                playerName = GetNameAbbr(links(link, 1))
                Set ws = AddPlayerSheet(playerName)
                WriteTableToSheet hTable, ws
                FixTable ws
            End If
        End If
    Next
    Application.ScreenUpdating = True
End Sub

Public Function GetHTMLDoc(ByVal url As String) As HTMLDocument
    Dim sResponse As String, html As New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", url, False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With
    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
    html.body.innerHTML = sResponse
    Set GetHTMLDoc = html
End Function

Public Sub WriteTableToSheet(ByVal hTable As HTMLTable, ByVal ws As Worksheet)
    Dim x As Long, y As Long
    With hTable
        For x = 0 To .Rows.Length - 1
            For y = 0 To .Rows(x).Cells.Length - 1
                If y = 6 Or y = 7 Then
                    ws.Cells(x + 4, y + 1).Value = Chr$(39) & .Rows(x).Cells(y).innerText
                Else
                    ws.Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innerText
                End If
            Next y
        Next x
    End With
End Sub

Public Function GetNameAbbr(ByVal url As String) As String
    Dim tempArr() As String
    tempArr = Split(url, "/")
    GetNameAbbr = Left$(tempArr(UBound(tempArr)), 6)
End Function

Public Function AddPlayerSheet(ByVal playerName As String) As Worksheet
    Dim ws As Worksheet
    If SheetExists(playerName) Then
        Application.DisplayAlerts = False
        ThisWorkbook.Worksheets(playerName).Delete
        Application.DisplayAlerts = True
    End If
    Set ws = ThisWorkbook.Worksheets.Add
    ws.Name = playerName
    Set AddPlayerSheet = ws
End Function

Public Function SheetExists(ByVal playerName As String) As Boolean
    SheetExists = Evaluate("ISREF('" & playerName & "'!A1)")
End Function

Public Sub FixTable(ByVal ws As Worksheet)
    Dim found As Range, numSummaryRows As Long
    With ws
        Set found = .Columns("A").Find("Career")
        If found Is Nothing Then Exit Sub
        numSummaryRows = .Cells(.Rows.Count, "A").End(xlUp).Row - found.Row
        numSummaryRows = IIf(numSummaryRows = 0, 1, numSummaryRows + 1)
        Debug.Print found.Offset(, 1).Resize(numSummaryRows, 30).Address, ws.Name
        found.Offset(, 1).Resize(numSummaryRows, 30).Copy found.Offset(, 2)
        found.Offset(, 1).Resize(numSummaryRows, 1).ClearContents
    End With
End Sub

Test links in sheet1:

Sheet1


Sample webpage:

sample results


Corresponding code write out:

Sheet write out