VBA - scraping websites videos | Excel VBA Part 47.2 - Scraping Website Tables and Clicking Links

Posted by Andrew Gould on 07 May 2019

In this video you'll learn how to write VBA code to click links on a webpage to view different tables and then scrape the contents of those tables into new Excel worksheets.

You can download any files that you need to follow the video here.

You can increase the size of the video:

Full screen mode for YouTube

You can view the video in full screen mode as shown on the left, using the icon at the bottom right of the frame.

You can also increase the quality of the video:

Changing resolution

You can improve the resolution of the video using another icon at the bottom right of the frame. This will slow down the connection speed, but increase the display and sound quality. This icon only becomes visible when you start playing the video.

Finally, if nothing happens when you play the video, check that you're not using IE in compatibility view.

This page has 3 threads Add post
25 Mar 20 at 16:16

Sub myrecon()

Dim myhtml_element As IHTMLElement
Dim myurl As String

On Error GoTo err_clear

myurl = " https://webpage"

Set mybrowser = New InternetExplorer

mybrowser.Silent = True
mybrowser.navigate myurl
mybrowser.Visible = True

Do While mybrowser.readyState <> READYSTATE_COMPLETE Or mybrowser.Busy

Set htmldoc = mybrowser.document
htmldoc.all.username.Value = "name"
htmldoc.all.password.Value = "pass"

For Each myhtml_element In htmldoc.getElementsByTagName("button")
If myhtml_element.Type = "submit" And myhtml_element.class = "button" Then myhtml_element.Click:
Exit For
Next

Do While mybrowser.readyState <> READYSTATE_COMPLETE Or mybrowser.Bus
For Each myhtml_element In htmldoc.getElementsByTagscr("/arm/skins/images/structure/matte/card//square/reconciliations.png")
If myhtml_element.Title = "Reconciliations" And myhtml_element.alt = "Reconciliations" Then myhtml_element.Click:
Exit For
Next

err_clear:

If Err <> 0 Then
Err.Clear
Resume Next
End If

Hello,

Thank you so much for sharing all the information, I created a macro to navigate trough a websit but the 2nd loop is not working, would you please help me?

Thanks again.

26 Mar 20 at 16:28

We're a bit distracted at the moment at the owlery by other events, but maybe someone else will be able to suggest a solution?

03 Jan 20 at 00:59

Hello Andrew Gould & WiseOwlTutorials

Thank you very much for bringing these videos together! It really helped me practice the sample I wanted to do!  I was able to gather the information I wanted using I.E.  However, when I tried to simulate my actions when using the MSXML2.ServerXMLhttp.6.0 method, VBA was unable to recognize the same ID and tag names recognized by I.E.

I present my VBA code at the bottom.  Can you help with this?! Where's the error?

Thank you so much. Good work.

My VBA Code:

Option Explicit

Sub ListTableOptions()
    Dim XMLPage As New MSXML2.XMLHTTP60
    Dim HTMLDoc As New MSHTML.HTMLDocument
    
    Dim TableOptionsLinks As MSHTML.IHTMLElementCollection
    Dim TableOptions As MSHTML.IHTMLElement
    Dim TableOptionLink As MSHTML.IHTMLElement
    Dim TableName As String
    Dim URL As String
    Dim NextHref As String
    Dim NextURL As String
    Dim DisplayName As String
    
    DeleteOldSheets
        
    URL = "https://www.whoscored.com/Regions/108/Tournaments/5/Seasons/7928/Stages/17835/Show/Italy-Serie-A-2019-2020"
    
    XMLPage.Open "GET", URL, False
    XMLPage.send

    If XMLPage.Status <> 200 Then
    MsgBox "Problem" & vbNewLine & XMLPage.Status & " - " & XMLPage.statusText
    Exit Sub
    End If

    HTMLDoc.body.innerHTML = XMLPage.responseText
    Set XMLPage = Nothing
    
    Set TableOptions = HTMLDoc.getElementById("tournament-tables-17835-options")
    Set TableOptionsLinks = TableOptions.getElementsByTagName("a")
    
    For Each TableOptionLink In TableOptionsLinks
    
        If LCase(TableOptionLink.innerText) <> "progress" Then
            TableName = Right(TableOptionLink.href, Len(TableOptionLink.href) - InStr(TableOptionLink.href, "#"))
            NextHref = TableOptionLink.getAttribute("href")
            NextURL = URL & Mid(NextHref, InStr(NextHref, ":") + 6)
                XMLPage.Open "GET", NextURL, False
                XMLPage.send
                If XMLPage.Status <> 200 Then
                MsgBox "Problem" & vbNewLine & XMLPage.Status & " - " & XMLPage.statusText
                Exit Sub
                End If
                HTMLDoc.body.innerHTML = XMLPage.responseText
                Set XMLPage = Nothing
            ProcessTable HTMLDoc.getElementById(TableName & "-grid"), TableOptionLink.innerText, TableName
        End If
        
    Next TableOptionLink
    
End Sub

Sub ProcessTable(HTMLTable As MSHTML.IHTMLElement, DisplayName As String, TableName As String)

    Dim HTMLTable1 As MSHTML.IHTMLElement
    Dim TableSection As MSHTML.IHTMLElement
    Dim TableRows As MSHTML.IHTMLElementCollection
    Dim TableRow As MSHTML.IHTMLElement
    Dim TableCell As MSHTML.IHTMLElement
    Dim A1 As MSHTML.IHTMLElement
    Dim A2 As MSHTML.IHTMLElement
    
    Dim RowNum As Long, ColNum As Integer
    
    Worksheets.Add
    ActiveSheet.Name = DisplayName
    Range("A1").Value = DisplayName
    RowNum = 2
    
    For Each TableSection In HTMLTable.Children
        
        If LCase(TableSection.tagName) <> "tfoot" Then

            If LCase(TableSection.tagName) = "thead" Then
            Set TableRows = TableSection.getElementsByTagName(TableName & "-general-header")
            
            ElseIf LCase(TableSection.tagName) = "tbody" Then
                Set TableRows = TableSection.Children
            
            End If
            
            For Each TableRow In TableRows
                ColNum = 1

                For Each TableCell In TableRow.Children
                    Cells(RowNum, ColNum).Value = TableCell.innerText
                    ColNum = ColNum + 1
                    
                Next TableCell
        
                RowNum = RowNum + 1
                
            Next TableRow
        End If
    Next TableSection

    Range("A1").CurrentRegion.EntireColumn.AutoFit
    Range("A1:B1").Font.Size = 12
    Range("A2", Range("A2").End(xlToRight)).Offset(-1, 0).Interior.Color = rgbDarkBlue
    Range("A2", Range("A2").End(xlToRight)).Interior.Color = rgbCornflowerBlue
    Range("A2", Range("A2").End(xlToRight).Offset(-1, 0)).Font.Color = rgbWhite
    Range("A2", Range("A2").End(xlToRight).Offset(-1, 0)).Font.Bold = True
    
End Sub

Sub DeleteOldSheets()

    Dim ws As Worksheet
    
    Application.DisplayAlerts = False
    
    For Each ws In ThisWorkbook.Worksheets
        
        If Not ws Is Menu Then ws.Delete
        
    Next ws
    
    Application.DisplayAlerts = True
    
End Sub

27 Jun 19 at 20:06

Hi Andrew

Thank you to share your knowledge

I try to text the code of video webscrapin nº 47.2 with the method of request get insted of Aplication Internet explorer and it dosent work at all (only capture the value of the tables section head and foot but the body dosent capture any value)

Do you know why is it?

Thanks

01 Jul 19 at 09:40

Hi Luis,

The most likely reason for this is that the website has removed the information for that particular tournament - this is one of the main problems with making examples for current events: it's inevitably out-of-date very quickly!

You can find lots of other examples of the same type of page on the same website such as this one https://www.oddschecker.com/golf/open-championship/2019-open-championship/winner which I've just tested and works in the same way as the example shown in the video.

I hope that helps!