MSXML2.XMLHTTP not working

3.4k Views Asked by At

I don't know what's going on behind the scenes with these objects, but I use them all the time with great success.

dim ie As New SHDocVw.InternetExplorer
ie.navigate url
Set oDoc = ie.Document
iT = oDoc.body.innerText

AND

dim oX As New MSXML2.XMLHTTP
dim oDoc As HTMLDocument
oX.Open "GET", url, False
oX.send
rT = oX.responseText

But for one particular webpage the ie object hangs, creates errors, makes my hard-drive thrash and frustrates me completely.

I like the MSXML2 object because it's always fast and trouble-free. The problem on this particular webpage is it is not returning the right info. It seems like it is getting the wrong page or it's not getting the whole thing.

My minimum requirement is getting the entire html for the page -- I can parse from there.

Here is an example of the pages I'm trying to get: http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay

My ambition is to get them all, back to about 2001 when they started keeping the play-by-play record. Something about the technology they're using is upsetting ie and blocking msxml2.

Any suggestions to get me going in the right direction?

1

There are 1 best solutions below

0
On

This isn't the sexiest solution but here you go:

Option Explicit

Sub fantasyFootball_egghead()
Const READYSTATE_COMPLETE = 4
Const tempDir As String = "C:\Windows\Temp\"

Dim URL$, s_outerhtml$ ''These are strings
Dim IE As Object, IE_Element As Object, IE_HTMLCollection As Object
Dim i_file% ''This is an integer
Dim blnSheetFnd As Boolean
Dim ws As Excel.Worksheet

''Enter your address to navigate to here
URL = "http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay"

''Create an Internet Explorer object if it doesn't exist
If IE Is Nothing Then Set IE = CreateObject("InternetExplorer.Application")

''Make the window visible with true, hidden with false
IE.Visible = True
''navigate to the website
IE.Navigate URL

'' use this loop to make wait until the webpage has loaded
Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
  DoEvents
Loop

''The next line helps ensure that the html has been fully loaded
Application.Wait Now() + TimeValue("0:00:02")
s_outerhtml = IE.document.body.OuterHtml
i_file = FreeFile

''This is a modification of some code I found at www.tek-tips.com <--great resource
''the code saves a temporary copy of the webpage to your temp file
Open tempDir & "\tempFile.htm" For Output As #i_file
Print #i_file, s_outerhtml

Close #i_file

''Creating a "Data" sheet if it doesn't exist
For Each ws In ThisWorkbook.Worksheets
  If ws.Name = "Data" Then blnSheetFnd = True: Exit For
Next

If blnSheetFnd = False Then Sheets.Add: ActiveSheet.Name = "Data"

Sheets("Data").Cells.Clear

''Here is your webquery, using the temporary file as its source
''this is untested in 2003, if it errors out, record a macro
''and replace the property that throws the error with your recorded property
With Sheets("Data").QueryTables.Add(Connection:= _
   "URL;" & tempDir & "tempFile.htm" _
   , Destination:=Range("$A$1"))
   .Name = "Data"
   .FieldNames = True
   .RowNumbers = False
   .FillAdjacentFormulas = False
   .PreserveFormatting = True
   .RefreshOnFileOpen = False
   .BackgroundQuery = True
   .RefreshStyle = xlInsertDeleteCells
   .SavePassword = False
   .SaveData = True
   .AdjustColumnWidth = True
   .RefreshPeriod = 0
   .WebSelectionType = xlEntirePage
   .WebFormatting = xlWebFormattingAll
   .WebPreFormattedTextToColumns = True
   .WebConsecutiveDelimitersAsOne = True
   .WebSingleBlockTextImport = False
   .WebDisableDateRecognition = False
   .WebDisableRedirections = False
   .Refresh BackgroundQuery:=False
End With

''delete the temporary file
Kill tempDir & "\tempFile.htm"

IE.Quit
Set IE = Nothing
Set IE_HTMLCollection = Nothing

End Sub

If you put this in a loop, just make sure you delete out the query tables, otherwise excel will stop running when there are too many connections.

Sub delete_qryTables()
Dim qt As QueryTable
Dim qts As QueryTables

Set qts = ThisWorkbook.Worksheets("Data").QueryTables
For Each qt In qts
    qt.Delete
Next

End Sub