Login Search

Excel based tutorial for web scraping

Last Post
#198

Default

You will need to play around with that one. Do a Google search for excel text splitting from space to bracket.

I was able to overcome my problem by using

Dim URL As Range


For Each URL In Sheets("Urls").Range("A1").Cells 'Sheet Urls is where I have the different urls to add on to the site.
ActiveWorkbook.Worksheets.Add
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;www.firstpartofsite.com/" & URL, Destination:=Range("$A$1"))
.Name = "table"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlOverwriteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = False
.RefreshPeriod = 0
.WebSelectionType = "1"
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = True
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With

Next
End Sub
#200

Default

Quote Originally Posted by b_rad_1983 View Post
a12 = Jorge De La Rosa (L)

=LEFT(A12,1) & ". " & MID(A12,SEARCH(" ", A12,1)+1,(SEARCH(" (",A12,SEARCH(" ",A12,1)+1)-1)-SEARCH(" ",A12,1))


b12 = J. De La Rosa
Thanks again for your help! This doesn't make any sense to me, i need to study it.
Last edited by oilcountry99; 06-09-14 at 01:23 PM.
#201

Default

Quote Originally Posted by b_rad_1983 View Post
I'm looking to import pitcher stats that include WHIP

I'm currently using http://espn.go.com/mlb/probables

But there is no walks, so I cannot use a formula to figure out WHIP stats.

Any suggestions of a better site than espn?

Or a way to figure out whip?
What about getting the statistics for the pitchers from ESPN stats pages... like this...

http://espn.go.com/mlb/stats/pitchin...se/order/false

You can loop through the "count" and store all the pitcher data in a sheet, then just look up that data from the Probables page you have by pitcher?

You'd just have to adjust the pitcher count and the qualified/false or qualified/true parameters and loop through that beast and you should have all the pitching data you can handle.
#202

Default

Quote Originally Posted by akphidelt View Post
What about getting the statistics for the pitchers from ESPN stats pages... like this...

http://espn.go.com/mlb/stats/pitchin...se/order/false

You can loop through the "count" and store all the pitcher data in a sheet, then just look up that data from the Probables page you have by pitcher?

You'd just have to adjust the pitcher count and the qualified/false or qualified/true parameters and loop through that beast and you should have all the pitching data you can handle.


On the probables page it all there and easy to get. The pitching link you posted, its over many of pages. I'm not advanced enough to take on this task.

I would love to make a sheet that has every pitcher in mlb and all thier stats.
#203

Default

Quote Originally Posted by b_rad_1983 View Post
On the probables page it all there and easy to get. The pitching link you posted, its over many of pages. I'm not advanced enough to take on this task.

I would love to make a sheet that has every pitcher in mlb and all thier stats.
I'll take a stab at it after lunch... might be a little rusty on VBA but it should come back.
#205

Default

Quote Originally Posted by b_rad_1983 View Post
Rusty or not. Point us in the direction and we should be able to help out!
This is super ghetto but it gets the job done on my end...

Code: [View]
Sub GetPitchingStats()

Dim startPitcher As Long
Dim lastPitcher As Long
Dim qualified As String
Dim lastRow As Long

startPitcher = 1
lastPitcher = 800
qualified = "false"
lastRow = 1

Application.ScreenUpdating = False

Do Until startPitcher > lastPitcher
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://espn.go.com/mlb/stats/pitching/_/count/" & startPitcher & "/qualified/" & qualified & "/order/false" _
        , Destination:=Range("$A$" & lastRow))
        .Name = "false"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
    
    'delete all blank cells in column R
    Columns("R:R").Select
    Selection.SpecialCells(xlCellTypeBlanks).Select
    Selection.EntireRow.Delete
    
    'start back at cell B1
    Range("B1").Select
    
    'add one to the last row so it starts on a blank row next time
    lastRow = Selection.End(xlDown).Row + 1
    
    'increase the pitcher count by 40
    startPitcher = startPitcher + 40
Loop

'a little cleanup
lastRow = Range("$B$5000").End(xlUp).Row
For i = 2 To lastRow
If Cells(i, 2) = "PLAYER" Then
    Rows(i & ":" & i).EntireRow.Delete
End If
Next i

Application.ScreenUpdating = True

End Sub
#206

Default

Quote Originally Posted by akphidelt View Post
This is super ghetto but it gets the job done on my end...

Code: [View]
Sub GetPitchingStats()

Dim startPitcher As Long
Dim lastPitcher As Long
Dim qualified As String
Dim lastRow As Long

startPitcher = 1
lastPitcher = 800
qualified = "false"
lastRow = 1

Application.ScreenUpdating = False

Do Until startPitcher > lastPitcher
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://espn.go.com/mlb/stats/pitching/_/count/" & startPitcher & "/qualified/" & qualified & "/order/false" _
        , Destination:=Range("$A$" & lastRow))
        .Name = "false"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
    
    'delete all blank cells in column R
    Columns("R:R").Select
    Selection.SpecialCells(xlCellTypeBlanks).Select
    Selection.EntireRow.Delete
    
    'start back at cell B1
    Range("B1").Select
    
    'add one to the last row so it starts on a blank row next time
    lastRow = Selection.End(xlDown).Row + 1
    
    'increase the pitcher count by 40
    startPitcher = startPitcher + 40
Loop

'a little cleanup
lastRow = Range("$B$5000").End(xlUp).Row
For i = 2 To lastRow
If Cells(i, 2) = "PLAYER" Then
    Rows(i & ":" & i).EntireRow.Delete
End If
Next i

Application.ScreenUpdating = True

End Sub

I had to add Dim i As Integer, then it worked great!

Thanks!
#210

Default

Quote Originally Posted by b_rad_1983 View Post
In the time it took you to do that, how long would it take to accomplish it in php?
Just to get that data would be about 10-15 minutes. Would be another 10-15 minutes if I wanted to set up a table in the DB to store it.

The beauty of it though, you could scrape every games data, store it and calculate that data yourself if you wanted to.