OK this pulls stats from yesterday's box scores:
Code:
from BeautifulSoup import BeautifulSoup, SoupStrainer
import urllib2, re, time
from datetime import date, timedelta
# Scrape yesterday's NHL box scores from Yahoo Sports and write one CSV file
# per game, named <game id>.csv, into the current working directory.
#
# NOTE: this targets Python 2 (urllib2) and BeautifulSoup 3, matching the
# imports at the top of the file.

BASE_URL = "http://sports.yahoo.com"
STATS_DIV_ID = "ysp-reg-box-team_stats"


def scoreboard_url(day):
    """Return the Yahoo NHL scoreboard URL for the given ``datetime.date``."""
    return BASE_URL + "/nhl/scoreboard?d=" + day.strftime("%Y-%m-%d")


def find_quoted_value(page, label):
    """Return the first single-quoted string that follows *label* in *page*.

    The surrounding quotes are kept (e.g. "'Ottawa'"), which is what ends
    up in the CSV.  '.' does not match newlines here, so the quoted value
    must be on the same line as the label.

    Raises:
        ValueError: if *label* followed by a quoted value is not found.
    """
    match = re.search(re.escape(label) + r".*?('.*?')", page)
    if match is None:
        raise ValueError("could not find a quoted value for %r" % label)
    return match.group(1)


def write_game_csv(game_date, href):
    """Fetch one box score page and write its contents to <game id>.csv.

    *game_date* is the already formatted date string for the first CSV
    line; *href* is the site-relative box score link taken from the
    scoreboard page.  The game id is the last 10 characters of *href*.
    """
    game_id = href[-10:]
    box_html = urllib2.urlopen(BASE_URL + str(href)).read()
    box_soup = BeautifulSoup(box_html)

    # Extract everything before opening the output file so that a failed
    # download or a changed page layout does not leave a truncated CSV.
    away_team = find_quoted_value(box_html, "awayTeamName")
    away_score = find_quoted_value(box_html, "awayTeamScore")
    # The original looked up the home *score* for the team name and the
    # home *name* for the score; each label now feeds the right variable.
    home_team = find_quoted_value(box_html, "homeTeamName")
    home_score = find_quoted_value(box_html, "homeTeamScore")

    with open(game_id + ".csv", "w") as out:
        out.write(game_date + "," + game_id + ",\n")
        out.write(away_team + ", " + away_score + ",\n")
        out.write(home_team + ", " + home_score + ",\n")

        # Team stats: one CSV line per table row, one field per <td>.
        for table in box_soup.findAll("div", id=STATS_DIV_ID):
            for row in table.findAll("tr"):
                cells = row.findAll("td")
                if not cells:
                    # Rows without <td> cells (presumably header rows)
                    # used to emit a bare newline, which is what ended
                    # the home-team line.  That newline is now written
                    # explicitly above, so such rows are skipped.
                    continue
                for cell in cells:
                    # find(text=True) returns None for an empty cell.
                    out.write(cell.find(text=True) or "")
                    out.write(",")
                out.write("\n")


def main():
    """Scrape every box score linked from yesterday's scoreboard page."""
    yesterday = date.today() - timedelta(1)
    game_date = yesterday.strftime("%Y-%m-%d")

    scoreboard_html = urllib2.urlopen(scoreboard_url(yesterday)).read()
    scoreboard = BeautifulSoup(scoreboard_html)

    # The same box score may be linked more than once; fetch each only once.
    seen = set()
    for link in scoreboard.findAll("a", href=re.compile("/nhl/boxscore")):
        href = link["href"]
        if href in seen:
            continue
        seen.add(href)
        write_game_csv(game_date, href)


if __name__ == "__main__":
    main()
And the output looks like this:
Code:
2011-10-11,2011101114,
'Minnesota', '3',
'Ottawa', '4',
27,44,
12,16,
2,13,
10,13,
3,2,
2,3,
1,1,
50%,33%,
50%,33%,
15,21,
34,42,
45%,55%,
45%,55%,
31,28,
21,21,
Can someone recommend a good method for bringing the csv files into a database?
Is there a clear favorite between relational and NoSQL? I was thinking I might want to learn CouchDB, but I'd most likely just end up using MySQL.
Also, that's my first program ever, so if anyone has suggestions to make the code
a. better
b. easier to read
I'd really appreciate it.
Thanks!