OK this pulls stats from yesterday's box scores:
and output looks like this:
Can someone recommend a good method for bringing the csv files into a database?
Is there a clear favorite between relational and NoSQL? I was thinking I might want to learn CouchDB, but I'd most likely just end up using MySQL.
Also that's my first program ever so if anyone has suggestions to make the code
a. better
b. easier to read
I'd really appreciate it.
thanks
Code:
from BeautifulSoup import BeautifulSoup, SoupStrainer
import urllib2
import re
import time
from datetime import date, timedelta

# Scrape yesterday's NHL box scores from the Yahoo Sports scoreboard and write
# one CSV file per game. Each file is named "<game id>.csv", where the game id
# is the last 10 characters of the box-score link.

YAHOO_SPORTS = "http://sports.yahoo.com"
SCOREBOARD_URL = YAHOO_SPORTS + "/nhl/scoreboard?d="
BOXSCORE_LINK = re.compile('/nhl/boxscore')


def _quoted_value(page_source, key):
    """Return the first single-quoted string that follows *key*.

    The surrounding single quotes are kept in the returned value. The match
    does not cross a line break, because "." does not match newlines here.

    Raises ValueError if *key* followed by a quoted string is not found.
    """
    match = re.search(re.escape(key) + r".*?('.*?')", page_source)
    if match is None:
        raise ValueError("could not find %s in box score page" % key)
    return match.group(1)


def _write_game_csv(box_path, game_date):
    """Fetch one box-score page and write its stats to "<game id>.csv".

    box_path  -- site-relative link to the box score (the anchor's href)
    game_date -- date string written as the first CSV field
    """
    game_id = box_path[-10:]
    page_source = urllib2.urlopen(YAHOO_SPORTS + str(box_path)).read()
    box_soup = BeautifulSoup(page_source)

    # Extract everything before opening the output file, so a failed fetch or
    # a missing value does not leave a half-written CSV behind.
    away_team = _quoted_value(page_source, "awayTeamName")
    away_score = _quoted_value(page_source, "awayTeamScore")
    home_team = _quoted_value(page_source, "homeTeamName")
    home_score = _quoted_value(page_source, "homeTeamScore")

    # "with" guarantees the file is flushed and closed, even on error.
    with open(game_id + ".csv", "w") as out:
        out.write(game_date + "," + game_id + ",")
        out.write("\n")
        out.write(away_team + ", " + away_score + ",")
        out.write("\n")
        out.write(home_team + ", " + home_score + ",")

        # Team stats: one output line per <tr>; a row without <td> cells
        # still emits its newline.
        for table in box_soup.findAll('div', id="ysp-reg-box-team_stats"):
            for row in table.findAll('tr'):
                for cell in row.findAll('td'):
                    # find(text=True) returns None for a cell with no text;
                    # write an empty field instead of failing.
                    out.write(cell.find(text=True) or "")
                    out.write(",")
                out.write("\n")


yesterday = (date.today() - timedelta(1)).strftime("%Y-%m-%d")
scoreboard = BeautifulSoup(urllib2.urlopen(SCOREBOARD_URL + yesterday).read())

# Every box-score link on the scoreboard page corresponds to one game.
for link in scoreboard.findAll('a', href=BOXSCORE_LINK):
    _write_game_csv(link['href'], yesterday)
Code:
2011-10-11,2011101114, 'Minnesota', '3', 'Ottawa', '4', 27,44, 12,16, 2,13, 10,13, 3,2, 2,3, 1,1, 50%,33%, 50%,33%, 15,21, 34,42, 45%,55%, 45%,55%, 31,28, 21,21,
Is there a clear favorite between relational and NoSQL? I was thinking I might want to learn CouchDB, but I'd most likely just end up using MySQL.
Also that's my first program ever so if anyone has suggestions to make the code
a. better
b. easier to read
I'd really appreciate it.
thanks