Code:
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 30 20:10:35 2020
@author: Waterstpub87
"""
import numpy as np
import pandas as pd
years = ['2020']

# Literal (old, new) substitutions that turn a cleaned school name into the
# slug used in the per-school game-log URL.  They are applied strictly in the
# order listed; later entries rely on the effect of earlier ones
# (e.g. 'purdue-fort-wayne' must be handled before the shorter 'fort-wayne').
_URL_PUNCTUATION_FIXES = [
    (' ', '-'),
    ('.', ''),
    ('&', ''),
    ('(', ''),
    (')', ''),
    ("'", ''),
    ("--", '-'),
]
# Applied after lower-casing.  Presumably these map current school names to
# the names the site still uses in its URLs -- TODO confirm against the site.
_URL_SLUG_FIXES = [
    ('little-rock', 'arkansas-little-rock'),
    ('uc-', 'california-'),
    ('university-of-california', 'california'),
    ('purdue-fort-wayne', 'ipfw'),
    ('fort-wayne', 'ipfw'),
    ('omaha', 'nebraska-omaha'),
    ('siu-edwardsville', 'southern-illinois-edwardsville'),
    ('texas-rio-grande-valley', 'texas-pan-american'),
    ('cal-state-long-beach', 'long-beach-state'),
]
# Slugs set by exact school-name match instead of substring replacement.
_URL_EXACT_OVERRIDES = {
    'Louisiana': 'louisiana-lafayette',
    'VMI': 'virginia-military-institute',
}

# Column labels assigned to the combined game-log table (41 labels; the last
# one is the 'School1' column added while downloading).
# NOTE(review): 'P oints', '3P %', 'S TL' and 'OPPF T%' contain stray spaces
# that look like paste artefacts.  They are kept byte-for-byte because
# 'P oints' and 'S TL' end up in the header of the output CSV.
_GAMELOG_COLUMNS = [
    'G', 'Date', 'Location', 'Opp', 'Results', 'P oints', 'Points Against',
    'FG', 'FGA', 'FG%', '3P', '3PA', '3P %', 'FT', 'FTA', 'FT%', 'ORB',
    'TRB', 'AST', 'S TL', 'BLK', 'TOV', 'PF', 'Blank', 'OPPFP', 'OPFPA',
    'OPFG%', 'OPP3P', 'OPP3PA', 'OPP3P%', 'OPPFT', 'OPPFTA', 'OPPF T%',
    'OPPORB', 'OPPTRB', 'OPPAST', 'OPPSTL', 'OPPBLK', 'OPPTOV', 'OPPPF',
    'School',
]
# Percentage columns and the 'Blank' column are removed from the final table.
_DROPPED_COLUMNS = ['Blank', 'FG%', '3P %', 'FT%', 'OPFG%', 'OPP3P%', 'OPPF T%']

# Literal substitutions applied, in order, to the 'School' column.
_SCHOOL_NAME_FIXES = [
    ('Cal State Long Beach', 'Long Beach State'),
    ('SIU Edwardsville', 'Southern Illinois-Edwardsville'),
    ('VMI', 'Virginia Military Institute'),
]
# Literal substitutions applied, in order, to the 'Opp' column.  These are
# substring replacements, so ORDER MATTERS: some entries deliberately undo the
# side effects of earlier, shorter patterns (e.g. 'Penn' -> 'Pennsylvania'
# turns 'Penn State' into 'Pennsylvania State', which a later entry restores).
_OPPONENT_NAME_FIXES = [
    ('UMBC', 'Maryland-Baltimore County'),
    ('UNLV', 'Nevada-Las Vegas'),
    ('Detroit', 'Detroit Mercy'),
    ('BYU', 'Brigham Young'),
    ('Southern Miss', 'Southern Mississippi'),
    ('UTEP', 'Texas-El Paso'),
    ('UTSA', 'Texas-San Antonio'),
    ('UCF', 'Central Florida'),
    ('LSU', 'Louisiana State'),
    ('Ole Miss', 'Mississippi'),
    ('LIU-Brooklyn', 'Long Island University'),
    ('UMass-Lowell', 'Massachusetts-Lowell'),
    ('California', 'University of California'),
    ('USC', 'Southern California'),
    ('UConn', 'Connecticut'),
    ('UMass', 'Massachusetts'),
    ('UCSB', 'UC-Santa Barbara'),
    ('UNC Wilmington', 'North Carolina-Wilmington'),
    ("St. Peter's", "Saint Peter's"),
    ('UNC Asheville', 'North Carolina-Asheville'),
    ('NC State', 'North Carolina State'),
    ('UNC', 'North Carolina'),
    ('Central Connecticut', 'Central Connecticut State'),
    ('UT-Martin', 'Tennessee-Martin'),
    ('TCU', 'Texas Christian'),
    ("Saint Mary's", "Saint Mary's (CA)"),
    ("Pitt", "Pittsburgh"),
    ("VCU", "Virginia Commonwealth"),
    ("UIC", "Illinois-Chicago"),
    ("SMU", "Southern Methodist"),
    ("Penn", "Pennsylvania"),
    ("USC Upstate", "South Carolina Upstate"),
    ("UMKC", "Missouri-Kansas City"),
    ("UNC Greensboro", "North Carolina-Greensboro"),
    ("St. Joseph's", "Saint Joseph's"),
    ("ETSU", "East Tennessee State"),
    ("Pennsylvania State", "Penn State"),
    ("North Carolina Greensboro", "North Carolina-Greensboro"),
    ("Southern California Upstate", "South Carolina Upstate"),
    ("University of California Baptist", "California Baptist"),
    ('SIU-Edwardsville', 'Southern Illinois-Edwardsville'),
    ('VMI', 'Virginia Military Institute'),
]


def _replace_in_order(series, substitutions):
    """Apply literal substring substitutions to a string Series, in order.

    ``regex=False`` is passed explicitly so that patterns such as '.', '('
    and "St. Peter's" are always matched literally, independent of the
    default of ``Series.str.replace`` in the installed pandas version.
    """
    for old, new in substitutions:
        series = series.str.replace(old, new, regex=False)
    return series


# ---------------------------------------------------------------------------
# Step 1: for every season, read the school list and download each school's
# game log.  Frames are collected in a list and concatenated once at the end,
# so a failed download for any single school (including the first one) can
# no longer discard the data of all the others.
# ---------------------------------------------------------------------------
gamelog_frames = []
for year in years:
    schoolsurl = "https://www.sports-reference.com/cbb/seasons/" + year + "-school-stats.html"
    schools = pd.read_html(schoolsurl)
    df = schools[0]

    # Work on a copy so the column assignments below modify `scl` itself
    # rather than a slice of `df`.
    scl = df['Overall'].copy()
    scl['School'] = scl['School'].str.replace('NCAA', '', regex=False)
    scl['School'] = scl['School'].str.strip()
    scl.index = scl['School']

    # Derive the URL slug from the cleaned school name.
    slug = _replace_in_order(scl['School'], _URL_PUNCTUATION_FIXES)
    slug = slug.str.lower()
    scl['URL'] = _replace_in_order(slug, _URL_SLUG_FIXES)

    # Exact-name overrides.  A single .loc[mask, column] assignment is used
    # because chained indexing (scl.loc[name]['URL'] = ...) may write to a
    # temporary copy and leave `scl` unchanged.
    for school_name, school_slug in _URL_EXACT_OVERRIDES.items():
        scl.loc[scl['School'] == school_name, 'URL'] = school_slug

    # Remove rows whose 'School' cell is the literal text 'Overall' or
    # 'School' (presumably header rows repeated inside the table body).
    scl = scl[scl['School'] != 'Overall']
    scl = scl[scl['School'] != 'School']
    scl.index = scl['URL']

    for x, school in zip(scl['URL'], scl['School']):
        if not isinstance(x, str):
            # Missing school name -> no usable slug; nothing to download.
            continue
        url = 'https://www.sports-reference.com/cbb/schools/' + x + '/' + year + '-gamelogs.html'
        try:
            data = pd.read_html(url)[0]
        except Exception as exc:
            # Best effort: one unreachable or unparsable page must not abort
            # the whole run, but the skip is reported instead of hidden.
            print('Skipping', url, '-', repr(exc))
            continue
        data['School1'] = school
        gamelog_frames.append(data)

if not gamelog_frames:
    raise RuntimeError('No game logs could be downloaded; nothing to write.')

results = pd.concat(gamelog_frames)
# Raw dump of everything downloaded, before any clean-up.
results.to_csv('CBBDB.csv')

# ---------------------------------------------------------------------------
# Step 2: tidy the combined table.
# ---------------------------------------------------------------------------
cols = _GAMELOG_COLUMNS
results.columns = cols

# Move 'School' from the last position to position 2.
mid = results.pop('School')
results.insert(2, 'School', mid)
results = results.drop(columns=_DROPPED_COLUMNS)

# Drop rows whose 'Date' cell holds the text 'School' or 'Date'
# (presumably header rows repeated inside the scraped tables).
results = results[results.Date != 'School']
results = results[results.Date != 'Date']
results = results.fillna(0)

# Columns at positions 6..33 (everything after the six leading descriptive
# columns) are converted to integers.
cols = list(results)
for column in cols[6:34]:
    results[column] = results[column].astype(int)

# Pace: average of the two sides' (FGA + 0.49*FTA + TOV - ORB) terms.
results['Pace'] = (.50*(results['FGA'] + (.49*results['FTA']) + results['TOV'] - results['ORB'])) + (.50 * (results['OPFPA'] + (.49 * results['OPPFTA'])-results['OPPORB']+results['OPPTOV']))

# ---------------------------------------------------------------------------
# Step 3: rewrite school / opponent names, then write the final file.
# ---------------------------------------------------------------------------
results['School'] = _replace_in_order(results['School'], _SCHOOL_NAME_FIXES)
results['Opp'] = _replace_in_order(results['Opp'], _OPPONENT_NAME_FIXES)

# `year` still holds the last entry of `years` here, so the file is named
# after the final season processed.
results.to_csv('CBBD'+year+'.csv')