#!/bin/bash
##Author -- Andy Addington
##Retrosheet script - automated downloading and parsing of retrosheet information into a local MySQL database
##Enter mysql username at the bottom of the page, enter password at prompt.
##The starteve and endeve variables set the earliest and latest decades of event files to download from retrosheet.org
##The x and y variables set the earliest and latest years that are to be parsed
##After everything is configured properly, open a terminal in the same directory of this file and type in "chmod +x " + name_of_file
##./name_of_file to run
##Written and tested with BASH_VERSION = 4.2.10(1)-release.  Type "echo $BASH_VERSION" in a terminal to check.  Any version 3.0 or higher should run fine.
## Prepare the working directory (~/retrosheet/TEMP) and, unless the Chadwick
## tools are already available, download, build and install them from source.
## The script is left in ~/retrosheet/TEMP so later steps can use relative paths.
retro_tmp="$HOME/retrosheet/TEMP"
mkdir -p "$retro_tmp" || exit 1
## Absolute path: a relative "cd retrosheet" only works when launched from $HOME.
cd "$retro_tmp" || exit 1

if command -v cwevent >/dev/null 2>&1 \
  && command -v cwgame >/dev/null 2>&1 \
  && command -v cwsub >/dev/null 2>&1; then
  echo "Chadwick tools already on PATH; skipping download and build."
else
  chadwick_version=0.5.3
  chadwick_src="chadwick-${chadwick_version}"
  chadwick_tarball="${chadwick_src}.tar.gz"

  ## The URL is quoted and the tracking query string dropped: unquoted '&'
  ## characters made the shell background wget and treat the rest as assignments.
  ## -O gives the download a predictable name for the extraction step.
  wget -O "$chadwick_tarball" \
    "http://downloads.sourceforge.net/project/chadwick/chadwick-0.5/${chadwick_version}/${chadwick_tarball}" \
    || { echo "error: could not download ${chadwick_tarball}" >&2; exit 1; }

  ## The download is a gzip-compressed tarball, not a zip archive.
  tar -xzf "$chadwick_tarball" \
    || { echo "error: could not extract ${chadwick_tarball}" >&2; exit 1; }

  ## Build in a subshell so the working directory of the script is untouched.
  (
    cd "$chadwick_src" && ./configure && make && sudo make install
  ) || { echo "error: building or installing Chadwick failed" >&2; exit 1; }

  ## NOTE(review): the original linked /usr/lib/local/chadwick.so.0 to
  ## /usr/lib/chadwicky.so.0. This assumes the default /usr/local prefix and a
  ## library named libchadwick.so.0 -- confirm on the target system.
  if [[ -e /usr/local/lib/libchadwick.so.0 && ! -e /usr/lib/libchadwick.so.0 ]]; then
    sudo ln -s /usr/local/lib/libchadwick.so.0 /usr/lib/libchadwick.so.0
  fi

  ## Remove the whole source tree and tarball ("rm *" + rmdir cannot remove a
  ## tree that contains subdirectories).
  rm -rf -- "$chadwick_src" "$chadwick_tarball"
fi
starteve=1920  ##earliest decade for retrosheet data
endeve=2010    ##latest decade for retrosheet data
## Download one event archive per decade. An arithmetic loop is used because
## brace expansion happens before variable expansion, so
## {$starteve..$endeve..10} is passed to wget as a literal string.
for (( decade = starteve; decade <= endeve; decade += 10 )); do
  wget "http://www.retrosheet.org/events/${decade}seve.zip"
done
#wget http://www.chancesis.com/files/chadwick_batch.zip
wget http://www.chancesis.com/files/sql_load.zip
## Extract every archive; an archive is deleted only when its unzip succeeded.
## -o overwrites existing files so a re-run does not stop at interactive prompts.
find . -name "*.zip" -exec unzip -o {} \; -exec /bin/rm {} \;
mv sql_load/*.sql ~/retrosheet/TEMP
##mv chadwick_batch/*.bat ~/retrosheet/TEMP
##rmdir chadwick_batch
rmdir sql_load
## Drop the shipped load scripts; this script writes its own per-year versions
## further down. -f keeps a re-run quiet when they are already gone.
rm -f -- '02 load events.sql' '03 load games.sql' '04 load subs.sql'
#find -type f name *.bat -exec sed 's/C:\\Retrosheet\common\\programs\\//g' {} \;
#find -type f name *.bat -exec sed 's/C:\\Retrosheet\\data\\parsed\\//g' {} \;
#find -type f name *.sql -exec sed 's/C:\\Retrosheet\\data\\parsed\\//g' {} \;
## Lower-case every file name in the working directory (Perl-style rename).
rename 'y/A-Z/a-z/' *
x=1950 #earliest year to parse data with chadwick and create sql files
y=2011 #latest year to parse data with chadwick and create sql files

## Event records: one all<year>.csv per year from that year's event files.
for (( i = x; i <= y; i++ )); do
  cwevent -f 0-96 -x 0-60 -y "${i}" "${i}"*.ev* > "all${i}.csv"
done

## Game records: one games<year>.csv per year.
for (( i = x; i <= y; i++ )); do
  cwgame -f 0-83 -y "${i}" "${i}"*.ev* > "games${i}.csv"
done

## Substitution records: one sub<year>.csv per year.
for (( i = x; i <= y; i++ )); do
  cwsub -f 0-9 -y "${i}" "${i}"*.ev* > "sub${i}.csv"
done
#chmod +x cwevent_batch.bat
#chmod +x cwgame_batch.bat
#chmod +x cwsub_batch.bat
#./cwevent_batch.bat
#./cwgame_batch.bat
#./cwsub_batch.bat
##Copy every .csv file created above into the MySQL directory used for the retrosheet database
##NOTE(review): the database itself is only created further down (mysqladmin create), so on a
##first run this directory may not exist yet -- confirm the intended ordering.
csv_stage_dir=/var/lib/mysql/retrosheet/
sudo cp *.csv "$csv_stage_dir"
##SQL structure and import files are based on recommendations from Colin Wyers (http://www.hardballtimes.com/main/article/building-a-retrosheet-database-the-short-form/) and this post at chancesis.com (http://www.chancesis.com/2010/10/27/building-a-retrosheet-database-part-1/)
## Append one single-line LOAD DATA statement per year (x..y) to
## "02 load events.sql", loading all<year>.csv into events_bck.
## The column list is built once, outside the loop, from short chunks so that no
## identifier can be split by a line break (the original broke BASE4_FORCE_FL in
## two, leaving the statement truncated and never written to the file).
events_columns='GAME_ID,AWAY_TEAM_ID,INN_CT,BAT_HOME_ID,OUTS_CT,BALLS_CT,STRIKES_CT,PITCH_SEQ_TX,'
events_columns+='AWAY_SCORE_CT,HOME_SCORE_CT,BAT_ID,BAT_HAND_CD,RESP_BAT_ID,RESP_BAT_HAND_CD,'
events_columns+='PIT_ID,PIT_HAND_CD,RES_PIT_ID,RES_PIT_HAND_CD,'
events_columns+='POS2_FLD_ID,POS3_FLD_ID,POS4_FLD_ID,POS5_FLD_ID,POS6_FLD_ID,POS7_FLD_ID,POS8_FLD_ID,POS9_FLD_ID,'
events_columns+='BASE1_RUN_ID,BASE2_RUN_ID,BASE3_RUN_ID,EVENT_TX,LEADOFF_FL,PH_FL,BAT_FLD_CD,BAT_LINEUP_ID,'
events_columns+='EVENT_CD,BAT_EVENT_FL,AB_FL,H_CD,SH_FL,SF_FL,EVENT_OUTS_CT,DP_FL,TP_FL,RBI_CT,WP_FL,PB_FL,'
events_columns+='FLD_CD,BATTEDBALL_CD,BUNT_FL,FOUL_FL,BATTEDBALL_LOC_TX,'
events_columns+='ERR_CT,ERR1_FLD_CD,ERR1_CD,ERR2_FLD_CD,ERR2_CD,ERR3_FLD_CD,ERR3_CD,'
events_columns+='BAT_DEST_ID,RUN1_DEST_ID,RUN2_DEST_ID,RUN3_DEST_ID,'
events_columns+='BAT_PLAY_TX,RUN1_PLAY_TX,RUN2_PLAY_TX,RUN3_PLAY_TX,'
events_columns+='RUN1_SB_FL,RUN2_SB_FL,RUN3_SB_FL,RUN1_CS_FL,RUN2_CS_FL,RUN3_CS_FL,RUN1_PK_FL,RUN2_PK_FL,RUN3_PK_FL,'
events_columns+='RUN1_RESP_PIT_ID,RUN2_RESP_PIT_ID,RUN3_RESP_PIT_ID,GAME_NEW_FL,GAME_END_FL,'
events_columns+='PR_RUN1_FL,PR_RUN2_FL,PR_RUN3_FL,'
events_columns+='REMOVED_FOR_PR_RUN1_ID,REMOVED_FOR_PR_RUN2_ID,REMOVED_FOR_PR_RUN3_ID,'
events_columns+='REMOVED_FOR_PH_BAT_ID,REMOVED_FOR_PH_BAT_FLD_CD,'
events_columns+='PO1_FLD_CD,PO2_FLD_CD,PO3_FLD_CD,ASS1_FLD_CD,ASS2_FLD_CD,ASS3_FLD_CD,ASS4_FLD_CD,ASS5_FLD_CD,EVENT_ID,'
events_columns+='HOME_TEAM_ID,BAT_TEAM_ID,FLD_TEAM_ID,BAT_LAST_ID,INN_NEW_FL,INN_END_FL,'
events_columns+='START_BAT_SCORE_CT,START_FLD_SCORE_CT,INN_RUNS_CT,GAME_PA_CT,INN_PA_CT,PA_NEW_FL,PA_TRUNC_FL,'
events_columns+='START_BASES_CD,END_BASES_CD,BAT_START_FL,RESP_BAT_START_FL,PIT_START_FL,RESP_PIT_START_FL,'
events_columns+='RUN1_FLD_CD,RUN1_LINEUP_ID,RUN1_ORIGIN_EVENT_ID,'
events_columns+='RUN2_FLD_CD,RUN2_LINEUP_ID,RUN2_ORIGIN_EVENT_ID,'
events_columns+='RUN3_FLD_CD,RUN3_LINEUP_ID,RUN3_ORIGIN_EVENT_ID,'
events_columns+='RUN1_RESP_CATCH_ID,RUN2_RESP_CATCH_ID,RUN3_RESP_CATCH_ID,'
events_columns+='PA_BALL_CT,PA_CALLED_BALL_CT,PA_INTENT_BALL_CT,PA_PITCHOUT_BALL_CT,PA_HIT_BALL_CT,PA_OTHER_BALL_CT,'
events_columns+='PA_STRIKE_CT,PA_CALLED_STRIKE_CT,PA_SWINGMISS_STRIKE_CT,PA_FOUL_STRIKE_CT,PA_BIP_STRIKE_CT,PA_OTHER_STRIKE_CT,'
events_columns+='EVENT_RUNS_CT,FLD_ID,BASE2_FORCE_FL,BASE3_FORCE_FL,BASE4_FORCE_FL,BAT_SAFE_ERR_FL,'
events_columns+='BAT_FATE_ID,RUN1_FATE_ID,RUN2_FATE_ID,RUN3_FATE_ID,FATE_RUNS_CT,'
events_columns+='ASS6_FLD_CD,ASS7_FLD_CD,ASS8_FLD_CD,ASS9_FLD_CD,ASS10_FLD_CD,UNKNOWN_OUT_EXC_FL,UNCERTAIN_PLAY_EXC_FL'

## The SQL quote characters are passed as printf arguments ("','" and "'\"'") so
## the format string itself needs no nested escaping.
for (( i = x; i <= y; i++ )); do
  printf 'load data infile "all%s.csv" into table events_bck fields terminated by %s optionally enclosed by %s(%s);\n' \
    "$i" "','" "'\"'" "$events_columns"
done >> '02 load events.sql'
## Append one single-line LOAD DATA statement per year (x..y) to
## "03 load games.sql", loading games<year>.csv into games_bck.
## The original spread the echo arguments over raw newlines without line
## continuations, so each column line was executed as a separate command and the
## statement never reached the file intact. The list is built as a string here.
games_columns='GAME_ID, GAME_DT, GAME_CT, GAME_DY, START_GAME_TM, DH_FL, DAYNIGHT_PARK_CD, '
games_columns+='AWAY_TEAM_ID, HOME_TEAM_ID, PARK_ID, AWAY_START_PIT_ID, HOME_START_PIT_ID, '
games_columns+='BASE4_UMP_ID, BASE1_UMP_ID, BASE2_UMP_ID, BASE3_UMP_ID, LF_UMP_ID, RF_UMP_ID, '
games_columns+='ATTEND_PARK_CT, SCORER_RECORD_ID, TRANSLATOR_RECORD_ID, INPUTTER_RECORD_ID, '
games_columns+='INPUT_RECORD_TS, EDIT_RECORD_TS, METHOD_RECORD_CD, PITCHES_RECORD_CD, '
games_columns+='TEMP_PARK_CT, WIND_DIRECTION_PARK_CD, WIND_SPEED_PARK_CT, FIELD_PARK_CD, PRECIP_PARK_CD, SKY_PARK_CD, '
games_columns+='MINUTES_GAME_CT, INN_CT, AWAY_SCORE_CT, HOME_SCORE_CT, AWAY_HITS_CT, HOME_HITS_CT, '
games_columns+='AWAY_ERR_CT, HOME_ERR_CT, AWAY_LOB_CT, HOME_LOB_CT, '
games_columns+='WIN_PIT_ID, LOSE_PIT_ID, SAVE_PIT_ID, GWRBI_BAT_ID, '
## Starting lineups: <side>_LINEUP<n>_BAT_ID, <side>_LINEUP<n>_FLD_CD for slots
## 1-9, all AWAY slots first and then all HOME slots.
for side in AWAY HOME; do
  for slot in 1 2 3 4 5 6 7 8 9; do
    games_columns+="${side}_LINEUP${slot}_BAT_ID, ${side}_LINEUP${slot}_FLD_CD, "
  done
done
games_columns+='AWAY_FINISH_PIT_ID, HOME_FINISH_PIT_ID'

## The SQL quote characters are passed as printf arguments ("','" and "'\"'") so
## the format string itself needs no nested escaping.
for (( i = x; i <= y; i++ )); do
  printf 'load data infile "games%s.csv" into table games_bck fields terminated by %s optionally enclosed by %s (%s);\n' \
    "$i" "','" "'\"'" "$games_columns"
done >> '03 load games.sql'
## Append one LOAD DATA statement per year (x..y), loading sub<year>.csv into subs.
## Output goes to "04 load subs.sql", the file the mysql import step reads; the
## original wrote to "04 subs events.sql", which nothing ever loaded.
for (( i = x; i <= y; i++ )); do
  printf 'load data infile "sub%s.csv" into table subs fields terminated by %s optionally enclosed by %s ( GAME_ID,INN_CT,BAT_HOME_ID ,SUB_ID ,SUB_HOME_ID ,SUB_LINEUP_ID ,SUB_FLD_CD ,REMOVED_ID ,REMOVED_FLD_CD ,EVENT_ID );\n' \
    "$i" "','" "'\"'"
done >> '04 load subs.sql'
user="user"  ##Enter user name here
pass="pass" ##Enter password here (leave empty to have mysql prompt for it on every command)
host="localhost" ##Enter host here. Probably doesn't need to be changed
mysql_db_dir=/var/lib/mysql/retrosheet ##/path/to/mysql/retrosheet

## Create the database. If it already exists this reports an error and the
## script carries on with the imports.
## Note: -p"$pass" makes the password visible in the process list while each
## client runs.
mysqladmin -u "$user" -h "$host" -p"$pass" create retrosheet

## Stage the CSV files now that the database exists. The generated LOAD DATA
## statements name the files without a path, and the server resolves such names
## relative to the database directory. -u skips files already staged there.
sudo cp -u -- ./*.csv "$mysql_db_dir"/

## Run the SQL scripts in order with the same credentials and host for each
## one (the original ignored $host and $pass for the last script).
for sql_file in \
  '01 create structure.sql' \
  '02 load events.sql' \
  '03 load games.sql' \
  '04 load subs.sql' \
  'partition.sql' \
  'lookup codes.sql'; do
  mysql -u "$user" -p"$pass" -h "$host" retrosheet < "$sql_file" \
    || echo "warning: running '${sql_file}' failed" >&2
done

## Clean up the intermediate CSV files: the local copies and the staged ones.
## The staged glob is expanded by a root shell because the invoking user may not
## be able to read the MySQL directory. The original passed the directory
## itself to rm, which fails and leaves the staged copies behind.
rm -f -- ./*.csv
sudo sh -c 'rm -f -- "$1"/*.csv' sh "$mysql_db_dir"
##If the creation of the database was successful, it's safe to delete all the event files (.evn, .eva, .evd, etc.).  If the script didn't complete the process, the event files will still be available after it runs, which saves the trouble of downloading everything again.
