#User configuration: input/output locations and file names used by this script.
#Directory that setwd() switches to before listing the event (EVA/EVN) files;
#change this to point to the directory where you are storing your event files:
pbpDir <- "XXX\\2012eve"
#Directory that setwd() switches to before reading the game log file;
#change this to point to the directory where you are storing your game log files:
glDir <- "XXX\\gamelogs"
#Directory the processed table is written to;
#set this to the directory you want to print your results to:
resultsDir <- "XXX"
#Name of the game log file, read from glDir as a header-less CSV:
gameLogFile <- "gl2012.txt"
#NOTE(review): teamFile is not referenced anywhere in the visible script -
#presumably the name of a team file for the season; confirm before relying on it.
teamFile <- "TEAM2012"

#BaseRuns run estimator.
#  A, B, C, D: numeric scalars or equal-length vectors holding the four
#              BaseRuns components (this script passes baserunners, the
#              advancement factor, outs and home runs, in that order).
#  Returns A*B/(B+C) + D, evaluated element-wise.
baseRuns <- function(A,B,C,D) {
  numerator <- A * B
  denominator <- B + C
  numerator / denominator + D
}


################################################################################
setwd(glDir)
#This contains the Box Score Data For all games in a season.
#The two line-score columns (20 = visitor, 21 = home) are forced to character:
#left to read.csv's type guessing, a column whose entries all happen to be
#pure digits would be converted to numbers, dropping leading zeros
#(e.g. "010000300" -> 10000300) and breaking the character-wise parsing below.
#With header=FALSE the columns are named V1, V2, ..., which is what the named
#colClasses vector is matched against; all other columns are still guessed.
gameLogs <- read.csv(gameLogFile, header = FALSE, stringsAsFactors = FALSE,
                     colClasses = c(V20 = "character", V21 = "character"))

setwd(pbpDir)
#These are the play by play files. Each file contains a single team's home game
#play by play statistics for an entire season. Note that there is no redundancy -
#the same team's away game stats are located in the [home] file for the opponent.
#Strategy is to read in all pbp files, and pool all games into a single season.
dataFiles <- list.files()
#Indices of the event files: any name containing EVA/eva/EVN/evn. A single
#pattern keeps the indices unique and in directory-listing order.
pbpFiles <- grep("EVA|eva|EVN|evn", dataFiles)
#Report how many event files were found:
length(pbpFiles)
#Keep only the event files:
dataFiles <- dataFiles[pbpFiles]


#####################################################################################
# Extract all the stats needed to compute PE, and get inning by inning run counts:
#####################################################################################
#Total bases are a simple function of the hit columns, so compute them for
#every game at once instead of inside the per-game loop.
#  hits, doubles, triples, homers: per-game count vectors of equal length.
#  Singles are whatever hits remain once extra-base hits are removed.
totalBases <- function(hits, doubles, triples, homers) {
  singles <- hits - (doubles + triples + homers)
  singles + 2*doubles + 3*triples + 4*homers
}
#Visiting team uses game log columns 23-26, home team columns 51-54:
AwayTB <- totalBases(gameLogs[[23]], gameLogs[[24]], gameLogs[[25]], gameLogs[[26]])
HomeTB <- totalBases(gameLogs[[51]], gameLogs[[52]], gameLogs[[53]], gameLogs[[54]])

#Additionally, we'll extract some stats for innings pitched, as well as the inning by inning run counts-
#I like to have them in a: "X_X_X_X_X_X_X_X_X" format to work with

########################################################################
# Parse inning by inning run counts, each inning separated with a '_'
# Note that lengths may differ for HT and AT (ie if HT wins and bats 8 inn)
########################################################################
#parseLineScores: split a vector of line-score strings into innings.
#  lineScores: vector of line scores, one per game. Each inning is either a
#              single character or a parenthesised group such as "(10)";
#              an 'x' entry is skipped and not counted as an inning.
#  Returns a list with two vectors, each exactly as long as lineScores:
#    runs    - the inning entries joined with '_' (e.g. "0_1_10_0")
#    innings - the number of inning entries found (numeric)
#  An empty or missing line score yields "" and 0, so the results always stay
#  aligned with the game log rows (appending one value per game in a loop
#  silently dropped such games and shifted every later row).
parseLineScores <- function(lineScores) {
  lineScores <- as.character(lineScores)
  lineScores[is.na(lineScores)] <- ""
  #One token per inning: a whole "(..)" group, or any single character that is
  #not 'x' or a parenthesis. An unterminated "(" is simply ignored.
  tokens <- regmatches(lineScores, gregexpr("\\([^)]*\\)|[^x()]", lineScores))
  #Drop the parentheses around multi-digit innings:
  tokens <- lapply(tokens, function(tok) gsub("[()]", "", tok))
  list(
    runs = vapply(tokens, paste, character(1), collapse = "_"),
    innings = as.numeric(vapply(tokens, length, integer(1)))
  )
}

#Home line score is column 21, visitor line score is column 20:
homeLineScores <- parseLineScores(gameLogs[, 21])
awayLineScores <- parseLineScores(gameLogs[, 20])

homeInningRuns <- homeLineScores$runs
awayInningRuns <- awayLineScores$runs
#Innings pitched by one side = innings batted by the other side:
IPAway <- homeLineScores$innings
IPHome <- awayLineScores$innings

#Construct the per-game data table, one row per game log row.
#Columns are named directly in the call, home-team block first and away-team
#block second. The "...a" columns (Ha, BBa, HRa) hold the hits, walks and home
#runs allowed, i.e. the opponent's batting columns.
#stringsAsFactors=FALSE keeps every text column as character on all R versions,
#so no factor -> character conversion is needed afterwards.
#The table is deliberately NOT attach()ed: every later reference goes through
#PEStats_allGameLogs$..., and an attached copy would be a stale snapshot that
#lingers on the search path and masks names when the script is re-run.
PEStats_allGameLogs <- data.frame(
  Date            = as.numeric(gameLogs[,1]),
  DayNight        = as.character(gameLogs[,13]),
  HomeTeam        = as.character(gameLogs[,7]),
  Home_SP         = as.character(gameLogs[,104]),
  Home_R          = as.numeric(gameLogs[,11]),
  Home_H          = as.numeric(gameLogs[,51]),
  Home_HR         = as.numeric(gameLogs[,54]),
  Home_BB         = as.numeric(gameLogs[,59]),
  Home_CS         = as.numeric(gameLogs[,63]),
  Home_HBP        = as.numeric(gameLogs[,58]),
  Home_GIDP       = as.numeric(gameLogs[,64]),
  Home_TB         = as.numeric(HomeTB),
  Home_IBB        = as.numeric(gameLogs[,60]),
  Home_SH         = as.numeric(gameLogs[,56]),
  Home_SF         = as.numeric(gameLogs[,57]),
  Home_SB         = as.numeric(gameLogs[,62]),
  Home_AB         = as.numeric(gameLogs[,50]),
  Home_Ha         = as.numeric(gameLogs[,23]),
  Home_BBa        = as.numeric(gameLogs[,31]),
  Home_HRa        = as.numeric(gameLogs[,26]),
  Home_IP         = as.numeric(IPHome),
  Home_InningRuns = homeInningRuns,

  AwayTeam        = as.character(gameLogs[,4]),
  Away_SP         = as.character(gameLogs[,102]),
  Away_R          = as.numeric(gameLogs[,10]),
  Away_H          = as.numeric(gameLogs[,23]),
  Away_HR         = as.numeric(gameLogs[,26]),
  Away_BB         = as.numeric(gameLogs[,31]),
  Away_CS         = as.numeric(gameLogs[,35]),
  Away_HBP        = as.numeric(gameLogs[,30]),
  Away_GIDP       = as.numeric(gameLogs[,36]),
  Away_TB         = as.numeric(AwayTB),
  Away_IBB        = as.numeric(gameLogs[,32]),
  Away_SH         = as.numeric(gameLogs[,28]),
  Away_SF         = as.numeric(gameLogs[,29]),
  Away_SB         = as.numeric(gameLogs[,34]),
  Away_AB         = as.numeric(gameLogs[,22]),
  Away_Ha         = as.numeric(gameLogs[,51]),
  Away_BBa        = as.numeric(gameLogs[,59]),
  Away_HRa        = as.numeric(gameLogs[,54]),
  Away_IP         = as.numeric(IPAway),
  Away_InningRuns = awayInningRuns,

  stringsAsFactors = FALSE
)


########################################################################
# Compute Pyth Exp for each team going in to the current game:
########################################################################
#Pythagorean expectation of `team` over the games listed before row `gameIdx`
#of PEStats_allGameLogs (rows are taken to be in playing order).
#Returns 0 when the team has no earlier game in the table.
priorPythagExp <- function(team, gameIdx) {
  earlier <- seq_len(nrow(PEStats_allGameLogs)) < gameIdx
  playedAtHome <- earlier & PEStats_allGameLogs$HomeTeam == team
  playedAway <- earlier & PEStats_allGameLogs$AwayTeam == team
  if (sum(playedAtHome) + sum(playedAway) == 0) {
    return(0)
  }
  #runs scored: own runs in home games plus own runs in road games
  scored <- sum(as.numeric(PEStats_allGameLogs$Home_R[playedAtHome])) +
    sum(as.numeric(PEStats_allGameLogs$Away_R[playedAway]))
  #runs allowed: the opponents' runs in those same games
  allowed <- sum(as.numeric(PEStats_allGameLogs$Away_R[playedAtHome])) +
    sum(as.numeric(PEStats_allGameLogs$Home_R[playedAway]))
  scored^1.83 / ((scored)^1.83 + (allowed)^1.83)
}

Home_RunningPE <- numeric(nrow(PEStats_allGameLogs))
Away_RunningPE <- numeric(nrow(PEStats_allGameLogs))
for (game in seq_len(nrow(PEStats_allGameLogs))) {
  #the home team's PE prior to this game:
  Home_RunningPE[game] <- priorPythagExp(PEStats_allGameLogs$HomeTeam[game], game)
  #the away team's PE prior to this game:
  Away_RunningPE[game] <- priorPythagExp(PEStats_allGameLogs$AwayTeam[game], game)
}


########################################################################
# Use current game statistics to derive BaseRuns stat for each team:
########################################################################
#BaseRuns for one side of every game, computed on whole columns at once
#(baseRuns is plain arithmetic, so it works element-wise on vectors). This
#replaces a per-row loop that grew the result with c() on every iteration and
#produced a stray NA element when the table had no rows.
#  H, BB, HR, TB, AB: per-game hits, walks, home runs, total bases and at bats.
#  Returns one BaseRuns value per game.
sideBaseRuns <- function(H, BB, HR, TB, AB) {
  A <- H + BB - HR
  B <- (1.4*TB - .6*H - 3*HR + .1*BB)*1.02
  C <- AB - H
  D <- HR
  baseRuns(A,B,C,D)
}

Home_BR <- sideBaseRuns(PEStats_allGameLogs$Home_H, PEStats_allGameLogs$Home_BB,
                        PEStats_allGameLogs$Home_HR, PEStats_allGameLogs$Home_TB,
                        PEStats_allGameLogs$Home_AB)
#Uncomment this line if you want to scale the BR to 9 innings:
#Home_BR <- Home_BR * (9/PEStats_allGameLogs$Away_IP)

Away_BR <- sideBaseRuns(PEStats_allGameLogs$Away_H, PEStats_allGameLogs$Away_BB,
                        PEStats_allGameLogs$Away_HR, PEStats_allGameLogs$Away_TB,
                        PEStats_allGameLogs$Away_AB)
#Uncomment this line if you want to scale the BR to 9 innings:
#Away_BR <- Away_BR * (9/PEStats_allGameLogs$Home_IP)

#Append the running PE and BaseRuns vectors as four new columns:
PEStats_allGameLogs <- cbind(
  PEStats_allGameLogs,
  Home_RunningPE = Home_RunningPE,
  Away_RunningPE = Away_RunningPE,
  Home_BR = Home_BR,
  Away_BR = Away_BR
)
#Write the finished table, tab separated and unquoted, into the results directory:
setwd(resultsDir)
write.table(
  x = PEStats_allGameLogs,
  file = "ProcessedGameLogData.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)

