Cumulative average with conditional values

I'm new to R. Here is a small reproducible example of my df:

# Points scored by the home side and the away side, one element per game.
PTS_TeamHome <- c(101, 87, 94, 110, 95)
PTS_TeamAway <- c(95, 89, 105, 111, 121)
# Team abbreviations for the home side and the away side of each game.
TeamHome <- c("LAL", "HOU", "SAS", "MIA", "LAL")
TeamAway <- c("IND", "LAL", "LAL", "HOU", "NOP")
# Pass the vectors straight to data.frame(): wrapping them in cbind() first
# builds a character matrix, which turns the points columns into text and
# breaks any arithmetic on them.
df <- data.frame(
  TeamHome, TeamAway, PTS_TeamHome, PTS_TeamAway,
  stringsAsFactors = FALSE
)
df

TeamHome TeamAway PTS_TeamHome PTS_TeamAway
  LAL      IND          101           95
  HOU      LAL           87           89
  SAS      LAL           94          105
  MIA      HOU          110          111
  LAL      NOP           95          121

Imagine that these are the first five games of a 1230-game season. I want to calculate the cumulative average points per game, as of each game, for both the home team and the away team.

The result will look like this:

  TeamHome TeamAway PTS_TeamHome PTS_TeamAway HOMETEAM_AVGCUMPTS ROADTEAM_AVGCUMPTS
1  LAL      IND          101           95                101                 95
2  HOU      LAL           87           89                 87                 95
3  SAS      LAL           94          105                 94              98.33
4  MIA      HOU          110          111                110                 99
5  LAL      NOP           95          121               97.5                121

Please note how the value for the fifth game is computed for the home team. Since LAL is the home team, it looks at all the points LAL has scored so far, whether playing at home or on the road. In this case (101 + 89 + 105 + 95) / 4 = 97.5

Here is what I tried without much success:

# Non-working attempt, kept as posted. Visible problems:
#  - df has no TEAM1 / TEAM2 columns (they are named TeamHome / TeamAway)
#  - the division by the number of games is left unfinished (dangling `/`)
lst <- list()
for(i in 1:nrow(df)) lst[[i]] <- ( cumsum(df[which(df$TEAM1[1:i]==df$TEAM1[i]),df$PTS_TeamAway,0]) 
                                 + cumsum(df[which(df$TEAM2[1:i]==df$TEAM1[i]),df$PTS_TeamHome,0]) ) 
                             / #divided by number of games
  df$HOMETEAM_AVGCUMPTS <- unlist(lst)

I wanted to calculate the cumulative points and then divide by the number of games played, but none of this worked.

+4
source share
4
# Running points-per-game average for each row's home team. For game i, pool
# every score that team has posted in games 1..i -- whether it was the home or
# the away side -- and take the mean.
# seq_len() keeps the iteration empty for a zero-row df (1:nrow(df) would give
# c(1, 0)), and vapply() returns a type-checked numeric vector directly instead
# of growing a list that then has to be unlist()ed.
df$HOMETEAM_AVGCUMPTS <- vapply(seq_len(nrow(df)), function(i) {
  so_far <- seq_len(i)
  team <- df$TeamHome[i]
  mean(c(
    df$PTS_TeamHome[so_far][df$TeamHome[so_far] == team],
    df$PTS_TeamAway[so_far][df$TeamAway[so_far] == team]
  ))
}, numeric(1))


# Running points-per-game average for each row's away team. For game i, pool
# every score that team has posted in games 1..i -- whether it was the away or
# the home side -- and take the mean.
# seq_len() keeps the iteration empty for a zero-row df (1:nrow(df) would give
# c(1, 0)), and vapply() returns a type-checked numeric vector directly instead
# of growing a list that then has to be unlist()ed.
df$ROADTEAM_AVGCUMPTS <- vapply(seq_len(nrow(df)), function(i) {
  so_far <- seq_len(i)
  team <- df$TeamAway[i]
  mean(c(
    df$PTS_TeamAway[so_far][df$TeamAway[so_far] == team],
    df$PTS_TeamHome[so_far][df$TeamHome[so_far] == team]
  ))
}, numeric(1))


df
#   TeamHome TeamAway PTS_TeamHome PTS_TeamAway HOMETEAM_AVGCUMPTS ROADTEAM_AVGCUMPTS
# 1      LAL      IND          101           95                101                 95
# 2      HOU      LAL           87           89                 87                 95
# 3      SAS      LAL           94          105                 94           98.33333
# 4      MIA      HOU          110          111                110                 99
# 5      LAL      NOP           95          121               97.5                121

Explanation. The core of each loop iteration is mean(c(vec1,vec2)): two vectors of scores are combined and averaged.

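In the first loop, the first vector holds the points the team scored as the home side (team from col1, points from col3), and the second vector holds the points it scored as the away side (team from col2, points from col4). Inside the for loop, df$PTS_TeamHome[1:i] restricts the points to the games played up to row i, and the trailing [df$TeamHome[1:i] == df$TeamHome[i]] keeps only the rows where TeamHome equals the home team of the current row. In other words: "of the games so far, take those in which the current team appears", and average their points.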


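Data prep: set stringsAsFactors to FALSE and convert the points columns to numeric. The code above relies on the data being set up this way.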

# Points scored by the home side and the away side, one element per game.
PTS_TeamHome <- c(101, 87, 94, 110, 95)
PTS_TeamAway <- c(95, 89, 105, 111, 121)
# Team abbreviations for the home side and the away side of each game.
TeamHome <- c("LAL", "HOU", "SAS", "MIA", "LAL")
TeamAway <- c("IND", "LAL", "LAL", "HOU", "NOP")
# cbind() produces a character matrix, so every column arrives as text.
# stringsAsFactors is spelled out as FALSE (the shorthand `F` is an ordinary
# variable that can be reassigned) so the team names stay character.
df <- data.frame(
  cbind(TeamHome, TeamAway, PTS_TeamHome, PTS_TeamAway),
  stringsAsFactors = FALSE
)
# Restore the two points columns (3 and 4) to numeric.
df[3:4] <- lapply(df[3:4], as.numeric)
+2

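Another option is to reshape the data into a tidier (long) format: one row per team per game. A running mean within each team then covers home and away games alike.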

library(dplyr)
library(tidyr)

# Reshape to one row per team per game, then take a running mean per team.
# A single pivot_longer() with the ".value" sentinel pairs each team with its
# own points directly; the superseded gather() had to be called twice and the
# mismatched home/away combinations filtered out afterwards.
df %>%
  mutate(game = row_number()) %>%
  # Give the four columns a common "<value>_<location>" shape so the names can
  # be split on "_" into the output column (team / points) and the location.
  rename(
    team_TeamHome = TeamHome,
    team_TeamAway = TeamAway,
    points_TeamHome = PTS_TeamHome,
    points_TeamAway = PTS_TeamAway
  ) %>%
  pivot_longer(
    -game,
    names_to = c(".value", "location"),
    names_sep = "_"
  ) %>%
  arrange(game) %>%
  # The result is left grouped by team, as in the printed output.
  group_by(team) %>%
  mutate(run_mean_points = cummean(points))

# Build df directly from the four vectors -- no cbind() -- so the points
# columns keep their numeric type.
df <- data.frame(
  TeamHome,
  TeamAway,
  PTS_TeamHome,
  PTS_TeamAway,
  stringsAsFactors = FALSE
)

Source: local data frame [10 x 5]
Groups: team

   game location team points run_mean_points
1     1 TeamHome  LAL    101       101.00000
2     1 TeamAway  IND     95        95.00000
3     2 TeamHome  HOU     87        87.00000
4     2 TeamAway  LAL     89        95.00000
5     3 TeamHome  SAS     94        94.00000
6     3 TeamAway  LAL    105        98.33333
7     4 TeamHome  MIA    110       110.00000
8     4 TeamAway  HOU    111        99.00000
9     5 TeamHome  LAL     95        97.50000
10    5 TeamAway  NOP    121       121.00000
+3

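Here is another approach (base R only). It still uses a for loop, but only over the unique team names rather than over every game. The data is transposed once so that, given R's column-major matrix order, each team's scores are picked up in game order -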

## Transpose the data once, so that each game becomes a column:
## rows 1:2 then hold the team names and rows 3:4 the points.
## NOTE(review): with character team columns t(df) is presumably a character
## matrix, so the points below are strings -- confirm cumsum() coerces them.
tempdf <- t(df)     
## Create transposed matrix with future column names (one column per game)
mat <- matrix(NA, 2, nrow(df))
rownames(mat) <- c("HOMETEAM_AVGCUMPTS", "ROADTEAM_AVGCUMPTS")    
## Create a vector of unique team names
indx <- as.character(unique(unlist(df[1:2])))
## Run the loop only over the unique team names
for (i in indx) {
  ## Logical mask over the two team rows: TRUE where team i appears
  indx2 <- tempdf[1:2, ] == i               
  ## Points from the two points rows at those same positions
  temp <- tempdf[3:4, ][indx2]
  ## Running mean (cumulative sum / count so far), written back into the
  ## matching cells of the result matrix
  mat[indx2] <- cumsum(temp)/seq_along(temp)
}
## Combine result with the original data
cbind(df, t(mat))
#   TeamHome TeamAway PTS_TeamHome PTS_TeamAway HOMETEAM_AVGCUMPTS ROADTEAM_AVGCUMPTS
# 1      LAL      IND          101           95              101.0           95.00000
# 2      HOU      LAL           87           89               87.0           95.00000
# 3      SAS      LAL           94          105               94.0           98.33333
# 4      MIA      HOU          110          111              110.0           99.00000
# 5      LAL      NOP           95          121               97.5          121.00000
+3

Transpose. A more compact variant of @DavidArenburg's answer:

# Transposed team names (tv) and points (sv): one column per game, with the
# home entry in row 1 and the away entry in row 2.
tv <- t(df[1:2])
sv <- t(df[3:4])
# Running mean of the points within each team, transposed back so the two
# rows become the two new columns.
df[c("homeavg", "awayavg")] <- t(
  ave(sv, tv, FUN = cummean)
)

cummean comes from library(dplyr); you can swap it out for a base R equivalent if necessary, and likewise adjust the column names.


Or interleave. All the transposing above can be hard to follow. Instead, you can interleave the vectors using Arun's approach:

# Merge two vectors element by element: a[1], b[1], a[2], b[2], ...
# Each element is tagged with its position inside its own vector; ordering by
# that tag places equal positions next to each other, with a's element first.
interleave <- function(a, b) {
  combined <- c(a, b)
  positions <- c(seq_along(a), seq_along(b))
  combined[order(positions)]
}
# Undo an interleave: split x into a list of two vectors named "1" and "2",
# holding the odd-position and the even-position elements respectively.
unleave <- function(x) {
  alternating <- c(1L, 2L)
  split(x, alternating)
}

# Interleave teams (tv2) and points (sv2) so that each game contributes its
# home entry followed by its away entry.
tv2 <- interleave(df$TeamHome, df$TeamAway)
sv2 <- interleave(df$PTS_TeamHome, df$PTS_TeamAway)

# Running mean of the points within each team, split back into the home and
# away columns.
df[c("homeavg", "awayavg")] <- unleave(
  ave(sv2, tv2, FUN = cummean)
)
+3
source

All Articles