Skip to content
Snippets Groups Projects
Commit eb2e9f18 authored by Mario Garrido Tapias's avatar Mario Garrido Tapias
Browse files

Distribution of positions of the 3 seasons

parent ff7681cc
Branches
No related tags found
No related merge requests found
......@@ -33,6 +33,49 @@ decimalplaces <- function(x) {
0)
}
# Draws, side by side, one bar plot per season (17/18, 18/19, 19/20) with the
# number of players in each position, and prints the percentage split of each
# season to the console.
#
# Args:
#   data:     data frame (or tibble) holding, for every season, a column whose
#             name contains paste0(variable, ".1718"), paste0(variable, ".1819")
#             and paste0(variable, ".1920"). The name match is literal and
#             case-insensitive; when several columns match, the first is used.
#   variable: column-name prefix, e.g. "position".
#
# Returns:
#   Invisibly, a list with one data frame per season (columns `position` and
#   `numberOf`). A value that mentions several position codes is counted once
#   for every code it contains; NA values are not counted.
#
# Side effects:
#   Draws three plots on the active graphics device and writes three lines of
#   percentages with cat(). The caller's `mfrow` layout is restored on exit.
plotFor3SeasonsForPos <- function(data, variable) {
  seasons <- c(".1718", ".1819", ".1920")
  codes <- c("GK", "DF", "MF", "FW")
  labels <- c("Porteros", "Defensas", "Mediocentros", "Delanteros")

  # Switch to a 1x3 layout but hand the previous layout back to the caller.
  oldPar <- par(mfrow = c(1, 3))
  on.exit(par(oldPar), add = TRUE)

  colNames <- tolower(names(data))
  dfs <- lapply(seasons, function(season) {
    wanted <- paste0(variable, season)
    hits <- which(grepl(tolower(wanted), colNames, fixed = TRUE))
    if (length(hits) == 0) {
      stop("No column matching '", wanted, "' found in data", call. = FALSE)
    }
    # `[[` extracts a plain vector from data frames and tibbles alike.
    values <- as.character(data[[hits[1]]])
    counts <- vapply(
      codes,
      function(code) sum(grepl(code, values, fixed = TRUE)),
      integer(1),
      USE.NAMES = FALSE
    )
    data.frame(position = labels, numberOf = counts)
  })

  # Common y-axis limit so the three panels are directly comparable.
  maxPInPos <- max(0, vapply(dfs, function(df) max(df$numberOf), numeric(1)))

  for (i in seq_along(seasons)) {
    counts <- dfs[[i]]$numberOf
    barplot(
      height = counts,
      names.arg = dfs[[i]]$position,
      col = "#9BCD9B",
      xlab = "Posiciones",
      ylab = "Cantidad",
      # paste0() builds ONE title string; paste(c(...), sep = "") returned a
      # three-element vector instead of concatenating the pieces.
      main = paste0("Distribución posiciones ", seasons[i], "(Validación)"),
      ylim = c(0, maxPInPos + 5)
    )
    prop <- round((counts / sum(counts)) * 100, 2)
    cat(prop, "\n")
  }

  invisible(dfs)
}
#############
# LIBRERIAS #
#############
......@@ -205,6 +248,23 @@ for (name in playersIn2Clubs) {
rownames(rowsPIn2C) <- playersIn2Clubs
rowsPIn2C
######################
# VARIABLES CREATION #
######################
# AERIALS
# For each season, append an `aerials_contested.<season>` column to
# laLigaPlayers holding the row-wise sum of aerial duels won and lost.
for (seasonTag in c("1718", "1819", "1920")) {
  duelCols <- paste0(c("aerials_won.", "aerials_lost."), seasonTag)
  aerials <- laLigaPlayers %>% select(all_of(duelCols))
  aerials_contested <- rowSums(aerials)
  # do.call() lets the new column's name be built at run time while still
  # going through the same cbind(<data>, <name> = <values>) call.
  newColumn <- setNames(
    list(aerials_contested),
    paste0("aerials_contested.", seasonTag)
  )
  laLigaPlayers <- do.call(cbind, c(list(laLigaPlayers), newColumn))
}
##############################################
# Limpieza muestras erroneas temporada 18-19 #
##############################################
......@@ -228,14 +288,15 @@ playersR.1819 <- playersR.1819 %>% select(all_of(c("player", "squad.1819", varia
# Ibai Gomez, Munir El Haddadi, Ruben Sobrino, Jeison Murillo, Facundo Roncaglia, Takashi Inui, Ruben Vezo
# 4, 8, 13, 66, 72, 84, 109
# (presumably the row numbers in laLigaPlayers of the players listed above,
# in the same order -- TODO confirm)
#
# Select the rows of laLigaPlayers whose indices are stored in rowsPIn2C,
# ignoring rowsPIn2C rows 1-4 and 6-8 ...
jugadoresRepes <- laLigaPlayers[rowsPIn2C[-c(1:4, 6:8),], ]
# ... and build the complementary set (every row except those indices).
laLigaPlayerSplit <- laLigaPlayers[-rowsPIn2C[-c(1:4, 6:8),], ]
# Drop six rows by position.
# NOTE(review): the index list above also mentions 84, which is not removed
# here -- confirm whether that omission is intentional.
laLigaPlayers <- laLigaPlayers[-c(4, 8, 13, 66, 72, 109), ]
############################# ####################
# CONJUNTO de ENTRENAMIENTO # # CONJUNTO de TEST #
############################# ####################
# Fix the RNG seed so the random split below is reproducible.
set.seed(5682)
library(rsample)
# Rows of laLigaPlayers whose indices are stored in rowsPIn2C (ignoring
# rowsPIn2C rows 1-4 and 6-8) are kept apart from the random split.
# NOTE(review): if rows were removed from laLigaPlayers before this point,
# the indices in rowsPIn2C may no longer address the intended rows -- confirm.
jugadoresRepes <- laLigaPlayers[rowsPIn2C[-c(1:4, 6:8),], ]
laLigaPlayerSplit <- laLigaPlayers[-rowsPIn2C[-c(1:4, 6:8),], ]
# Split the remaining rows with prop = 8/10, stratified on "position.1920".
car_split <- initial_split(laLigaPlayerSplit, prop = 8/10, strata = "position.1920")
train <- training(car_split)
# The rows held out above are always appended to the training set.
train <- rbind(train, jugadoresRepes)
......@@ -244,6 +305,8 @@ test <- testing(car_split)
# Plot the per-season position distribution for the full data set, the
# training set and the test set.
plotFor3SeasonsForPos(laLigaPlayers, "position")
plotFor3SeasonsForPos(train, "position")
plotFor3SeasonsForPos(test, "position")
# Size: 1024x768 (presumably the pixel size used when exporting the plots -- TODO confirm)
############################################################################
# Comparación de jugadores que juegan para 2 EQUIPOS en la MISMA temporada #
############################################################################
......@@ -280,7 +343,7 @@ variables <- c("xa.", "passes_completed.", "passes.", "passes_pct.", "passes_tot
"xg.", "npxg.", "shots_total.", "shots_on_target.", "shots_on_target_pct.", "goals_per_shot.", "pens_made.", "pens_att.",
"dribbles.", "dribbles_completed.", "dribbles_completed_pct.", "goals_assists_per90.",
"sca_passes_dead.", "gca_passes_dead.", "sca_passes_live.", "gca_passes_live.", "sca_dribbles.", "gca_dribbles.", "sca_fouled.", "gca_fouled.",
"passes_intercepted.", "ball_recoveries.", "pressure_regains.", "fouls.", "tackles_won.", "tackles.", "aerials_won.", "aerials_lost.", "aerials_won_pct.",
"passes_intercepted.", "ball_recoveries.", "pressure_regains.", "fouls.", "tackles_won.", "tackles.", "aerials_won.", "aerials_lost.", "aerials_contested.", "aerials_won_pct.",
"goals_against_per90_gk.", "pens_saved.", "pens_allowed.", "shots_on_target_against.", "saves.", "save_pct.")
globalColNames <- c()
for (variable in variables) {
......@@ -293,7 +356,6 @@ trainStudyAux <- train %>% select(all_of(c("player", variablesWithSeason)))
trainStudy <- NULL
for (variable in variables) {
cat(variable)
aux <- trainStudyAux %>% select(contains(variable))
aux <- data.frame(lapply(aux, as.numeric))
if (dim(aux)[2] == 3) {
......@@ -301,5 +363,9 @@ for (variable in variables) {
}
trainStudy <- cbind(trainStudy, aux2)
}
trainStudy <- cbind(train %>% select(all_of(c("player"))), trainStudy)
trainStudy <- as_tibble(trainStudy)
names(trainStudy) <- globalColNames
names(trainStudy) <- c("player", globalColNames)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment