Skip to content
Snippets Groups Projects
Commit f53e5fb3 authored by Mario Garrido Tapias's avatar Mario Garrido Tapias
Browse files

Updates on univariantAnalysis function to plot one or two plots

parent d91a3349
No related branches found
No related tags found
No related merge requests found
......@@ -4,20 +4,25 @@ setwd("/home/mariogt/TFGs/Estadistica/")
# FUNCIONES #
#####################################
# Draws overlaid density curves of one variable for the three seasons.
#
# data:     data frame holding one column per season, named
#           "<variable>.1718", "<variable>.1819" and "<variable>.1920".
# variable: column-name prefix (character scalar), e.g. "age".
#
# Returns a ggplot object with one semi-transparent density per season; the
# fill legend uses the season suffixes (".1718", ".1819", ".1920").
# Stops with an error when any of the three columns is missing.
distBarFor3Season <- function(data, variable) {
  seasons <- c(".1718", ".1819", ".1920")
  columns <- paste0(variable, seasons)

  # Fail early with a readable message instead of an obscure selection error.
  missing_cols <- setdiff(columns, names(data))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in data: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # Stack the three season columns into one long vector, in season order, so
  # it lines up with the season labels built below.
  values <- unlist(lapply(columns, function(col) data[[col]]),
                   use.names = FALSE)
  df <- data.frame(
    temporada = rep(seasons, each = nrow(data)),
    Variable = values
  )

  ggplot(df, aes(x = Variable, fill = temporada)) + geom_density(alpha = 0.5)
}
distBarFor3Season(train, "age")
# Histograms for all seasons for the variable entered by parameter.
barpFor3Season <- function(data, variable) {
par(mfrow = c(1, 3))
......@@ -37,51 +42,11 @@ barpFor3Season <- function(data, variable) {
}
}
# Bar plots (one panel per season) of the number of players per position.
#
# data:     data frame with one position column per season. The column is
#           located by name, looking for "<variable>.1718", "<variable>.1819"
#           and "<variable>.1920" (literal, case-insensitive match; the first
#           matching column is used).
# variable: column-name prefix (character scalar).
#
# A row is counted for a position when its value contains the code "GK", "DF",
# "MF" or "FW", so a value listing several codes counts once for each of them.
# Missing values are never counted. All three panels share the same y-axis
# limit so they can be compared directly.
#
# Returns (invisibly) a list with one data frame per season holding the
# columns `position` and `numberOf`. The previous panel layout is restored
# when the function exits.
plotFor3SeasonsForPos <- function(data, variable) {
  seasons <- c(".1718", ".1819", ".1920")
  codes <- c("GK", "DF", "MF", "FW")
  labels <- c("Portero", "Defensa", "Mediocentro", "Delantero")

  # Three panels side by side; put the caller's layout back afterwards.
  old_par <- par(mfrow = c(1, 3))
  on.exit(par(old_par), add = TRUE)

  dfs <- lapply(seasons, function(season) {
    pattern <- tolower(paste0(variable, season))
    hits <- which(grepl(pattern, tolower(names(data)), fixed = TRUE))
    if (length(hits) == 0) {
      stop("No column matching '", variable, season, "' found in data",
           call. = FALSE)
    }
    # `[[` always yields a plain vector, for data frames and tibbles alike.
    positions <- as.character(data[[hits[1]]])
    counts <- vapply(
      codes,
      function(code) sum(grepl(code, positions, fixed = TRUE)),
      integer(1),
      USE.NAMES = FALSE
    )
    data.frame(position = labels, numberOf = counts)
  })

  maxPInPos <- max(vapply(dfs, function(df) max(df$numberOf), integer(1)))
  # barplot() rejects a zero-height axis, so keep at least one unit.
  y_top <- max(maxPInPos, 1L)

  for (i in seq_along(seasons)) {
    barplot(
      height = dfs[[i]]$numberOf,
      names.arg = dfs[[i]]$position,
      col = rgb(0.8, 0.1, 0.1, 0.6),
      xlab = "posiciones",
      ylab = "cantidad",
      # paste0() builds ONE title string; paste(c(...), sep = "") returned a
      # three-element vector instead.
      main = paste0("Distribución posiciones ", seasons[i], "(TRAIN)"),
      ylim = c(0, y_top)
    )
  }

  invisible(dfs)
}
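# Returns one or two plots to analyze a variable in longitudinal data:
# nplots == 0 returns only the first plot (bp), nplots == 1 only the second
# plot (gp), and any other value returns both arranged side by side.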
univariantAnalysis <- function(data, variable, ids) {
univariantAnalysis <- function(data, variable, ids, nplots) {
info <- data %>% select(contains(paste(variable, ".", sep = "")))
info <- cbind(ids, info)
cat(names(info))
# WIDE to LONG
dl <- gather(info, season, variable, 2:4, factor_key = TRUE)
......@@ -97,12 +62,15 @@ univariantAnalysis <- function(data, variable, ids) {
geom_point(alpha=0.5, color = "blue", size=3) +
geom_line(color = "red", size = 1) +
labs(title = "Promedio por temporada", x = "Temporada", y = titleForYAxis(variable))
if(nplots == 0){ bp }
else if (nplots == 1) { gp }
else {
figure <- ggarrange(bp, gp,
labels = c("A", "B"),
ncol = 2, nrow = 1)
figure
}
}
# Returns the spanish title of the Y-axis according to the variable entered by parameter.
titleForYAxis <- function(variable) {
......@@ -115,20 +83,40 @@ titleForYAxis <- function(variable) {
npx = "Goles esperados no de penalti (npxG)",
xa = "Asistencias esperadas (xA)",
shots_on_target_pct = "Porcentaje de tiros a puerta",
passes_completed = "Pases realizados con éxito",
passes = "Pases totales realizados",
passes_pct = "Porcentaje de pases completados",
passes_total_distance = "Distancia total recorrida por sus pases",
assisted_shots = "Pases que generan un tiro",
passes_switches = "Pases que cambian la orientacion del juego",
pass_targets = "Pases totales hacia el jugador",
passes_received = "Pases recibidos por el jugador",
sca_passes_live = "Acciones de creación de tiro con el balon en juego",
gca_passes_live = "Acciones de creación de gol con el balon en juego",
sca_passes_dead = "Acciones de creación de tiro a balon parado",
gca_passes_dead = "Acciones de creación de gol a balon parado",
sca_dribbles = "Acciones de creación de tiro generadas por un regate",
gca_dribbles = "Acciones de creación de gol generadas por un regate",
sca_fouled = "Acciones de creación de tiro tras falta",
gca_fouled = "Acciones de creación de gol tras falta",
passes_intercepted = "Pases interceptados",
pressure_regains = "Porcentaje de presiones exitosas",
dribbles = "Regates intentados",
dribbles_completed = "Regates conseguidos con éxito",
dribbles_completed_pct = "Porcentaje de regates exitosos",
fouls = "Faltas cometidas",
tackles_won = "Entradas con éxito",
tackles = "Entradas realizadas",
aerials_won = "Balones aéreos ganados",
aerials_lost = "Balones aéreos pérdidos",
aerials_contested = "Balones aéreos disputados",
aerials_won_pct = "Porcenaje de balones aéreos ganados",
pens_att = "Penalties atajados",
pens_made = "Penalties marcados",
pens_att = "Penalties intentados",
shots_on_target_against = "Tiros a puerta en contra",
goals_against_per90_gk = "Goles en contra por 90 min (GK)",
pens_saved = "Penalties atajados",
pens_allowed = "Penalties no parados",
saves = "Paradas totales",
save_pct = "Porcentaje de ocasiones paradas",
"Variable no encontrada")
......@@ -143,6 +131,7 @@ library(reshape)
library(tidyr) # gather() -> WIDE data to LONG data
library(ggplot2) # graphics
library(ggpubr) # multiple ggplots
library(RColorBrewer) # paletas
##########################################
......@@ -150,33 +139,81 @@ library(ggpubr) # multiple ggplots
##########################################
# Get players identifiers
ids <- data.frame(train$X)
ids <- t(ids)
ids <- data.frame(laLigaPlayers$X)
#ids <- t(ids)
########
# Edad #
########
# Distribución (gráficos de densidad)
distBarFor3Season(train, "age")
# Box plots
univariantAnalysis(laLigaPlayers, "age", ids, 0)
# Busqueda del OUTLIER
edad <- laLigaPlayers %>% select(contains(paste("age.", sep = "")))
laLigaPlayers$player[which.max(edad[,3])]
##########
# Altura #
##########
# Distribucion (Gráficos de barras)
distBarFor3Season(train, "height")
# Distribucion (Densidades)
# distBarFor3Season(train, "height")
# Box plots
univariantAnalysis(laLigaPlayers, "height", ids, 0)
################
# Pierna buena #
################
# Distribución (Grafico de tarta)
foot <- train %>% select("foot.1718")
foot <- laLigaPlayers %>% select(all_of("foot.1819"))
library(lessR)
PieChart(foot.1718, hole = FALSE, values = "%", data = foot,
main = "Distribución de pierna dominante", fill = brewer.pal(3, "Set1"),)
PieChart(foot.1819, hole = FALSE, values = "%", data = foot,
main = "Distribución de pierna dominante", fill = brewer.pal(3, "Set1"))
######
# xA #
######
univariantAnalysis(laLigaPlayers, "xa", ids, 2)
##############################
# Pases realizados con éxito #
##############################
univariantAnalysis(laLigaPlayers, "passes_completed", ids, 2)
####################
# Pases realizados #
####################
univariantAnalysis(laLigaPlayers, "passes", ids, 2)
#######################
# Porcentaje de pases #
#######################
univariantAnalysis(laLigaPlayers, "passes_pct", ids, 2)
##################################
# Distancia total mediante pases #
##################################
univariantAnalysis(laLigaPlayers, "passes_total_distance", ids, 2)
#############################
# Pases que derivan en tiro #
#############################
barpFor3Season(laLigaPlayers, "assisted_shots")
univariantAnalysis(laLigaPlayers, "assisted_shots", ids, 2)
###################################
# Cambios de orientación de juego #
###################################
univariantAnalysis(laLigaPlayers, "passes_switches", ids, 2)
############################################################################
# Cantidad de ocasiones en las que un pase tiene como objetivo ese jugador #
############################################################################
univariantAnalysis(laLigaPlayers, "pass_targets", ids, 2)
##########################################
# Cantidad de pases que recibe con éxito #
##########################################
univariantAnalysis(laLigaPlayers, "passes_received", ids, 2)
###########################################
# Porcentaje de pases recibidos con éxito #
###########################################
univariantAnalysis(laLigaPlayers, "passes_received_pct", ids, 2)
##################################
# Goles y asistencias por 90 min #
##################################
distBarFor3Season(train, "goals_assists_per90")
univariantAnalysis(train, "goals_assists_per90", ids)
univariantAnalysis(train, "goals_assists_per90", ids, 2)
######
# xG #
######
......@@ -185,31 +222,13 @@ ids <- t(ids)
# npxG #
########
univariantAnalysis(train, "npxg", ids)
######
# xA #
######
univariantAnalysis(train, "xa", ids)
##################################
# Porcentaje de tiros a porteria #
##################################
# univariantAnalysis(train, "xa", ids)
#######################
# Porcentaje de pases #
#######################
univariantAnalysis(train, "passes_pct", ids)
##################################
# Distancia total mediante pases #
##################################
univariantAnalysis(train, "passes_total_distance", ids)
#############################
# Pases que derivan en tiro #
#############################
barpFor3Season(train, "assisted_shots")
univariantAnalysis(train, "assisted_shots", ids)
###################################
# Cambios de orientación de juego #
###################################
univariantAnalysis(train, "passes_switches", ids)
###################
# SCA passes dead #
###################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment