Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GMTool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
margarr
GMTool
Commits
f53e5fb3
Commit
f53e5fb3
authored
Jun 12, 2022
by
Mario Garrido Tapias
Browse files
Options
Downloads
Patches
Plain Diff
Updates on univariantAnalysis function to plot one or two plots
parent
d91a3349
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
UnivariateAnalysis.R
+105
-86
105 additions, 86 deletions
UnivariateAnalysis.R
with
105 additions
and
86 deletions
UnivariateAnalysis.R
+
105
−
86
View file @
f53e5fb3
...
...
@@ -4,20 +4,25 @@ setwd("/home/mariogt/TFGs/Estadistica/")
# FUNCIONES #
#####################################
# Overlaid density plot of one variable across the three seasons.
#
# Args:
#   data:     data frame holding one column per season, named
#             "<variable>.1718", "<variable>.1819" and "<variable>.1920".
#   variable: column-name prefix (string) of the variable to plot.
#
# Returns:
#   A ggplot object with one semi-transparent density curve per season
#   (x = the variable's values, fill = the season suffix).
distBarFor3Season <- function(data, variable) {
  seasons <- c(".1718", ".1819", ".1920")
  columns <- paste0(variable, seasons)

  # Fail loudly on a missing season column instead of silently dropping it.
  missing_cols <- setdiff(columns, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Columns not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Stack the three season columns into LONG form in one step; every season
  # contributes nrow(data) values, in the same order as `seasons`.
  df <- data.frame(
    temporada = rep(seasons, each = nrow(data)),
    Variable = unlist(
      lapply(columns, function(col) data[[col]]),
      use.names = FALSE
    )
  )

  ggplot(df, aes(x = Variable, fill = temporada)) +
    geom_density(alpha = 0.5)
}
distBarFor3Season
(
train
,
"age"
)
# Histograms for all seasons for the variable entered by parameter.
barpFor3Season
<-
function
(
data
,
variable
)
{
par
(
mfrow
=
c
(
1
,
3
))
...
...
@@ -37,51 +42,11 @@ barpFor3Season <- function(data, variable) {
}
}
# Draws three side-by-side bar plots (one per season) with the number of
# players per position, using a common y-axis limit so seasons are comparable.
#
# Args:
#   data:     data frame with the per-season position columns.
#   variable: column-name prefix (string); for each season the first column
#             selected by contains("<variable><season>") is used.
#
# Side effects:
#   Plots on the current graphics device and prints each season's four counts
#   with cat(). The previous par(mfrow) layout is restored on exit.
#
# Returns:
#   NULL, invisibly.
plotFor3SeasonsForPos <- function(data, variable) {
  # Restore the caller's graphics layout when the function exits.
  old_par <- par(mfrow = c(1, 3))
  on.exit(par(old_par), add = TRUE)

  seasons <- c(".1718", ".1819", ".1920")
  codes <- c("GK", "DF", "MF", "FW")
  labels <- c("Portero", "Defensa", "Mediocentro", "Delantero")

  # For every season, count the rows whose position value matches each code.
  # A value that matches several codes is counted once under each of them.
  counts <- lapply(seasons, function(season) {
    column <- select(data, contains(paste0(variable, season)))[[1]]
    vapply(
      codes,
      function(code) sum(str_detect(column, code), na.rm = TRUE),
      integer(1),
      USE.NAMES = FALSE
    )
  })

  # Shared upper limit of the y axis across the three plots.
  max_in_pos <- max(0, unlist(counts))

  for (i in seq_along(seasons)) {
    barplot(
      height = counts[[i]],
      names.arg = labels,
      col = rgb(0.8, 0.1, 0.1, 0.6),
      xlab = "posiciones",
      ylab = "cantidad",
      # paste0() builds ONE title string; paste(c(...), sep = "") returned a
      # three-element vector instead.
      main = paste0("Distribución posiciones ", seasons[i], "(TRAIN)"),
      ylim = c(0, max_in_pos)
    )
    cat(counts[[i]])
  }

  invisible(NULL)
}
# Returns one plot or both (depending on nplots) to analyze a variable in longitudinal data.
univariantAnalysis
<-
function
(
data
,
variable
,
ids
)
{
univariantAnalysis
<-
function
(
data
,
variable
,
ids
,
nplots
)
{
info
<-
data
%>%
select
(
contains
(
paste
(
variable
,
"."
,
sep
=
""
)))
info
<-
cbind
(
ids
,
info
)
cat
(
names
(
info
))
# WIDE to LONG
dl
<-
gather
(
info
,
season
,
variable
,
2
:
4
,
factor_key
=
TRUE
)
...
...
@@ -97,12 +62,15 @@ univariantAnalysis <- function(data, variable, ids) {
geom_point
(
alpha
=
0.5
,
color
=
"blue"
,
size
=
3
)
+
geom_line
(
color
=
"red"
,
size
=
1
)
+
labs
(
title
=
"Promedio por temporada"
,
x
=
"Temporada"
,
y
=
titleForYAxis
(
variable
))
if
(
nplots
==
0
){
bp
}
else
if
(
nplots
==
1
)
{
gp
}
else
{
figure
<-
ggarrange
(
bp
,
gp
,
labels
=
c
(
"A"
,
"B"
),
ncol
=
2
,
nrow
=
1
)
figure
}
}
# Returns the spanish title of the Y-axis according to the variable entered by parameter.
titleForYAxis
<-
function
(
variable
)
{
...
...
@@ -115,20 +83,40 @@ titleForYAxis <- function(variable) {
npx
=
"Goles esperados no de penalti (npxG)"
,
xa
=
"Asistencias esperadas (xA)"
,
shots_on_target_pct
=
"Porcentaje de tiros a puerta"
,
passes_completed
=
"Pases realizados con éxito"
,
passes
=
"Pases totales realizados"
,
passes_pct
=
"Porcentaje de pases completados"
,
passes_total_distance
=
"Distancia total recorrida por sus pases"
,
assisted_shots
=
"Pases que generan un tiro"
,
passes_switches
=
"Pases que cambian la orientacion del juego"
,
pass_targets
=
"Pases totales hacia el jugador"
,
passes_received
=
"Pases recibidos por el jugador"
,
sca_passes_live
=
"Acciones de creación de tiro con el balon en juego"
,
gca_passes_live
=
"Acciones de creación de gol con el balon en juego"
,
sca_passes_dead
=
"Acciones de creación de tiro a balon parado"
,
gca_passes_dead
=
"Acciones de creación de gol a balon parado"
,
sca_dribbles
=
"Acciones de creación de tiro generadas por un regate"
,
gca_dribbles
=
"Acciones de creación de gol generadas por un regate"
,
sca_fouled
=
"Acciones de creación de tiro tras falta"
,
gca_fouled
=
"Acciones de creación de gol tras falta"
,
passes_intercepted
=
"Pases interceptados"
,
pressure_regains
=
"Porcentaje de presiones exitosas"
,
dribbles
=
"Regates intentados"
,
dribbles_completed
=
"Regates conseguidos con éxito"
,
dribbles_completed_pct
=
"Porcentaje de regates exitosos"
,
fouls
=
"Faltas cometidas"
,
tackles_won
=
"Entradas con éxito"
,
tackles
=
"Entradas realizadas"
,
aerials_won
=
"Balones aéreos ganados"
,
aerials_lost
=
"Balones aéreos pérdidos"
,
aerials_contested
=
"Balones aéreos disputados"
,
aerials_won_pct
=
"Porcenaje de balones aéreos ganados"
,
pens_att
=
"Penalties atajados"
,
pens_made
=
"Penalties marcados"
,
pens_att
=
"Penalties intentados"
,
shots_on_target_against
=
"Tiros a puerta en contra"
,
goals_against_per90_gk
=
"Goles en contra por 90 min (GK)"
,
pens_saved
=
"Penalties atajados"
,
pens_allowed
=
"Penalties no parados"
,
saves
=
"Paradas totales"
,
save_pct
=
"Porcentaje de ocasiones paradas"
,
"Variable no encontrada"
)
...
...
@@ -143,6 +131,7 @@ library(reshape)
library
(
tidyr
)
# gather() -> WIDE data to LONG data
library
(
ggplot2
)
# graphics
library
(
ggpubr
)
# multiple ggplots
library
(
RColorBrewer
)
# paletas
##########################################
...
...
@@ -150,33 +139,81 @@ library(ggpubr) # multiple ggplots
##########################################
# Get players identifiers
ids
<-
data.frame
(
train
$
X
)
ids
<-
t
(
ids
)
ids
<-
data.frame
(
laLigaPlayers
$
X
)
#
ids <- t(ids)
########
# Edad #
########
# Distribucion (Densidades)
distBarFor3Season
(
train
,
"age"
)
# Box plots
univariantAnalysis
(
laLigaPlayers
,
"age"
,
ids
,
0
)
# Busqueda del OUTLIER
edad
<-
laLigaPlayers
%>%
select
(
contains
(
paste
(
"age."
,
sep
=
""
)))
laLigaPlayers
$
player
[
which.max
(
edad
[,
3
])]
##########
# Altura #
##########
# Distribucion (
Gráficos de barra
s)
distBarFor3Season
(
train
,
"height"
)
# Distribucion (
Densidade
s)
#
distBarFor3Season(train, "height")
# Box plots
univariantAnalysis
(
laLigaPlayers
,
"height"
,
ids
,
0
)
################
# Pierna buena #
################
# Distribución (Grafico de tarta)
foot
<-
train
%>%
select
(
"foot.1
718
"
)
foot
<-
laLigaPlayers
%>%
select
(
all_of
(
"foot.1
819
"
)
)
library
(
lessR
)
PieChart
(
foot.1718
,
hole
=
FALSE
,
values
=
"%"
,
data
=
foot
,
main
=
"Distribución de pierna dominante"
,
fill
=
brewer.pal
(
3
,
"Set1"
),)
PieChart
(
foot.1819
,
hole
=
FALSE
,
values
=
"%"
,
data
=
foot
,
main
=
"Distribución de pierna dominante"
,
fill
=
brewer.pal
(
3
,
"Set1"
))
######
# xA #
######
univariantAnalysis
(
laLigaPlayers
,
"xa"
,
ids
,
2
)
##############################
# Pases realizados con éxito #
##############################
univariantAnalysis
(
laLigaPlayers
,
"passes_completed"
,
ids
,
2
)
####################
# Pases realizados #
####################
univariantAnalysis
(
laLigaPlayers
,
"passes"
,
ids
,
2
)
#######################
# Porcentaje de pases #
#######################
univariantAnalysis
(
laLigaPlayers
,
"passes_pct"
,
ids
,
2
)
##################################
# Distancia total mediante pases #
##################################
univariantAnalysis
(
laLigaPlayers
,
"passes_total_distance"
,
ids
,
2
)
#############################
# Pases que derivan en tiro #
#############################
barpFor3Season
(
laLigaPlayers
,
"assisted_shots"
)
univariantAnalysis
(
laLigaPlayers
,
"assisted_shots"
,
ids
,
2
)
###################################
# Cambios de orientación de juego #
###################################
univariantAnalysis
(
laLigaPlayers
,
"passes_switches"
,
ids
,
2
)
############################################################################
# Cantidad de ocasiones en las que un pase tiene como objetivo ese jugador #
############################################################################
univariantAnalysis
(
laLigaPlayers
,
"pass_targets"
,
ids
,
2
)
##########################################
# Cantidad de pases que recibe con éxito #
##########################################
univariantAnalysis
(
laLigaPlayers
,
"passes_received"
,
ids
,
2
)
###########################################
# Porcentaje de pases recibidos con éxito #
###########################################
univariantAnalysis
(
laLigaPlayers
,
"passes_received_pct"
,
ids
,
2
)
##################################
# Goles y asistencias por 90 min #
##################################
distBarFor3Season
(
train
,
"goals_assists_per90"
)
univariantAnalysis
(
train
,
"goals_assists_per90"
,
ids
)
univariantAnalysis
(
train
,
"goals_assists_per90"
,
ids
,
2
)
######
# xG #
######
...
...
@@ -185,31 +222,13 @@ ids <- t(ids)
# npxG #
########
univariantAnalysis
(
train
,
"npxg"
,
ids
)
######
# xA #
######
univariantAnalysis
(
train
,
"xa"
,
ids
)
##################################
# Porcentaje de tiros a porteria #
##################################
# univariantAnalysis(train, "xa", ids)
#######################
# Porcentaje de pases #
#######################
univariantAnalysis
(
train
,
"passes_pct"
,
ids
)
##################################
# Distancia total mediante pases #
##################################
univariantAnalysis
(
train
,
"passes_total_distance"
,
ids
)
#############################
# Pases que derivan en tiro #
#############################
barpFor3Season
(
train
,
"assisted_shots"
)
univariantAnalysis
(
train
,
"assisted_shots"
,
ids
)
###################################
# Cambios de orientación de juego #
###################################
univariantAnalysis
(
train
,
"passes_switches"
,
ids
)
###################
# SCA passes dead #
###################
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment