# Select Git revision
# triplesAllen.csv
# Code owners
# Assign users and groups as approvers for specific file changes. Learn more.
# MultivariateAnalysis.R 5.20 KiB
#############
# FUNCIONES #
#############
#############
# LIBRERIAS #
#############
library(stats) # Calculos
library(factoextra) # PCAs representations
suppressPackageStartupMessages(library(plotly)) # 3D PCA representations
################################
# PRINCIPAL COMPONENT ANALYSIS #
################################
# Fit a PCA on every column of laLigaPlayersStudy except `player`.
# `scale.` is spelled out in full (the argument is named `scale.` in prcomp)
# instead of relying on partial matching of `scale`.
pca_laLiga <- prcomp(laLigaPlayersStudy %>% select(-player), scale. = TRUE)
names(pca_laLiga)
# One column of the rotation matrix per principal component.
# NOTE(review): the original comment reported 37 principal components in
# total; confirm against the dim() output below.
dim(pca_laLiga$rotation)
# SCORES: first rows of (up to) the first five principal components.
# Columns are selected before head(); indexing the result of head() with
# [1:5] would only return five PC1 values.
head(pca_laLiga$x[, seq_len(min(5, ncol(pca_laLiga$x))), drop = FALSE])
# PERCENTAGE OF VARIANCE EXPLAINED by PC1 and PC2 together
sum(pca_laLiga$sdev[1:2]^2) / sum(pca_laLiga$sdev^2) * 100
# To include PC3 as well, use pca_laLiga$sdev[1:3] in the numerator.
# Players' names
# Use the player names as row labels of the PCA scores. Every repeated
# occurrence of a name gets a "2" appended so it can be told apart from the
# first occurrence.
# NOTE(review): `train` is only assigned further down in this file (train/test
# split), so this section relies on it already existing in the session and on
# its rows lining up with the rows of pca_laLiga$x -- confirm.
repeatedPlayers <- which(duplicated(train$player))
if (length(repeatedPlayers) > 0) {
  train$player[repeatedPlayers] <- paste0(train$player[repeatedPlayers], 2)
}
rownames(pca_laLiga$x) <- train$player
# REPRESENTATIONS
# 2D: individuals drawn as points on principal components 1 and 2
fviz_pca_ind(
  pca_laLiga,
  axes = c(1, 2),
  geom.ind = "point",
  pointsize = 1.5,
  col.ind = "aquamarine3",
  title = ""
)
# 3D: plotly scatter of the scores on PC1, PC2 and PC3
data <- data.frame(pca_laLiga$x)
fig <- plot_ly(data, x = ~PC1, y = ~PC2, z = ~PC3, colors = c("aquamarine3"))
fig <- add_markers(fig, size = 12)
fig
# CREACION del vector POSICIONES
# colores <- function(vec){
# # la función rainbow() devuelve un vector que contiene el número de colores distintos
# col <- rainbow(length(unique(vec)))
# return(col[as.numeric(as.factor(vec))])
# }
# Observaciones sobre PC1 y PC2
# plot(pca_laLiga$x[,1:2], col = colores(c("GK", "DF", "MF", "FW")),
# pch = 19,
# xlab = "Z1",
# ylab = "Z2")
# legend("topright", legend=c("GK", "DF", "MF", "FW"),
# col = colores(c("GK", "DF", "MF", "FW")),pch=19, cex=0.8)
#
# plot(pca_laLiga$x[,1:2], col = colores(unique(laLigaPlayers$position.1718)),
# pch = 19,
# xlab = "Z1",
# ylab = "Z2")
# legend("topright", legend=unique(laLigaPlayers$position.1718),
# col = colores(unique(laLigaPlayers$position.1718)),pch=19, cex=0.8)
# table(laLigaPlayers$position.1718)
# BIPLOT (to save)
# Base-graphics biplot first, then the factoextra version of the same view.
biplot(pca_laLiga, col = c("khaki4", "darkorchid3"), cex = 0.5, scale = 0)
fviz_pca_biplot(pca_laLiga)
# Variables plot, coloured by each variable's contribution ("contrib").
fviz_pca_var(
  pca_laLiga,
  col.var = "contrib",
  gradient.cols = c("#FC4E07", "#E7B800", "#006600"),
  repel = TRUE,
  labelsize = 3,
  title = ""
)
# CHOOSING THE NUMBER OF PCs (650x550 -- presumably the export size; confirm)
fviz_screeplot(
  pca_laLiga,
  addlabels = TRUE,
  ylim = c(0, 40),
  title = "",
  xlab = "Dimensiones",
  ylab = "Porcentaje de variabilidad explicada"
)
################  ############
# TRAINING SET #  # TEST SET #
################  ############
set.seed(5682)
library(rsample)
# Rows listed in either column of rowsPIn2C are held out of the random split
# and appended to the training set afterwards.
# NOTE(review): rowsPIn2C presumably indexes players that appear twice in the
# data -- confirm where it is built.
jugadoresRepes <- laLigaPlayersStudy[rowsPIn2C[, 1], ]
jugadoresRepes <- rbind(jugadoresRepes, laLigaPlayersStudy[rowsPIn2C[, 2], ])
rowsQuit <- c(rowsPIn2C[, 1], rowsPIn2C[, 2])
jugadoresRepes$position <- laLigaPlayers[rowsQuit, ]$position.1920
# Select the remaining rows with positive indices: `df[-rowsQuit, ]` would
# return zero rows if rowsQuit were empty.
rowsKeep <- setdiff(seq_len(nrow(laLigaPlayersStudy)), rowsQuit)
laLigaPlayersSplit <- laLigaPlayersStudy[rowsKeep, ]
laLigaPlayersSplit$position <- laLigaPlayers[rowsKeep, ]$position.1920
# 80/20 split, stratified by position.
laLiga_split <- initial_split(laLigaPlayersSplit, prop = 8/10, strata = "position")
train <- training(laLiga_split)
test <- testing(laLiga_split)
train <- rbind(train, jugadoresRepes)
if (quitarPorteros == 1) {
  # Elimination of goalkeepers and Lionel.
  # Logical masks are used instead of `-which(...)`: when nothing matches,
  # `-which(...)` is an empty index and would drop every row. `%in%` also
  # keeps rows whose value is NA, as `which()` did.
  train <- train[!(train$position %in% "GK"), ]
  test <- test[!(test$position %in% "GK"), ]
  # NOTE(review): Lionel Messi is removed from `train` only, not from `test`
  # -- confirm this is intended.
  train <- train[!(train$player %in% "Lionel Messi"), ]
}
# DISTRIBUTION of the TRAINING set
# Count the training rows whose position contains each role code. The match
# is a substring test, so a label containing two codes is counted under both.
# na.rm = TRUE leaves rows with a missing position out of the counts.
numGK <- sum(str_detect(train$position, "GK"), na.rm = TRUE)
numDF <- sum(str_detect(train$position, "DF"), na.rm = TRUE)
numMF <- sum(str_detect(train$position, "MF"), na.rm = TRUE)
numFW <- sum(str_detect(train$position, "FW"), na.rm = TRUE)
df <- data.frame(
  position = c("Porteros", "Defensas", "Mediocentros", "Delanteros"),
  numberOf = c(numGK, numDF, numMF, numFW)
)
# Share of each position, in percent.
df$numberOf / sum(df$numberOf) * 100
# `names.arg` is spelled out in full rather than partially matched as `names`.
bp <- barplot(
  height = df$numberOf,
  names.arg = df$position,
  # col = rgb(0.8, 0.1, 0.1, 0.6),
  col = "goldenrod3",
  border = NA,
  xlab = "Posiciones",
  ylab = "Cantidad"
)
# 550x450 -- presumably the export size of the plot above; confirm.
# Remove the `position` label column from the training set, selecting it by
# name so the code does not depend on how many columns the data has.
# NOTE(review): this replaces a hard-coded index (35 when quitarPorteros == 1,
# 39 otherwise); `position` is the last column appended during the split, so
# it should sit at exactly those indices -- confirm.
train <- train[, names(train) != "position", drop = FALSE]
# DISTRIBUTION of the VALIDATION (test) set
# Count the test rows whose position contains each role code. The match is a
# substring test, so a label containing two codes is counted under both.
# na.rm = TRUE leaves rows with a missing position out of the counts.
numGK <- sum(str_detect(test$position, "GK"), na.rm = TRUE)
numDF <- sum(str_detect(test$position, "DF"), na.rm = TRUE)
numMF <- sum(str_detect(test$position, "MF"), na.rm = TRUE)
numFW <- sum(str_detect(test$position, "FW"), na.rm = TRUE)
df <- data.frame(
  position = c("Porteros", "Defensas", "Mediocentros", "Delanteros"),
  numberOf = c(numGK, numDF, numMF, numFW)
)
# Share of each position, in percent.
df$numberOf / sum(df$numberOf) * 100
# `names.arg` is spelled out in full rather than partially matched as `names`.
bp <- barplot(
  height = df$numberOf,
  names.arg = df$position,
  # col = rgb(0.8, 0.1, 0.1, 0.6),
  col = "darkseagreen3",
  border = NA,
  xlab = "Posiciones",
  ylab = "Cantidad"
)
# Remove the `position` label column from the test set, selecting it by name
# so the code does not depend on how many columns the data has.
# NOTE(review): this replaces a hard-coded index (35 when quitarPorteros == 1,
# 39 otherwise); `position` is the last column appended during the split, so
# it should sit at exactly those indices -- confirm.
test <- test[, names(test) != "position", drop = FALSE]