Skip to content
Snippets Groups Projects
Commit 42924ea0 authored by Mario Garrido Tapias
Browse files

Train-Test split done and PCA representations

parent 9e8481b4
No related branches found
No related tags found
No related merge requests found
......@@ -7,10 +7,36 @@
#############
library(stats) # Calculos
library(factoextra) # Representaciones
###############################
# TRAINING SET  /  TEST SET   #
###############################
# Fix the RNG seed so the stratified split below is reproducible.
set.seed(5682)
library(rsample)
# Rows listed in either column of rowsPIn2C are kept out of the random split
# and appended to the training set afterwards.
# NOTE(review): presumably these are players registered with two clubs --
# confirm against where rowsPIn2C is built.
rowsQuit <- c(rowsPIn2C[, 1], rowsPIn2C[, 2])
jugadoresRepes <- rbind(
  laLigaPlayersStudy[rowsPIn2C[, 1], ],
  laLigaPlayersStudy[rowsPIn2C[, 2], ]
)
# Select the remaining rows with a positive index: `df[-rowsQuit, ]` silently
# returns zero rows when rowsQuit is empty, which would wipe the whole data set.
rowsKeep <- setdiff(seq_len(nrow(laLigaPlayersStudy)), rowsQuit)
laLigaPlayersSplit <- laLigaPlayersStudy[rowsKeep, ]
# Attach the 2019/20 position so it can be used as the stratification variable.
laLigaPlayersSplit$position.1920 <- laLigaPlayers[rowsKeep, ]$position.1920
# 80/20 split, stratified by position.
laLiga_split <- initial_split(
  laLigaPlayersSplit,
  prop = 8 / 10,
  strata = "position.1920"
)
# NOTE(review): column 39 is presumably the position.1920 column added above
# (it must go before rbind() with jugadoresRepes) -- verify the index.
train <- training(laLiga_split)
train <- train[, -39]
train <- rbind(train, jugadoresRepes)
test <- testing(laLiga_split)
test <- test[, -39]
# Compare the position distribution of the full data, training and test sets.
plotFor3SeasonsForPos(laLigaPlayers, "position")
plotFor3SeasonsForPos(train, "position")
plotFor3SeasonsForPos(test, "position")
################################
# PRINCIPAL COMPONENT ANALYSIS #
################################
# Fit the PCA on the training set, excluding the player-name column, with
# variables scaled to unit variance. The argument is spelled `scale.` in full
# instead of relying on partial matching of `scale`.
# An earlier fit on laLigaPlayersStudy[, -1] was overwritten by this
# assignment before being used, so it has been dropped.
pca_laLiga <- prcomp(train %>% select(-player), scale. = TRUE)
# Components stored in the fitted object
names(pca_laLiga)
# Dimensions of the loadings matrix
dim(pca_laLiga$rotation)
......@@ -19,9 +45,13 @@ dim(pca_laLiga$rotation)
# SCORES
# NOTE(review): head() returns a matrix, so `[1:5]` is vector indexing and
# prints only the first five scores of the first component. If a table of
# scores was intended, `head(pca_laLiga$x[, 1:5])` is the likely form -- confirm.
head(pca_laLiga$x)[1:5]
# Label each row of the score matrix with the player's name
rownames(pca_laLiga$x) <- train$player
# VARIANCE EXPLAINED BY EACH PC
# Total variance (squared sdev) captured by the first three components
sum(pca_laLiga$sdev[1:3]^2)
# With 48 PCs -> sum of the first 2 = 27.57, sum of the first 3 = 33.925
# Percentage of the total variance captured by the first two components
100 * sum(pca_laLiga$sdev[1:2]^2) / sum(pca_laLiga$sdev^2)
# + pca_laLiga$sdev[3]^2
# The figures noted in these comments are wrong (author's own remark)
# With 48 PCs -> sum of the first 2 = 57.44, sum of the first 3 = 33.925
# With 35 PCs -> sum of the first 2 = 19.96, sum of the first 3 = 24.15
# REPRESENTACIONES
......@@ -29,3 +59,36 @@ fviz_pca_ind(pca_laLiga, geom.ind = "point",
col.ind = "#FC4E07",
axes = c(1, 2),
pointsize = 1.5)
# Build a colour vector from a vector of categories (e.g. positions).
#
# vec: vector of category labels (character, numeric or factor).
# Returns a character vector of colours, one per element of `vec`; equal
# labels get the same colour. Colours are assigned following the sorted order
# of the distinct labels (or the level order when `vec` is a factor).
colores <- function(vec) {
  # rainbow(n) returns a vector of n distinct colours
  palette_cols <- rainbow(length(unique(vec)))
  # factor() drops unused levels, so the integer codes never exceed the
  # palette length (as.factor() on a factor with unused levels produced NA
  # colours for the higher codes).
  palette_cols[as.integer(factor(vec))]
}
# Observations projected on PC1 and PC2 (axes Z1 and Z2).
# NOTE(review): colores() is given only the position labels, so `col` is a
# short colour vector that plot() recycles over the points; the colouring then
# follows row order rather than each player's position -- confirm whether
# per-player positions were intended.
plot(
  pca_laLiga$x[, 1:2],
  xlab = "Z1",
  ylab = "Z2",
  pch = 19,
  col = colores(c("GK", "DF", "MF", "FW"))
)
legend(
  "topright",
  legend = c("GK", "DF", "MF", "FW"),
  pch = 19,
  cex = 0.8,
  col = colores(c("GK", "DF", "MF", "FW"))
)
# Same scatter plot, with the palette taken from the distinct values of the
# 2017/18 position column.
plot(
  pca_laLiga$x[, 1:2],
  xlab = "Z1",
  ylab = "Z2",
  pch = 19,
  col = colores(unique(laLigaPlayers$position.1718))
)
legend(
  "topright",
  legend = unique(laLigaPlayers$position.1718),
  pch = 19,
  cex = 0.8,
  col = colores(unique(laLigaPlayers$position.1718))
)
# Number of players per 2017/18 position
table(laLigaPlayers$position.1718)
# Variables drawn as arrows, coloured by their cos2 value
fviz_pca_var(
  pca_laLiga,
  geom.var = "arrow",
  col.var = "cos2",
  repel = FALSE,
  labelsize = 2
)
# BIPLOT
biplot(
  pca_laLiga,
  scale = 0,
  col = c("dodgerblue3", "deeppink3"),
  cex = 0.5
)
# CHOOSING THE NUMBER OF PCs
fviz_screeplot(
  pca_laLiga,
  ylim = c(0, 40),
  addlabels = TRUE
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment