Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

app.module.ts

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    MultivariateAnalysis.R 5.20 KiB
    #############
    # FUNCIONES #
    #############
    
    #############
    # LIBRERIAS #
    #############
    library(stats) # Calculos
    library(factoextra) # PCAs representations
    suppressPackageStartupMessages(library(plotly)) # 3D PCA representations
    
    ################################
    # PRINCIPAL COMPONENT ANALYSIS #
    ################################
    # Fit a PCA on every column of laLigaPlayersStudy except `player`.
    # The argument is spelled `scale.` in full (prcomp's real parameter name)
    # instead of relying on partial matching of `scale`.
    pca_laLiga <- prcomp(laLigaPlayersStudy %>% select(-player), scale. = TRUE)
    names(pca_laLiga)
    
    dim(pca_laLiga$rotation)
    # Original author's note: there are 37 principal components in total.
    
    # SCORES: first rows of the first five principal components.
    # The columns are selected BEFORE head(); indexing the result of head()
    # with [1:5] would flatten the matrix and return five unlabeled numbers.
    head(pca_laLiga$x[, 1:5])
    
    # PERCENTAGE OF VARIANCE EXPLAINED by PC1 and PC2 together
    sum(pca_laLiga$sdev[1:2]^2) / sum(pca_laLiga$sdev^2) * 100
    # To include PC3 as well, use pca_laLiga$sdev[1:3] in the numerator.
    
    # Players' names
    # Every repeated occurrence of a player name (all occurrences after the
    # first) gets "2" appended, then the names become the row labels of the
    # PCA score matrix.
    repeatedPlayers <- which(duplicated(train$player))
    if (length(repeatedPlayers) > 0) {
      # Vectorized replacement; the duplicate positions were computed once above,
      # before any name is modified.
      train$player[repeatedPlayers] <- paste0(train$player[repeatedPlayers], 2)
    }
    # NOTE(review): assumes train has exactly one row per row of pca_laLiga$x,
    # in the same order -- confirm against where the PCA input is built.
    rownames(pca_laLiga$x) <- train$player
    
    # REPRESENTATIONS
    # 2D: individuals drawn as points on the plane of components 1 and 2
    fviz_pca_ind(
      pca_laLiga,
      axes = c(1, 2),
      geom.ind = "point",
      col.ind = "aquamarine3",
      pointsize = 1.5,
      title = ""
    )
    
    # 3D: plotly marker plot of the scores on PC1, PC2 and PC3
    data <- data.frame(pca_laLiga$x)
    fig <- add_markers(
      plot_ly(data, x = ~PC1, y = ~PC2, z = ~PC3, colors = c("aquamarine3")),
      size = 12
    )
    fig
    
    # CREACION del vector POSICIONES 
    # colores <- function(vec){
    #   # la función rainbow() devuelve un vector que contiene el número de colores distintos
    #   col <- rainbow(length(unique(vec)))
    #   return(col[as.numeric(as.factor(vec))])
    # }
    
    # Observaciones sobre PC1 y PC2
    # plot(pca_laLiga$x[,1:2], col = colores(c("GK", "DF", "MF", "FW")), 
    #      pch = 19, 
    #      xlab = "Z1", 
    #      ylab = "Z2")
    # legend("topright", legend=c("GK", "DF", "MF", "FW"),
    # col = colores(c("GK", "DF", "MF", "FW")),pch=19, cex=0.8)
    # 
    # plot(pca_laLiga$x[,1:2], col = colores(unique(laLigaPlayers$position.1718)), 
    #      pch = 19, 
    #      xlab = "Z1", 
    #      ylab = "Z2")
    # legend("topright", legend=unique(laLigaPlayers$position.1718),
    #        col = colores(unique(laLigaPlayers$position.1718)),pch=19, cex=0.8)
    # table(laLigaPlayers$position.1718)
    
    
    # BIPLOT (figure to be saved)
    biplot(pca_laLiga, col = c("khaki4", "darkorchid3"), cex = 0.5, scale = 0)
    fviz_pca_biplot(pca_laLiga)
    # Variables plot, coloured by each variable's contribution
    fviz_pca_var(
      pca_laLiga,
      title = "",
      repel = TRUE,
      labelsize = 3,
      col.var = "contrib",
      gradient.cols = c("#FC4E07", "#E7B800", "#006600")
    )
    
    # CHOOSING THE NUMBER OF PCs (export size used by the author: 650x550)
    fviz_screeplot(
      pca_laLiga,
      title = "",
      xlab = "Dimensiones",
      ylab = "Porcentaje de variabilidad explicada",
      ylim = c(0, 40),
      addlabels = TRUE
    )
    
    ###########################
    # TRAINING SET / TEST SET #
    ###########################
    set.seed(5682)
    library(rsample)
    
    # Rows listed in either column of rowsPIn2C are set aside before the split
    # and appended to the training set only (see the rbind after the split).
    rowsQuit <- c(rowsPIn2C[, 1], rowsPIn2C[, 2])
    jugadoresRepes <- rbind(
      laLigaPlayersStudy[rowsPIn2C[, 1], ],
      laLigaPlayersStudy[rowsPIn2C[, 2], ]
    )
    jugadoresRepes$position <- laLigaPlayers[rowsQuit, ]$position.1920
    
    # Remaining rows, labelled with position.1920 taken from laLigaPlayers.
    laLigaPlayersSplit <- laLigaPlayersStudy[-rowsQuit, ]
    laLigaPlayersSplit$position <- laLigaPlayers[-rowsQuit, ]$position.1920
    
    # 80/20 split stratified by position.
    laLiga_split <- initial_split(
      laLigaPlayersSplit,
      prop = 8 / 10,
      strata = "position"
    )
    train <- training(laLiga_split)
    test <- testing(laLiga_split)
    train <- rbind(train, jugadoresRepes)
    if (quitarPorteros == 1) {
      # Remove goalkeepers from both sets and Lionel Messi from the training set.
      # Logical masks replace `-which(...)`: when nothing matches, `-which(...)`
      # is an empty index and the subset would return ZERO rows (e.g. if Messi
      # was sampled into the test set). `%in%` never yields NA, so rows with a
      # missing value are kept, exactly as `which()` kept them.
      train <- train[!(train$position %in% "GK"), ]
      test <- test[!(test$position %in% "GK"), ]
      train <- train[!(train$player %in% "Lionel Messi"), ]
    }
    # DISTRIBUTION of positions in the TRAINING set
    var <- data.frame(train$position)
    # str_detect() matches substrings, so a position value that contains more
    # than one code is counted once under each code it contains.
    GK <- var %>% filter(str_detect(var[, 1], "GK"))
    numGK <- nrow(GK)
    DF <- var %>% filter(str_detect(var[, 1], "DF"))
    numDF <- nrow(DF)
    MF <- var %>% filter(str_detect(var[, 1], "MF"))
    numMF <- nrow(MF)
    FW <- var %>% filter(str_detect(var[, 1], "FW"))
    numFW <- nrow(FW)
    df <- data.frame(
      position = c("Porteros", "Defensas", "Mediocentros", "Delanteros"),
      numberOf = c(numGK, numDF, numMF, numFW)
    )
    # Share of each position, in percent
    df$numberOf / sum(df$numberOf) * 100
    # `names.arg` is barplot's full argument name (the original `names` only
    # worked through partial matching).
    bp <- barplot(
      height = df$numberOf,
      names.arg = df$position,
      # col = rgb(0.8, 0.1, 0.1, 0.6),
      col = "goldenrod3",
      border = NA,
      xlab = "Posiciones",
      ylab = "Cantidad"
    )
    # Export size used by the author: 550x450
    # Drop the position label by NAME. The original removed column 35 or 39
    # depending on quitarPorteros; `position` is appended as the last column
    # when the split is built, so it is presumably the column those indices
    # pointed at -- confirm against the column count of laLigaPlayersStudy.
    train <- train[, names(train) != "position", drop = FALSE]
    
    
    
    # DISTRIBUTION of positions in the VALIDATION (test) set
    var <- data.frame(test$position)
    # str_detect() matches substrings, so a position value that contains more
    # than one code is counted once under each code it contains.
    GK <- var %>% filter(str_detect(var[, 1], "GK"))
    numGK <- nrow(GK)
    DF <- var %>% filter(str_detect(var[, 1], "DF"))
    numDF <- nrow(DF)
    MF <- var %>% filter(str_detect(var[, 1], "MF"))
    numMF <- nrow(MF)
    FW <- var %>% filter(str_detect(var[, 1], "FW"))
    numFW <- nrow(FW)
    df <- data.frame(
      position = c("Porteros", "Defensas", "Mediocentros", "Delanteros"),
      numberOf = c(numGK, numDF, numMF, numFW)
    )
    # Share of each position, in percent
    df$numberOf / sum(df$numberOf) * 100
    # `names.arg` is barplot's full argument name (the original `names` only
    # worked through partial matching).
    bp <- barplot(
      height = df$numberOf,
      names.arg = df$position,
      # col = rgb(0.8, 0.1, 0.1, 0.6),
      col = "darkseagreen3",
      border = NA,
      xlab = "Posiciones",
      ylab = "Cantidad"
    )
    
    # Drop the position label by NAME. The original removed column 35 or 39
    # depending on quitarPorteros; `position` is appended as the last column
    # when the split is built, so it is presumably the column those indices
    # pointed at -- confirm against the column count of laLigaPlayersStudy.
    test <- test[, names(test) != "position", drop = FALSE]