From 84e9313c13726b006f0c80607f8ca98617f0ce05 Mon Sep 17 00:00:00 2001 From: migtoqu <miguel.toquero@alumnos.uva.es> Date: Thu, 15 Jul 2021 08:13:44 +0000 Subject: [PATCH] Delete Gestion_de_directorios.ipynb --- .../Codigo/Gestion_de_directorios.ipynb | 455 ------------------ 1 file changed, 455 deletions(-) delete mode 100644 Aplicacion/Codigo/Gestion_de_directorios.ipynb diff --git a/Aplicacion/Codigo/Gestion_de_directorios.ipynb b/Aplicacion/Codigo/Gestion_de_directorios.ipynb deleted file mode 100644 index 530722d..0000000 --- a/Aplicacion/Codigo/Gestion_de_directorios.ipynb +++ /dev/null @@ -1,455 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Importamos modulos" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "import os, shutil\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lectura de datos (metadatos)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>FILE NAME</th>\n", - " <th>CLASE</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>COVID-19(1)</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>COVID-19(2)</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>COVID-19(3)</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>COVID-19(4)</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>COVID-19(5)</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " FILE NAME CLASE\n", - "0 COVID-19(1) COVID-19\n", - "1 COVID-19(2) COVID-19\n", - "2 COVID-19(3) COVID-19\n", - "3 COVID-19(4) COVID-19\n", - "4 COVID-19(5) COVID-19" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metadata = pd.read_csv('../datos/Metadata/metadatos.csv')\n", - "metadata.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Separamos en entrenamiento y prueba" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "train, test = train_test_split(metadata, test_size=1/3, stratify=metadata.CLASE)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Observamos las mismas proporciones en todos los subconjuntos" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creamos en el directorio datos los directorios train y test donde almacenaremos las imagenes correspondientes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Intentamos obtener los nombres reales de las imagenes" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "mypath = \"../datos/COVID-19\"\n", - "covid_files = [f for f in os.listdir(mypath)]\n", - "#covid_files" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "mypath = \"../datos/NORMAL\"\n", - "normal_files = [f for f in os.listdir(mypath)]\n", - "#normal_files" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "mypath = \"../datos/Viral Pneumonia\"\n", - "pneumonia_files = [f for f in os.listdir(mypath)]\n", - "#pneumonia_files" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "files= normal_files+pneumonia_files+covid_files\n", - "#files.append(pneumonia_files)\n", - "#files.append(covid_files)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "clase_n = ['NORMAL' for i in range(0,len(normal_files))]\n", - "clase_p = ['Viral Pneumonia' for i in range(0,len(pneumonia_files))]\n", - "clase_c = ['COVID-19' for i in range(0,len(covid_files))]\n", - "\n", - "clase = clase_n+clase_p+clase_c" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>FILENAME</th>\n", - " <th>CLASS</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>NORMAL (1).png</td>\n", - " <td>NORMAL</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>NORMAL (10).png</td>\n", - " <td>NORMAL</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>NORMAL (100).png</td>\n", - " <td>NORMAL</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>NORMAL (101).png</td>\n", - " <td>NORMAL</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>NORMAL (102).png</td>\n", - " <td>NORMAL</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2900</th>\n", - " <td>COVID-19(215).png</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2901</th>\n", - " <td>COVID-19(216).png</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2902</th>\n", - " <td>COVID-19(217).png</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2903</th>\n", - " <td>COVID-19(218).png</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2904</th>\n", - " <td>COVID-19(219).png</td>\n", - " <td>COVID-19</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>2905 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " FILENAME CLASS\n", - "0 NORMAL (1).png NORMAL\n", - "1 NORMAL (10).png NORMAL\n", - "2 NORMAL (100).png NORMAL\n", - "3 NORMAL (101).png NORMAL\n", - "4 NORMAL (102).png NORMAL\n", - "... ... ...\n", - "2900 COVID-19(215).png COVID-19\n", - "2901 COVID-19(216).png COVID-19\n", - "2902 COVID-19(217).png COVID-19\n", - "2903 COVID-19(218).png COVID-19\n", - "2904 COVID-19(219).png COVID-19\n", - "\n", - "[2905 rows x 2 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(list(zip(files, clase)), \n", - " columns =['FILENAME', 'CLASS']) \n", - "df " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Separacion en entrenamiento y prueba" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "train, test = train_test_split(df, test_size=1/3, stratify=df.CLASS)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "METADATA:\n", - "Total frecuencies: \n", - " Viral Pneumonia 1345\n", - "NORMAL 1341\n", - "COVID-19 219\n", - "Name: CLASS, dtype: int64\n", - "Proportion: \n", - " Viral Pneumonia 0.462995\n", - "NORMAL 0.461618\n", - "COVID-19 0.075387\n", - "Name: CLASS, dtype: float64\n", - "----------------------------------------------\n", - "TRAIN:\n", - "Total frecuencies: \n", - " Viral Pneumonia 896\n", - "NORMAL 894\n", - "COVID-19 146\n", - "Name: CLASS, dtype: int64\n", - "Proportion: \n", - " Viral Pneumonia 0.462810\n", - "NORMAL 0.461777\n", - "COVID-19 0.075413\n", - "Name: CLASS, dtype: float64\n", - "----------------------------------------------\n", - "TEST:\n", - "Total frecuencies: \n", - " Viral Pneumonia 449\n", - "NORMAL 447\n", - "COVID-19 73\n", - "Name: CLASS, dtype: int64\n", - "Proportion: \n", - " Viral Pneumonia 0.463364\n", - "NORMAL 0.461300\n", - "COVID-19 0.075335\n", - "Name: CLASS, dtype: float64\n" - ] - } - ], - "source": [ - "print(\"METADATA:\")\n", - "print(\"Total frecuencies: \\n\",df.CLASS.value_counts())\n", - "print(\"Proportion: \\n\",df.CLASS.value_counts()/df.shape[0])\n", - "print(\"----------------------------------------------\")\n", - "print(\"TRAIN:\")\n", - "print(\"Total frecuencies: \\n\",train.CLASS.value_counts())\n", - "print(\"Proportion: \\n\",train.CLASS.value_counts()/train.shape[0])\n", - "print(\"----------------------------------------------\")\n", - "print(\"TEST:\")\n", - "print(\"Total frecuencies: \\n\",test.CLASS.value_counts())\n", - "print(\"Proportion: \\n\",test.CLASS.value_counts()/test.shape[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Comprobamos que las proporciones se mantienen, hemos realizado un muestreo balanceado." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(train.shape[0]):\n", - " #train.iloc[i,1] = CLASE\n", - " #train.iloc[i,0] = FILE NAME\n", - " src = os.path.join(\"../datos\",str(train.iloc[i,1]),str(train.iloc[i,0]))\n", - " dst = os.path.join(\"../datos/train\",str(train.iloc[i,1]),str(train.iloc[i,0]))\n", - " shutil.copyfile(src,dst)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(test.shape[0]):\n", - " #train.iloc[i,1] = CLASE\n", - " #train.iloc[i,0] = FILE NAME\n", - " src = os.path.join(\"../datos\",str(test.iloc[i,1]),str(test.iloc[i,0]))\n", - " dst = os.path.join(\"../datos/test\",str(test.iloc[i,1]),str(test.iloc[i,0]))\n", - " shutil.copyfile(src,dst)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} -- GitLab