Skip to content
Snippets Groups Projects
Commit e8cbff6e authored by migtoqu's avatar migtoqu
Browse files

Delete covid-tfg-tfrecords.ipynb

parent 8b4fc3c6
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from glob import glob
```
%% Cell type:code id: tags:
``` python
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, MaxPooling2D, GlobalAveragePooling2D, Conv2D, Input, Flatten,BatchNormalization, Dropout, Activation, Add, LeakyReLU, ELU
import tensorflow
import tensorflow as tf
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.engine.topology import Layer
```
%% Cell type:code id: tags:
``` python
# Collect every image path under the train/test folders: one sub-directory
# per class, one file per image.
TRAIN_PATH = '../datos/train/*/*'
TEST_PATH = '../datos/test/*/*'
TRAIN_IMGS = glob(TRAIN_PATH)
TEST_IMGS = glob(TEST_PATH)
print(f'{len(TRAIN_IMGS)} {len(TEST_IMGS)}')
```
%% Output
1323 962
%% Cell type:code id: tags:
``` python
# Peek at the first two train and test paths to sanity-check the glob.
TRAIN_IMGS[: 2], TEST_IMGS[:2]
```
%% Output
(['../input/tfg-covid-data/train/NORMAL/NORMAL (159).png',
'../input/tfg-covid-data/train/NORMAL/NORMAL (28).png'],
['../input/tfg-covid-data/test/NORMAL/NORMAL (391).png',
'../input/tfg-covid-data/test/NORMAL/NORMAL (695).png'])
%% Cell type:code id: tags:
``` python
## Maps each class name to its integer label; num2label is the inverse
## lookup, derived so the two can never drift apart.
class_dict = {'NORMAL': 0, 'COVID-19': 1, 'Viral Pneumonia': 2}
num2label = {v: k for k, v in class_dict.items()}
```
%% Cell type:code id: tags:
``` python
### Generate integer labels from the class folder of each image path.
# BUG FIX: the original used filename.split('/')[4], which hard-codes the
# path depth. That index holds the class name for the
# '../input/tfg-covid-data/...' layout seen in the outputs, but for the
# '../datos/train/<class>/<file>' layout used above, index 4 is the *file
# name*, so the assert would fail. Taking the parent-directory name via
# os.path is robust to path depth and OS separators.
def _label_from_path(filename):
    # The class is the name of the directory that directly contains the file.
    class_name = os.path.basename(os.path.dirname(filename))
    assert class_name in class_dict, class_name
    return class_dict[class_name]

TRAIN_LABELS = [_label_from_path(f) for f in TRAIN_IMGS]
TEST_LABELS = [_label_from_path(f) for f in TEST_IMGS]
```
%% Cell type:code id: tags:
``` python
# Sanity check: label counts match the image counts; peek at first labels.
len(TRAIN_LABELS) , len(TEST_LABELS), TRAIN_LABELS[:3], TEST_LABELS[:3]
```
%% Output
(1323, 962, [0, 0, 0], [0, 0, 0])
%% Cell type:code id: tags:
``` python
NCLASSES = 3         # NORMAL / COVID-19 / Viral Pneumonia
SIZE = (1024, 1024)  # target (height, width) for every decoded image
BATCH_SIZE = 16      # nominal batch size (the dataloaders below hard-code their own)
```
%% Cell type:code id: tags:
``` python
def decode_image(filename, label=None, image_size=SIZE):
    """Read a PNG file as a single-channel float32 tensor in [0, 1],
    resized to image_size; returns (image, label) when a label is given."""
    raw = tf.io.read_file(filename)
    img = tf.image.decode_png(raw, channels=1)
    img = tf.image.resize(tf.cast(img, tf.float32) / 255.0, image_size)
    return img if label is None else (img, label)
```
%% Cell type:code id: tags:
``` python
# Visual sanity check: decode one training image and display it with its
# class name and numeric label as the title.
image, label = decode_image(TRAIN_IMGS[0], TRAIN_LABELS[0])
plt.imshow(image)
plt.title(f'{num2label[label]}{label}')
```
%% Cell type:code id: tags:
``` python
# Inspect one raw file's shape and intensity range.
# BUG FIX: the original called cv2.imread, but cv2 is never imported in
# this notebook (NameError). tf.image — already imported — gives the same
# uint8 view of the PNG without adding a dependency.
bits = tf.io.read_file('../input/tfg-covid-data/train/Viral Pneumonia/Viral Pneumonia (1000).png')
img = tf.image.decode_png(bits).numpy()
img.shape, img.min(), img.max()
```
%% Cell type:code id: tags:
``` python
## DATALOADERS
# tf.data pipelines over (path, label) pairs; decode_image loads and
# normalises each file on the fly.
AUTO = tf.data.experimental.AUTOTUNE
# BUG FIX: from_tensor_slices was called through tf.data.TFRecordDataset,
# but no TFRecords are involved — the method actually invoked is the one
# inherited from tf.data.Dataset, so name that class directly.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((TRAIN_IMGS, TRAIN_LABELS))
    .map(decode_image, num_parallel_calls=AUTO)
    .repeat()
    .shuffle(1024)
    .batch(44)  # hard-coded; must stay in sync with STEPS_PER_EPOCH below
    #.batch(BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((TEST_IMGS, TEST_LABELS))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(19)  # hard-coded; must stay in sync with VALIDATION_STEPS below
    #.batch(BATCH_SIZE)
)
```
%% Cell type:code id: tags:
``` python
# Pull a single batch from the training pipeline, print its shapes, and
# display the first image of that batch.
for element in train_dataset:
    image, label = element[0], element[1]
    print(image.shape, label.shape)
    for img, lbl in zip(image, label):  # iterate the batch
        plt.imshow(img)
        plt.show()
        break  # first image only
    break  # first batch only
```
%% Cell type:code id: tags:
``` python
# arquitectura Teodoro — VGG-style stack of 3x3 convolutions with 3x3
# max-pooling on 1024x1024 grayscale inputs, 3-way softmax head.
EPOCHS = 100
model = Sequential()
# BUG FIX: the first layer originally read
#   model.add(Conv2D(filters=8 , (3 , activation='relu', input_shape=(1024,1024,1)))
# which is a SyntaxError (mismatched parentheses, stray "(3");
# kernel_size=3 restores the intended call, matching every other conv.
model.add(Conv2D(filters=8, kernel_size=3, activation='relu', input_shape=(1024, 1024, 1)))
model.add(Conv2D(filters=8, kernel_size=3, activation='relu'))
model.add(MaxPooling2D((3, 3)))
model.add(Conv2D(filters=16, kernel_size=3, activation='relu'))
model.add(Conv2D(filters=16, kernel_size=3, activation='relu'))
model.add(MaxPooling2D((3, 3)))
model.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model.add(MaxPooling2D((3, 3)))
model.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling2D((3, 3)))
model.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
model.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPooling2D((3, 3)))
#model.add(GlobalAveragePooling2D()) # more efficient than Flatten
model.add(Flatten())  # or GAP = Global Average Pooling
model.add(Dense(256, activation="relu"))
#model.add(Dropout(0.5))
model.add(Dense(NCLASSES, activation='softmax'))
print(model.summary())
model.compile(optimizer=RMSprop(0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
```
%% Output
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_10 (Conv2D) (None, 1022, 1022, 8) 80
_________________________________________________________________
conv2d_11 (Conv2D) (None, 1020, 1020, 8) 584
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 510, 510, 8) 0
_________________________________________________________________
conv2d_12 (Conv2D) (None, 508, 508, 16) 1168
_________________________________________________________________
conv2d_13 (Conv2D) (None, 506, 506, 16) 2320
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 253, 253, 16) 0
_________________________________________________________________
conv2d_14 (Conv2D) (None, 251, 251, 32) 4640
_________________________________________________________________
conv2d_15 (Conv2D) (None, 249, 249, 32) 9248
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 124, 124, 32) 0
_________________________________________________________________
conv2d_16 (Conv2D) (None, 122, 122, 64) 18496
_________________________________________________________________
conv2d_17 (Conv2D) (None, 120, 120, 64) 36928
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 60, 60, 64) 0
_________________________________________________________________
conv2d_18 (Conv2D) (None, 58, 58, 128) 73856
_________________________________________________________________
conv2d_19 (Conv2D) (None, 56, 56, 128) 147584
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 28, 28, 128) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 100352) 0
_________________________________________________________________
dense_2 (Dense) (None, 256) 25690368
_________________________________________________________________
dense_3 (Dense) (None, 3) 771
=================================================================
Total params: 25,986,043
Trainable params: 25,986,043
Non-trainable params: 0
_________________________________________________________________
None
%% Cell type:code id: tags:
``` python
# Second architecture: LAYERS double-conv blocks ('same' padding, 2x2
# max-pooling, filter width doubling each block), then a 512-unit dense
# head with dropout and a 3-way softmax.
LAYERS = 4
EPOCHS = 5
model = Sequential()
# NOTE(review): 3 input channels are declared here, while decode_image()
# above emits 1-channel images — confirm which pipeline feeds this model.
model.add(Input((SIZE[0], SIZE[0], 3)))  # (16, 512, 512, 3)
for block_idx in range(1, LAYERS + 1):
    print('block', block_idx)
    width = 16 * (2 ** block_idx)  # 32, 64, 128, 256 filters
    model.add(Conv2D(filters=width, kernel_size=3, padding='same', activation='relu'))
    model.add(Conv2D(filters=width, kernel_size=3, padding='same', activation='relu'))
    #model.add(Conv2D(filters=width, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling2D((2, 2)))
#model.add(GlobalAveragePooling2D()) # more efficient than Flatten
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))  # regularisation
model.add(Dense(NCLASSES, activation='softmax'))
print(model.summary())
model.compile(optimizer=RMSprop(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
```
%% Cell type:code id: tags:
``` python
# Steps derived from the hard-coded batch sizes used by the dataloaders
# above (44 for train, 19 for validation) — keep the three cells in sync.
STEPS_PER_EPOCH = len(TRAIN_IMGS) // 44#BATCH_SIZE
# NOTE(review): this bare tuple only displays when it is the cell's last
# line; here it is evaluated and silently discarded.
BATCH_SIZE, EPOCHS, STEPS_PER_EPOCH
VALIDATION_STEPS = len(TEST_IMGS) // 19
```
%% Cell type:code id: tags:
``` python
# Train: train_dataset repeats forever, so steps_per_epoch bounds each
# epoch. NOTE(review): batch_size should have no effect here since the
# datasets are already batched — confirm for this Keras version.
hist = model.fit(train_dataset, validation_data=test_dataset, batch_size=BATCH_SIZE, epochs=EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, validation_steps=VALIDATION_STEPS)
```
%% Output
Epoch 1/100
30/30 [==============================] - 89s 2s/step - loss: 0.9596 - accuracy: 0.5794 - val_loss: 0.7236 - val_accuracy: 0.5632
Epoch 2/100
30/30 [==============================] - 52s 2s/step - loss: 0.5108 - accuracy: 0.8070 - val_loss: 0.3412 - val_accuracy: 0.8726
Epoch 3/100
30/30 [==============================] - 52s 2s/step - loss: 0.3205 - accuracy: 0.8718 - val_loss: 0.4332 - val_accuracy: 0.8126
Epoch 4/100
30/30 [==============================] - 52s 2s/step - loss: 0.2543 - accuracy: 0.9051 - val_loss: 0.3484 - val_accuracy: 0.8663
Epoch 5/100
30/30 [==============================] - 52s 2s/step - loss: 0.2787 - accuracy: 0.9041 - val_loss: 0.2671 - val_accuracy: 0.9063
Epoch 6/100
30/30 [==============================] - 52s 2s/step - loss: 0.2398 - accuracy: 0.9090 - val_loss: 0.2619 - val_accuracy: 0.9074
Epoch 7/100
30/30 [==============================] - 52s 2s/step - loss: 0.2304 - accuracy: 0.9126 - val_loss: 0.2845 - val_accuracy: 0.8895
Epoch 8/100
30/30 [==============================] - 51s 2s/step - loss: 0.1868 - accuracy: 0.9378 - val_loss: 0.3402 - val_accuracy: 0.8800
Epoch 9/100
30/30 [==============================] - 52s 2s/step - loss: 0.1531 - accuracy: 0.9444 - val_loss: 0.2378 - val_accuracy: 0.9242
Epoch 10/100
30/30 [==============================] - 51s 2s/step - loss: 0.1818 - accuracy: 0.9302 - val_loss: 0.2493 - val_accuracy: 0.9126
Epoch 11/100
30/30 [==============================] - 52s 2s/step - loss: 0.1632 - accuracy: 0.9474 - val_loss: 0.3234 - val_accuracy: 0.8905
Epoch 12/100
30/30 [==============================] - 51s 2s/step - loss: 0.1516 - accuracy: 0.9480 - val_loss: 0.3337 - val_accuracy: 0.8916
Epoch 13/100
30/30 [==============================] - 52s 2s/step - loss: 0.1084 - accuracy: 0.9574 - val_loss: 0.3399 - val_accuracy: 0.8937
Epoch 14/100
30/30 [==============================] - 51s 2s/step - loss: 0.0936 - accuracy: 0.9671 - val_loss: 0.2425 - val_accuracy: 0.9284
Epoch 15/100
30/30 [==============================] - 51s 2s/step - loss: 0.0817 - accuracy: 0.9687 - val_loss: 0.2315 - val_accuracy: 0.9263
Epoch 16/100
30/30 [==============================] - 52s 2s/step - loss: 0.0639 - accuracy: 0.9789 - val_loss: 0.4438 - val_accuracy: 0.8411
Epoch 17/100
30/30 [==============================] - 51s 2s/step - loss: 0.1003 - accuracy: 0.9647 - val_loss: 0.2368 - val_accuracy: 0.9316
Epoch 18/100
30/30 [==============================] - 52s 2s/step - loss: 0.0768 - accuracy: 0.9680 - val_loss: 0.2742 - val_accuracy: 0.9274
Epoch 19/100
30/30 [==============================] - 52s 2s/step - loss: 0.0739 - accuracy: 0.9697 - val_loss: 0.2523 - val_accuracy: 0.9316
Epoch 20/100
30/30 [==============================] - 51s 2s/step - loss: 0.0566 - accuracy: 0.9786 - val_loss: 0.3835 - val_accuracy: 0.8947
Epoch 21/100
30/30 [==============================] - 52s 2s/step - loss: 0.0379 - accuracy: 0.9860 - val_loss: 0.3615 - val_accuracy: 0.9189
Epoch 22/100
30/30 [==============================] - 51s 2s/step - loss: 0.0537 - accuracy: 0.9818 - val_loss: 0.2796 - val_accuracy: 0.9326
Epoch 23/100
30/30 [==============================] - 52s 2s/step - loss: 0.0420 - accuracy: 0.9833 - val_loss: 0.5393 - val_accuracy: 0.8537
Epoch 24/100
30/30 [==============================] - 51s 2s/step - loss: 0.0332 - accuracy: 0.9866 - val_loss: 0.5634 - val_accuracy: 0.8632
Epoch 25/100
30/30 [==============================] - 52s 2s/step - loss: 0.0691 - accuracy: 0.9784 - val_loss: 0.3830 - val_accuracy: 0.9179
Epoch 26/100
30/30 [==============================] - 51s 2s/step - loss: 0.1033 - accuracy: 0.9678 - val_loss: 0.3829 - val_accuracy: 0.9263
Epoch 27/100
30/30 [==============================] - 51s 2s/step - loss: 0.0151 - accuracy: 0.9965 - val_loss: 0.3094 - val_accuracy: 0.9442
Epoch 28/100
30/30 [==============================] - 51s 2s/step - loss: 0.0354 - accuracy: 0.9892 - val_loss: 0.2608 - val_accuracy: 0.9474
Epoch 29/100
30/30 [==============================] - 52s 2s/step - loss: 0.0180 - accuracy: 0.9974 - val_loss: 0.3035 - val_accuracy: 0.9347
Epoch 30/100
30/30 [==============================] - 51s 2s/step - loss: 0.0122 - accuracy: 0.9972 - val_loss: 0.3220 - val_accuracy: 0.9474
Epoch 31/100
30/30 [==============================] - 52s 2s/step - loss: 0.0369 - accuracy: 0.9884 - val_loss: 0.3080 - val_accuracy: 0.9347
Epoch 32/100
30/30 [==============================] - 52s 2s/step - loss: 0.0021 - accuracy: 1.0000 - val_loss: 0.4127 - val_accuracy: 0.9295
Epoch 33/100
30/30 [==============================] - 51s 2s/step - loss: 0.0405 - accuracy: 0.9883 - val_loss: 0.3328 - val_accuracy: 0.9389
Epoch 34/100
30/30 [==============================] - 52s 2s/step - loss: 6.7841e-04 - accuracy: 1.0000 - val_loss: 0.4397 - val_accuracy: 0.9295
Epoch 35/100
30/30 [==============================] - 51s 2s/step - loss: 0.0207 - accuracy: 0.9958 - val_loss: 0.4035 - val_accuracy: 0.9347
Epoch 36/100
30/30 [==============================] - 52s 2s/step - loss: 0.0039 - accuracy: 0.9990 - val_loss: 0.4232 - val_accuracy: 0.9274
Epoch 37/100
30/30 [==============================] - 52s 2s/step - loss: 0.0071 - accuracy: 0.9970 - val_loss: 0.4524 - val_accuracy: 0.9379
Epoch 38/100
30/30 [==============================] - 51s 2s/step - loss: 0.0127 - accuracy: 0.9962 - val_loss: 0.4557 - val_accuracy: 0.9316
Epoch 39/100
30/30 [==============================] - 51s 2s/step - loss: 5.2700e-04 - accuracy: 1.0000 - val_loss: 0.4592 - val_accuracy: 0.9358
Epoch 40/100
30/30 [==============================] - 52s 2s/step - loss: 0.0053 - accuracy: 0.9990 - val_loss: 0.4466 - val_accuracy: 0.9284
Epoch 41/100
30/30 [==============================] - 51s 2s/step - loss: 0.0012 - accuracy: 1.0000 - val_loss: 0.4651 - val_accuracy: 0.9411
Epoch 42/100
30/30 [==============================] - 52s 2s/step - loss: 9.5511e-05 - accuracy: 1.0000 - val_loss: 0.5212 - val_accuracy: 0.9389
Epoch 43/100
30/30 [==============================] - 51s 2s/step - loss: 0.0421 - accuracy: 0.9905 - val_loss: 0.3996 - val_accuracy: 0.9347
Epoch 44/100
30/30 [==============================] - 52s 2s/step - loss: 5.2430e-04 - accuracy: 1.0000 - val_loss: 0.4369 - val_accuracy: 0.9432
Epoch 45/100
30/30 [==============================] - 51s 2s/step - loss: 8.4447e-05 - accuracy: 1.0000 - val_loss: 0.5082 - val_accuracy: 0.9411
Epoch 46/100
30/30 [==============================] - 52s 2s/step - loss: 0.0101 - accuracy: 0.9989 - val_loss: 0.5193 - val_accuracy: 0.9147
Epoch 47/100
30/30 [==============================] - 52s 2s/step - loss: 0.0043 - accuracy: 0.9978 - val_loss: 0.5534 - val_accuracy: 0.9263
Epoch 48/100
30/30 [==============================] - 51s 2s/step - loss: 2.3153e-04 - accuracy: 1.0000 - val_loss: 0.5945 - val_accuracy: 0.9295
Epoch 49/100
30/30 [==============================] - 52s 2s/step - loss: 4.6704e-05 - accuracy: 1.0000 - val_loss: 0.6543 - val_accuracy: 0.9295
Epoch 50/100
30/30 [==============================] - 51s 2s/step - loss: 1.4655e-05 - accuracy: 1.0000 - val_loss: 0.6759 - val_accuracy: 0.9316
Epoch 51/100
25/30 [========================>.....] - ETA: 6s - loss: 0.0328 - accuracy: 0.9955
%% Cell type:code id: tags:
``` python
# Training vs. validation LOSS curves per epoch.
# BUG FIX: this cell plots loss but was titled and labelled "accuracy";
# title, y-label and legend now describe what is actually plotted.
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title("model loss")
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.legend(["Loss", "Validation Loss"])
#plt.savefig("history_vgg16.png")
plt.show()
```
%% Cell type:code id: tags:
``` python
# Training vs. validation ACCURACY curves per epoch.
plt.plot(hist.history["accuracy"])
plt.plot(hist.history['val_accuracy'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
# BUG FIX: the legend listed four entries for only two plotted lines,
# mislabelling the curves; only the two accuracy series are drawn here.
plt.legend(["Accuracy", "Validation Accuracy"])
#plt.savefig("history_vgg16.png")
plt.show()
```
%% Cell type:code id: tags:
``` python
from keras.models import load_model

# Persist the trained network (136M params, 3-channel variant) to disk;
# uncomment the load_model line to restore it later.
model.save('model.h5')
#model = load_model('modelo136M3C.h5')
```
%% Cell type:code id: tags:
``` python
## Predict test data and report overall accuracy against the true labels.
predictions = model.predict(test_dataset)
print(predictions.shape)
# BUG FIX: np.int was deprecated and removed in NumPy 1.24; the builtin
# int is the supported spelling.
predictions = np.argmax(predictions, axis=1).astype(int)
print(accuracy_score(TEST_LABELS, predictions))
```
%% Cell type:code id: tags:
``` python
# BUG FIX: target_names was a *set*, whose iteration order is arbitrary,
# so classification_report could attach names to the wrong label rows.
# It must be a sequence ordered by label value — 0=NORMAL, 1=COVID-19,
# 2=Viral Pneumonia, matching class_dict above (casing fixed to match).
target_names = ['NORMAL', 'COVID-19', 'Viral Pneumonia']
print('Confusion matrix:')
c = confusion_matrix(TEST_LABELS, predictions)
print(c)
print('Classification report')
print(classification_report(TEST_LABELS, predictions, target_names=target_names))
```
%% Cell type:code id: tags:
``` python
# Render the confusion matrix as an annotated heatmap and save it to disk.
axis_labels = range(3)
df = pd.DataFrame(c, axis_labels, axis_labels)
plt.figure(figsize=(6, 4))
sns.set(font_scale=1.4)
sns.heatmap(df, annot=True, annot_kws={"size": 16}, cmap="Blues")
plt.savefig("conf_mat_modelo136M3C.png")
plt.show()
```
%% Cell type:code id: tags:
``` python
# Architecture recap of the most recently built model.
model.summary()
```
%% Output
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 1022, 1022, 8) 80
_________________________________________________________________
conv2d_1 (Conv2D) (None, 1020, 1020, 8) 584
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 340, 340, 8) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 338, 338, 16) 1168
_________________________________________________________________
conv2d_3 (Conv2D) (None, 336, 336, 16) 2320
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 168, 168, 16) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 166, 166, 32) 4640
_________________________________________________________________
conv2d_5 (Conv2D) (None, 164, 164, 32) 9248
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 82, 82, 32) 0
_________________________________________________________________
conv2d_6 (Conv2D) (None, 80, 80, 64) 18496
_________________________________________________________________
conv2d_7 (Conv2D) (None, 78, 78, 64) 36928
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 39, 39, 64) 0
_________________________________________________________________
conv2d_8 (Conv2D) (None, 37, 37, 128) 73856
_________________________________________________________________
conv2d_9 (Conv2D) (None, 35, 35, 128) 147584
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 17, 17, 128) 0
_________________________________________________________________
flatten (Flatten) (None, 36992) 0
_________________________________________________________________
dense (Dense) (None, 256) 9470208
_________________________________________________________________
dense_1 (Dense) (None, 3) 771
=================================================================
Total params: 9,765,883
Trainable params: 9,765,883
Non-trainable params: 0
_________________________________________________________________
%% Cell type:code id: tags:
``` python
## Single-image inference demo: load one COVID-19 test scan preprocessed
## like decode_image (grayscale, 1024x1024, scaled to [0, 1]) and print
## the predicted class name.
from keras.preprocessing import image
img = image.load_img("../datos/test/COVID-19/COVID-19(135).png",target_size=(1024,1024), color_mode="grayscale")
img = np.asarray(img) / 255.0
plt.imshow(img)
print(img.shape)
img = np.expand_dims(img, axis=0)  # add batch dimension -> (1, H, W)
img = np.expand_dims(img, axis=3)  # add channel dimension -> (1, H, W, 1)
print(img.shape)
from keras.models import load_model
#saved_model = load_model("./modelo136M3C.h5")
#output = saved_model.predict(img)
output = model.predict(img)
print(output)
# BUG FIX: np.int was removed in NumPy 1.24; use the builtin int instead.
prediction = np.argmax(output, axis=1).astype(int)
print(prediction)
print(num2label[prediction[0]])
```
%% Output
(1024, 1024)
(1, 1024, 1024, 1)
[[9.9999976e-01 7.2375179e-15 2.0856763e-07]]
[0]
NORMAL
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment