Home Kaggle 마스크 타입 분류
Post
Cancel

Kaggle 마스크 타입 분류

Kaggle Face Mask Types Dataset

사용 라이브러리

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import glob
import pathlib
import splitfolders


import PIL
import cv2

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import plot_model

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report

Data Load

1
2
3
# Every file two levels below Dataset/train/ is an image belonging to one class.
pattern = "Dataset/train/*/*"
total_img = glob.glob(pattern)

print(f"Total Images: {len(total_img)}")
1
Total Images: 1956
1
2
3
4
5
6
# Build a lookup table of (file name, class label) for every training image.
# FIX: the original split paths on the literal "\\" separator, which only
# works for Windows-style paths; pathlib parses both "/" and "\" portably.
total_img = pd.Series(total_img)
total_df = pd.DataFrame()
total_df["Filename"] = total_img.map(lambda img_name: pathlib.Path(img_name).name)
total_df["ClassId"] = total_img.map(lambda img_name: pathlib.Path(img_name).parent.name)

total_df.head()
FilenameClassId
00.jpgcloth
11.jpgcloth
210.jpgcloth
3100.jpgcloth
4101.jpgcloth
1
2
# Per-class image counts over the whole dataset.
class_id_distributionTotal = total_df.ClassId.value_counts()
class_id_distributionTotal
1
2
3
4
5
6
nfm      474
cloth    396
n95v     390
n95      354
srg      342
Name: ClassId, dtype: int64
1
2
# Pie chart of the class balance (counts computed above).
fig = plt.figure(facecolor="white")
_ = plt.pie(
    class_id_distributionTotal.values,
    labels=class_id_distributionTotal.index,
    autopct="%.2f%%",
)

png

Data Split

1
2
# Copy images into output/{train,val,test}/<class>/ at an 80/10/10 split.
# NOTE: splitfolders interprets ratio as (train, val, test).
splitfolders.ratio(input="Dataset/train/", output="output", ratio=(0.8, 0.1, 0.1))
1
Copying files: 1956 files [00:01, 1260.90 files/s]
1
2
3
4
5
6
7
8
9
# Collect the copied image paths for each split and report their counts.
train_img = glob.glob("./output/train/*/*")
test_img = glob.glob("./output/test/*/*")
val_img = glob.glob("./output/val/*/*")

# Implicit string concatenation instead of backslash line continuations;
# the printed text (including the trailing spaces) is unchanged.
print(
    f"training images: {len(train_img)}\n"
    f"validation images: {len(val_img)}\n"
    f"test images: {len(test_img)}    "
)
1
2
3
training images: 1563
validation images: 194
test images: 199    
1
2
3
4
5
6
7
# Pie chart comparing the sizes of the three splits.
data = [len(split) for split in (train_img, val_img, test_img)]
labels = ["Train", "Val", "Test"]

fig = plt.figure(facecolor="white")
_ = plt.pie(data, labels=labels, autopct="%.2f%%")
plt.title("Ratio - [Train : Val : Test]")
plt.show()

png

Train / Validation / Test

1
2
3
4
5
6
7
8
9
10
11
12
def _split_dataframe(paths):
    """Map image paths to a DataFrame with Filename and ClassId columns.

    pathlib parses both "/" and "\\" separators; the original
    str.split("\\") only worked with Windows-style paths.
    """
    series = pd.Series(paths)
    return pd.DataFrame({
        "Filename": series.map(lambda p: pathlib.Path(p).name),
        "ClassId": series.map(lambda p: pathlib.Path(p).parent.name),
    })

# Train
train_image_names = pd.Series(train_img)  # kept for backward compatibility
train_df = _split_dataframe(train_img)
# Val
val_image_names = pd.Series(val_img)
val_df = _split_dataframe(val_img)
# Test
test_image_names = pd.Series(test_img)
test_df = _split_dataframe(test_img)
1
2
3
4
5
6
# One class-distribution pie per split (train, val, test).
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for axis, split_df in zip(axes, (train_df, val_df, test_df)):
    counts = split_df["ClassId"].value_counts()
    axis.pie(counts.values, labels=counts.index, autopct="%.2f%%")
plt.show()

png

Show Image

1
2
3
4
5
6
7
8
9
# Show 12 random training images with their class labels.
plot_df = train_df.sample(12).reset_index()
plt.figure(figsize=(15, 15))

for pos, row in plot_df.iterrows():
    plt.subplot(4, 4, pos + 1)
    img_path = os.path.join("./output/train/", row["ClassId"], row["Filename"])
    plt.imshow(plt.imread(img_path))
    plt.title(row["ClassId"])

png

Image Data Generator

1
2
3
4
5
6
7
# Augment only the training stream; val/test images pass through untouched.
# NOTE(review): no rescale= here — Keras EfficientNet models normalize
# inputs internally, so raw [0, 255] pixels are expected.
train_datagen = ImageDataGenerator(zoom_range=0.15, width_shift_range=0.2, shear_range=0.15)
test_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()

train_generator = train_datagen.flow_from_directory(
    "./output/train/", target_size=(300, 300), batch_size=32, shuffle=True, class_mode="categorical"
)
test_generator = test_datagen.flow_from_directory(
    "./output/test/", target_size=(300, 300), batch_size=32, shuffle=False, class_mode="categorical"
)
val_generator = val_datagen.flow_from_directory(
    "./output/val/", target_size=(300, 300), batch_size=32, shuffle=False, class_mode="categorical"
)
1
2
3
Found 1563 images belonging to 5 classes.
Found 199 images belonging to 5 classes.
Found 194 images belonging to 5 classes.

Modeling - EfficientNet

1
2
3
4
5
6
7
from tensorflow.keras.applications import EfficientNetB0

# Load EfficientNetB0 pretrained on ImageNet as a convolutional backbone.
# include_top=False drops the 1000-class ImageNet head so a custom
# 5-class classifier can be attached during fine-tuning below.
model = EfficientNetB0(
    input_shape=(300, 300, 3),
    include_top=False,
    weights="imagenet"
)

Fine Tuning

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Fine-tune: unfreeze the backbone, then re-freeze everything except its
# last 15 layers, and attach a dropout-regularized 5-way softmax head.
model.trainable = True
for frozen_layer in model.layers[:-15]:
    frozen_layer.trainable = False

head = tf.keras.layers.Flatten()(model.output)
head = tf.keras.layers.Dropout(0.5)(head)
head = tf.keras.layers.Dense(5, activation="softmax")(head)

model = tf.keras.Model(model.input, head)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)

Train

1
2
# Stop once val_accuracy has not improved for 20 epochs.
# FIX: without restore_best_weights the model kept after stopping is the
# LAST epoch's, not the best one seen (the ModelCheckpoint that would have
# saved the best model is commented out) — so evaluation used worse weights.
early_stopping = EarlyStopping(monitor="val_accuracy", mode="max", verbose=0, patience=20, restore_best_weights=True)
# model_check = ModelCheckpoint("model.h5", monitor="val_accuracy", mode="max", save_best_only=True)
1
2
# Train on GPU 0; the EarlyStopping callback may end training before epoch 50.
with tf.device("/device:GPU:0"):
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=50,
        verbose=1,
        callbacks=[early_stopping],
    )
1
2
3
4
5
6
7
8
9
10
11
Epoch 1/50
49/49 [==============================] - 25s 517ms/step - loss: 0.3727 - accuracy: 0.9469 - val_loss: 0.9907 - val_accuracy: 0.8918
Epoch 2/50
49/49 [==============================] - 26s 521ms/step - loss: 0.2210 - accuracy: 0.9623 - val_loss: 0.9097 - val_accuracy: 0.9021
Epoch 3/50
49/49 [==============================] - 26s 522ms/step - loss: 0.3565 - accuracy: 0.9584 - val_loss: 0.9695 - val_accuracy: 0.9175
...
Epoch 47/50
49/49 [==============================] - 31s 623ms/step - loss: 0.4125 - accuracy: 0.9834 - val_loss: 1.8275 - val_accuracy: 0.9227
Epoch 48/50
49/49 [==============================] - 27s 553ms/step - loss: 0.2847 - accuracy: 0.9795 - val_loss: 2.2491 - val_accuracy: 0.9124
1
2
3
4
5
6
# Learning curves: accuracy (left) and loss (right), train vs validation.
hist_df = pd.DataFrame(history.history)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 5))
hist_df[["accuracy", "val_accuracy"]].plot(ax=acc_ax)
hist_df[["loss", "val_loss"]].plot(ax=loss_ax)
plt.show()

png

Model Evaluation

1
2
3
# Evaluate the final model on the held-out test split.
test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
# FIX: cross-entropy loss is not a percentage — the original multiplied it by
# 100 and printed a misleading "Loss: 424.777". Only accuracy is scaled.
print('Loss: %.3f' % test_loss)
print('Accuracy: %.3f' % (test_acc * 100.0))
1
2
3
7/7 [==============================] - 3s 332ms/step - loss: 4.2478 - accuracy: 0.8995
Loss: 424.777
Accuracy: 89.950
1
2
3
4
5
# Per-class precision/recall/F1 on the test split.
# test_generator was built with shuffle=False, so .classes is in the same
# order as the predictions and the labels line up correctly.
y_val = test_generator.classes
y_pred = np.argmax(model.predict(test_generator), axis=1)

print(classification_report(y_val, y_pred))
1
2
3
4
5
6
7
8
9
10
11
              precision    recall  f1-score   support

           0       0.88      0.85      0.86        41
           1       0.91      0.81      0.85        36
           2       0.86      0.95      0.90        39
           3       0.96      1.00      0.98        48
           4       0.88      0.86      0.87        35

    accuracy                           0.90       199
   macro avg       0.90      0.89      0.89       199
weighted avg       0.90      0.90      0.90       199
This post is licensed under CC BY 4.0 by the author.

멋쟁이 사자처럼 AI School 12주차

DACON 쇼핑몰 지점별 매출액 예측 경진대회 1 EDA/Preprocessing

Comments powered by Disqus.