- Judging correct mask wearing by classifying mask-wearing images with deep learning
Libraries used
python 3.8.16
numpy 1.20.3
pandas 1.5.3
scipy 1.5.4
sklearn 0.0.post1
tensorflow 2.10.0
tensorflow-gpu 2.10.0
cuda V11.0
Dataset: the Kaggle medical masks dataset (https://www.kaggle.com/datasets/tapakah68/medical-masks-part1)
"500 GB of images for Face Mask Detection. Part 1": 250,000 images, 4 types of masks worn, 28,000 unique faces
TYPE 1 = without mask
TYPE 2,3 = incorrect mask
TYPE 4 = with mask
Other background photos : etc
Classified into 4 classes: with mask, without mask, incorrect mask, and etc
2,600 images per class, 10,400 images in total
Train/validation split ratio: 0.8 : 0.2 (a split sketch follows below)
Testing: planned by building an Android app
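flow_from_directory (used in the training script below) expects one sub-folder per class under both ./data/train and ./data/val. The following is a minimal sketch of the 0.8 : 0.2 split; it assumes the Kaggle images have already been sorted into per-class folders under ./data/all (that folder name and the class folder names are assumptions, not part of the original post), and it does not cap each class at 2,600 images.

import os
import random
import shutil

SRC = './data/all'       # assumed layout: ./data/all/<class_name>/*.jpg
TRAIN_DIR = './data/train'
VAL_DIR = './data/val'
VAL_RATIO = 0.2          # 0.8 : 0.2 train/validation split

random.seed(42)
for class_name in os.listdir(SRC):
    files = os.listdir(os.path.join(SRC, class_name))
    random.shuffle(files)
    n_val = int(len(files) * VAL_RATIO)
    for i, fname in enumerate(files):
        dst_root = VAL_DIR if i < n_val else TRAIN_DIR
        dst_dir = os.path.join(dst_root, class_name)
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(os.path.join(SRC, class_name, fname), os.path.join(dst_dir, fname))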
import tensorflow as tf
import numpy as np
import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
import imgaug.augmenters as iaa
import imgaug as ia
from imgaug import parameters as iap
from imgaug.augmenters.arithmetic import Invert
from imgaug.augmenters import size
import os
# Configure tensorflow-gpu: how GPU memory is allocated
config = tf.compat.v1.ConfigProto(
gpu_options=tf.compat.v1.GPUOptions(
allow_growth=True # True -> allocate GPU memory as needed, False -> allocate all of it up front
# per_process_gpu_memory_fraction=0.5 # fraction of total GPU memory to use, between 0 and 1
)
)
sess = tf.compat.v1.Session(config=config)
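# Editor's note (sketch, not part of the original code): in TF 2.x the same
# memory-growth behaviour can also be enabled without a compat.v1 session:
# for gpu in tf.config.list_physical_devices('GPU'):
#     tf.config.experimental.set_memory_growth(gpu, True)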
# Path settings
train_dir = './data/train'
val_dir = './data/val'
ModelCheckpoint_path = './data/ModelCheckpoint2.hdf5'
# Image size 224 x 224 px, color images (3 channels)
im_height = 224
im_width = 224
im_channel = 3
batch_size = 32
epochs = 5
# 4 classes: with mask, without mask, incorrect mask, etc
classes = 4
# Image preprocessing
# Use augmentation to increase the amount of training data (sometimes: applied to 20% of images)
def preprocessing(train_dir, val_dir):
def sometimes(aug): return iaa.Sometimes(0.2, aug)
seq_train = iaa.Sequential(
[
#
# Apply the following augmenters to most images.
#
iaa.Fliplr(0.5), # horizontally flip 50% of all images
iaa.Flipud(0.2), # vertically flip 20% of all images
# crop some of the images by 0-10% of their height/width
sometimes(iaa.Crop(percent=(0, 0.1))),
# Apply affine transformations to some of the images
# - scale to 80-120% of image height/width (each axis independently)
# - translate by -20 to +20 relative to height/width (per axis)
# - rotate by -45 to +45 degrees
# - shear by -16 to +16 degrees
# - order: use nearest neighbour or bilinear interpolation (fast)
# - mode: use any available mode to fill newly created pixels
# see API or scikit-image for which modes are available
# - cval: if the mode is constant, then use a random brightness
# for the newly created pixels (e.g. sometimes black,
# sometimes white)
sometimes(iaa.Affine(
scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
rotate=(-45, 45),
shear=(-16, 16),
order=[0, 1],
cval=(0, 255),
mode=ia.ALL
)),
#
# Execute 0 to 5 of the following (less important) augmenters per
# image. Don't execute all of them, as that would often be way too
# strong.
#
iaa.SomeOf((0, 5),
[
# Convert some images into their superpixel representation,
# sample between 20 and 200 superpixels per image, but do
# not replace all superpixels with their average, only
# some of them (p_replace).
sometimes(
iaa.Superpixels(
p_replace=(0, 1.0),
n_segments=(20, 200)
)
),
# Blur each image with varying strength using
# gaussian blur (sigma between 0 and 3.0),
# average/uniform blur (kernel size between 2x2 and 7x7)
# median blur (kernel size between 3x3 and 11x11).
iaa.OneOf([
iaa.GaussianBlur((0, 3.0)),
iaa.AverageBlur(k=(2, 7)),
iaa.MedianBlur(k=(3, 11)),
]),
# Sharpen each image, overlay the result with the original
# image using an alpha between 0 (no sharpening) and 1
# (full sharpening effect).
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
# Same as sharpen, but for an embossing effect.
iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
# Search in some images either for all edges or for
# directed edges. These edges are then marked in a black
# and white image and overlayed with the original image
# using an alpha of 0 to 0.7.
sometimes(iaa.OneOf([
iaa.EdgeDetect(alpha=(0, 0.7)),
iaa.DirectedEdgeDetect(
alpha=(0, 0.7), direction=(0.0, 1.0)
),
])),
# Add gaussian noise to some images.
# In 50% of these cases, the noise is randomly sampled per
# channel and pixel.
# In the other 50% of all cases it is sampled once per
# pixel (i.e. brightness change).
iaa.AdditiveGaussianNoise(
loc=0, scale=(0.0, 0.05*255), per_channel=0.5
),
# Either drop randomly 1 to 10% of all pixels (i.e. set
# them to black) or drop them on an image with 2-5% percent
# of the original size, leading to large dropped
# rectangles.
iaa.OneOf([
iaa.Dropout((0.01, 0.1), per_channel=0.5),
iaa.CoarseDropout(
(0.03, 0.15), size_percent=(0.02, 0.05),
per_channel=0.2
),
]),
# Invert each image's channel with 5% probability.
# This sets each pixel value v to 255-v.
iaa.Invert(0.05, per_channel=True), # invert color channels
# Add a value of -10 to 10 to each pixel.
iaa.Add((-10, 10), per_channel=0.5),
# Change brightness of images (50-150% of original value).
iaa.Multiply((0.5, 1.5), per_channel=0.5),
# Improve or worsen the contrast of images.
iaa.LinearContrast((0.5, 2.0), per_channel=0.5),
# Convert each image to grayscale and then overlay the
# result with the original with random alpha. I.e. remove
# colors with varying strengths.
iaa.Grayscale(alpha=(0.0, 1.0)),
# In some images move pixels locally around (with random
# strengths).
sometimes(
iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)
),
# In some images distort local areas with varying strength.
sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05)))
],
# do all of the above augmentations in random order
random_order=True
)
],
# do all of the above augmentations in random order
random_order=True
)
def img_preprocessing_train(img):
# Reshape the image to 4D:
# (number of images, height, width, channels (3))
img = img.reshape(1, img.shape[0], img.shape[1], 3)
# imgaug can only augment integer images, so cast the image to uint8
img = img.astype(np.uint8)
# Augment the image and store the result in generate_img
generate_img = seq_train(images=img)
# Divide generate_img by 255 to turn pixel values into floats between 0 and 1
generate_img = generate_img / 255
# generate_img.reshape(height, width, channels)
# The array is currently (1, height, width, channels); since there is only one image, squeeze it back to (height, width, channels).
# .astype(np.float32): floating-point arrays default to float64, so convert to float32 so Keras can process the image
return generate_img.reshape(im_height, im_width, im_channel).astype(np.float32)
# Generator that reads the images used for training
train_datagen = ImageDataGenerator(
rotation_range=180, # rotate up to 180 degrees
width_shift_range=0.2, # horizontal shift up to 20% of the image width
height_shift_range=0.2, # vertical shift up to 20% of the image height
horizontal_flip=True, # random horizontal flips
vertical_flip=True, # random vertical flips
# rescale=1/255.0, # commented out because imgaug only augments integer images; rescaling is done in img_preprocessing_train instead
# Brightness adjustment: 0 = completely dark, 1 = original brightness, 0.5 = 50% of original, 1.2 = 20% brighter than original
brightness_range=[0.5, 1.2],
zoom_range=[0.8, 1.2], # zoom between 80% and 120% of the original size
preprocessing_function=img_preprocessing_train
)
train_generator = train_datagen.flow_from_directory(
directory=train_dir,
target_size=(im_height, im_width),
batch_size=batch_size,
class_mode='categorical',
shuffle=True)
print('done train_data_load\n')
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1/255.0,
shear_range=0.2,
zoom_range=0.2,
rotation_range=45,
horizontal_flip=True)
validation_generator = val_datagen.flow_from_directory(
directory=val_dir,
target_size=(im_height, im_width),
batch_size=batch_size,
class_mode='categorical',
shuffle=True)
print('done val_data_load\n')
return train_generator, validation_generator
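# Optional sanity check (editor's sketch, not in the original post): the class mapping
# and batch shapes returned by preprocessing() can be inspected like this:
# train_gen, val_gen = preprocessing(train_dir, val_dir)
# print(train_gen.class_indices)        # e.g. {'etc': 0, 'incorrect_mask': 1, ...} depending on folder names
# x_batch, y_batch = next(train_gen)
# print(x_batch.shape, y_batch.shape)   # expected: (32, 224, 224, 3) (32, 4)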
# Model for training (MobileNetV2, loading weights pre-trained on ImageNet)
def creat_model_and_fit(train_generator, validation_generator):
# Load the MobileNetV2 model that ships with Keras
MobileNetV2_layers = tf.keras.applications.mobilenet_v2.MobileNetV2(
weights='imagenet',
input_shape=(im_height, im_width, im_channel),
include_top=False,
classes=classes,
)
# Fine-tuning the full ImageNet model is too heavy (154 layers), so freeze the first 15 layers (up to the first Normalization layer) and train only the rest.
for layer in MobileNetV2_layers.layers[:15]:
# Do not update this layer's weights; keep the ImageNet pre-trained values as they are
layer.trainable = False
# Layer order: Convolution - Batch Normalization - Activation - Dropout - Pooling
model = Sequential()
model.add(MobileNetV2_layers)
# Pooling for the classifier head: GlobalAveragePooling2D is chosen, which is equivalent to
# AveragePooling2D with a pool size equal to the feature map, followed by Flatten.
model.add(tf.keras.layers.GlobalAveragePooling2D())
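# Editor's note (assumption): with a 224x224 input, MobileNetV2's include_top=False output
# is a 7x7x1280 feature map, so GlobalAveragePooling2D reduces it to a 1280-dim vector
# that feeds the softmax Dense layer added next.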
model.add(Dense(classes, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=0.0001),
loss='categorical_crossentropy',
metrics=['accuracy'])
# early_stopping= EarlyStopping(monitor='val_acc',min_delta=0,patience=0,verbose=0,mode='auto',baseline=None,restore_best_weights=True)
# mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
# reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')
model_checkpoint = ModelCheckpoint(
ModelCheckpoint_path, save_best_only=True, monitor='val_accuracy', mode='max')
history = model.fit(train_generator,
batch_size=batch_size,
epochs=epochs,
validation_data=(validation_generator),
# callbacks=[early_stopping_monitor,cb_checkpoint],
callbacks=[model_checkpoint],
)
return history
def handler():
train_generator, validation_generator = preprocessing(train_dir, val_dir)
history = creat_model_and_fit(train_generator, validation_generator)
if __name__ == "__main__":
handler()
-> Build the application with Android Studio
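To use the trained network on-device, the saved Keras checkpoint first has to be converted into a mobile-friendly format. Below is a hedged sketch (not part of the original code) that exports the ModelCheckpoint2.hdf5 file saved above to TensorFlow Lite; the output file name mask_classifier.tflite is an assumption.

import tensorflow as tf

# Load the best checkpoint saved by ModelCheckpoint during training
model = tf.keras.models.load_model('./data/ModelCheckpoint2.hdf5')

# Convert the Keras model to TensorFlow Lite for use in the Android app
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('./data/mask_classifier.tflite', 'wb') as f:
    f.write(tflite_model)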
To be continued...