Building an Image Classification Model (feat. wandb)
swwho
2025. 2. 19. 22:23
Goal
- Classify images belonging to 5 classes (each class has its own folder, and the image files are split among them).
- Evaluate the model with f1_score, and log the training process to wandb.
Importing libraries
import os
import copy  # used to keep a deep copy of the best model during training
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from efficientnet_pytorch import EfficientNet
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
Import wandb and log in with the API key issued at wandb.ai.
import wandb
wandb.login()
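If you want to skip the interactive prompt, the key can also be passed in directly; a minimal sketch, assuming the key is stored in a WANDB_API_KEY environment variable:

import os
import wandb

# Non-interactive login: read the API key from an environment variable
# (assumes WANDB_API_KEY has already been set in the environment).
wandb.login(key=os.environ["WANDB_API_KEY"])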
Defining the data
|-- Cloth
    |-- cardigan
    |-- long_shirt
    |-- short_shirt
    |-- long_tshirt
    |-- short_tshirt
- Create a DataFrame of image paths and labels, and use it to split off the validation data.
PATH = "Data Path"
img_paths = []
labels = []
classes = {'cardigan':0, 'long_shirt':1, 'short_shirt':2, 'long_tshirt':3, 'short_tshirt':4}
for class_name, label in classes.items():
    class_dir = os.path.join(PATH, class_name)
    if os.path.exists(class_dir):
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            img_paths.append(img_path)
            labels.append(label)
train_df = pd.DataFrame({'img_paths':img_paths, 'labels':labels})
train_x, val_x, train_y, val_y = train_test_split(train_df['img_paths'], train_df['labels'], test_size=0.1, stratify=train_df['labels'], random_state=42)
train_x.shape, val_x.shape, train_y.shape, val_y.shape
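Since the split is stratified on the labels, both splits should keep roughly the same class ratio; a quick sanity check using pandas (a sketch):

# Compare class ratios in the train and validation splits.
print(train_y.value_counts(normalize=True).sort_index())
print(val_y.value_counts(normalize=True).sort_index())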
- Define the transform used to preprocess the images.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(degrees=(-45, 45)),
    transforms.ToTensor()
])
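The random flip and rotation are training-time augmentations; if you want to evaluate validation images without them, a separate transform can be defined (a minimal sketch, not used in the code below, which keeps a single transform for both splits):

# Validation images are usually evaluated without random augmentation.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])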
- Write a CustomDataset by subclassing Dataset from torch.utils.data.
class CustomDataset(Dataset):
    def __init__(self, data_df, transform=None, train=False):
        self.data_df = data_df
        self.train = train
        self.img_paths = self.data_df['img_paths'].tolist()
        if train:
            self.labels = self.data_df['labels'].tolist()
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img = Image.open(self.img_paths[idx]).convert('RGB')
        if self.transform:
            img = self.transform(img)
        if self.train:
            return img, self.labels[idx]
        return img
- Define the datasets and dataloaders for the train and validation splits.
# Build DataFrames from the stratified split so that train and validation use different samples
train_split_df = pd.DataFrame({'img_paths': train_x, 'labels': train_y}).reset_index(drop=True)
val_split_df = pd.DataFrame({'img_paths': val_x, 'labels': val_y}).reset_index(drop=True)

train_dataset = CustomDataset(train_split_df, transform=transform, train=True)
val_dataset = CustomDataset(val_split_df, transform=transform, train=True)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
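One batch can be pulled out to confirm that the data pipeline produces the expected shapes (a quick sketch):

# Fetch a single batch from the train dataloader and check tensor shapes.
imgs, lbls = next(iter(train_dataloader))
print(imgs.shape)  # expected: torch.Size([32, 3, 224, 224])
print(lbls.shape)  # expected: torch.Size([32])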
Defining the model
- Set the device (cpu / gpu) and load the model to use. Since there are 5 classes, the classifier head is created with num_classes=5.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=5)  # 5-class classifier head
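A dummy forward pass can confirm that the classifier head outputs 5 logits per image (a sketch):

# Shape check with a random input batch of two 224x224 RGB images.
model.to(device)
model.eval()
with torch.no_grad():
    dummy = torch.randn(2, 3, 224, 224).to(device)
    print(model(dummy).shape)  # expected: torch.Size([2, 5])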
Defining the training functions
def validation(model, criterion, val_dataloader, device):
    model.eval()
    val_loss = []
    all_preds = []
    gt = []
    with torch.no_grad():
        for data, label in tqdm(val_dataloader):
            data, label = data.to(device), label.to(device)
            output = model(data)
            loss = criterion(output, label)
            val_loss.append(loss.item())
            preds = torch.argmax(output, dim=1)
            all_preds.extend(preds.cpu().numpy())
            gt.extend(label.cpu().numpy())
    avg_loss = np.mean(val_loss)
    f1 = f1_score(gt, all_preds, average='macro')  # ground truth first, predictions second
    model.train()
    return avg_loss, f1
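For reference, sklearn's f1_score takes the ground-truth labels first and the predictions second, and average='macro' is the unweighted mean of the per-class F1 scores (a toy example):

from sklearn.metrics import f1_score

y_true = [0, 1, 2, 3, 4, 0]
y_pred = [0, 1, 2, 3, 0, 0]
print(f1_score(y_true, y_pred, average='macro'))  # unweighted mean of per-class F1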
def train(model, optimizer, train_dataloader, val_dataloader, device, num_epoch):
    wandb.init(
        project="cloth-classification",
        name="efficientnet-finetune",
        config={
            "epochs": num_epoch,
            "batch_size": train_dataloader.batch_size,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "optimizer": optimizer.__class__.__name__
        }
    )
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    best_val_score = 0
    best_model = None

    for epoch in range(1, num_epoch + 1):
        model.train()
        train_loss = []
        train_preds, train_labels = [], []
        for data, label in tqdm(train_dataloader):
            data = data.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            preds = torch.argmax(output, dim=1).cpu().numpy()
            train_preds.extend(preds)
            train_labels.extend(label.cpu().numpy())

        _val_loss, _val_score = validation(model, criterion, val_dataloader, device)
        _train_loss = np.mean(train_loss)
        _train_f1 = f1_score(train_labels, train_preds, average='macro')  # ground truth first

        wandb.log({
            "epoch": epoch,
            "train_loss": _train_loss,
            "train_f1": _train_f1,
            "val_loss": _val_loss,
            "val_f1": _val_score
        })
        print(f"Epoch: [{epoch}/{num_epoch}] | Train Loss: {_train_loss:.4f} | Val Loss: {_val_loss:.4f} | Val Score: {_val_score:.4f}")

        if _val_score > best_val_score:  # keep the checkpoint with the best validation F1
            best_val_score = _val_score
            best_model = copy.deepcopy(model)
            # state_dict must be saved to a file path, not a directory
            torch.save(model.state_dict(), './content/drive/MyDrive/models/efficientnet/best_model.pth')

    wandb.finish()
    return best_model
Training the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=2e-4)
num_epoch = 10
infer_model = train(model, optimizer, train_dataloader, val_dataloader, device, num_epoch)
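After training, the best checkpoint saved inside train() can be restored for inference (a sketch; loaded_model is just an illustrative name, and the path is the checkpoint path used above):

# Rebuild the architecture and load the best weights saved during training.
loaded_model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=5)
loaded_model.load_state_dict(torch.load('./content/drive/MyDrive/models/efficientnet/best_model.pth', map_location=device))
loaded_model.to(device)
loaded_model.eval()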
Results
- Because there are very few images per class, the model overfits.
- Logging the training process to wandb makes it possible to see the experiment results at a glance.