import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
from sklearn.metrics import accuracy_score
from shutil import copyfile
from tqdm import tqdm
# Define paths for original dataset and train/validation directories
original_data_dir = 'train_cropped'
train_dir = 'training_data'
val_dir = 'validation_data'
# Create train/validation directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
# Set the proportion of data to be used for validation (e.g., 20%)
validation_split = 0.2
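# Note (an optional addition, not in the original script): fixing the RNG seed here
# would make the shuffle below, and hence the train/validation split, reproducible
# across runs. The seed value is arbitrary; left commented out to keep the original behaviour.
# random.seed(42)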
# Iterate over each class folder in the original dataset
for class_name in os.listdir(original_data_dir):
    class_dir = os.path.join(original_data_dir, class_name)
    if os.path.isdir(class_dir):
        # Create corresponding class directories in the train/validation directories
        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        # Get the list of image files in the class directory
        image_files = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
        # Shuffle the list of image files
        random.shuffle(image_files)
        # Calculate the number of images to be copied to the validation set
        num_val_images = int(len(image_files) * validation_split)
        # Copy images into the train/validation directories
        for i, image_file in enumerate(image_files):
            src_path = os.path.join(class_dir, image_file)
            if i < num_val_images:
                dst_path = os.path.join(val_class_dir, image_file)
            else:
                dst_path = os.path.join(train_class_dir, image_file)
            copyfile(src_path, dst_path)
print("Data split into train and validation sets.")
# Define data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet statistics
])
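# Optional sketch (not in the original script): training-only augmentation could be
# added by giving the training set its own transform, e.g. a random horizontal flip.
# Shown commented out so the original behaviour (one shared transform) is unchanged.
# train_transform = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])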
# Load training dataset
train_dataset = ImageFolder(root=train_dir, transform=transform)
# Load validation dataset
val_dataset = ImageFolder(root=val_dir, transform=transform)
# Create DataLoader for training data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Create DataLoader for validation data
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
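# Note (an assumption about the runtime environment, not stated in the original):
# when training on a CUDA GPU, passing num_workers > 0 and pin_memory=True to
# DataLoader usually speeds up data loading; the defaults are kept here.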
# Load a pre-trained EfficientNet-B1 model with the classifier sized to the number of classes
model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=len(train_dataset.classes))
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Train the model
num_epochs = 8
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    pbar_train = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=True)
    for images, labels in pbar_train:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
        pbar_train.set_postfix({'loss': loss.item()})
    epoch_loss = running_loss / len(train_dataset)
    train_accuracy = correct_train / total_train

    # Validation
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0
    pbar_val = tqdm(val_loader, desc='Validation', leave=True)
    with torch.no_grad():
        for images, labels in pbar_val:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
            pbar_val.set_postfix({'loss': loss.item()})
    val_accuracy = correct_val / total_val
    val_loss /= len(val_dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {epoch_loss:.4f}, "
          f"Train Accuracy: {100 * train_accuracy:.2f}%, "
          f"Validation Loss: {val_loss:.4f}, "
          f"Validation Accuracy: {100 * val_accuracy:.2f}%")

    # Save model parameters after each epoch
    torch.save(model.state_dict(), f'newb1_train_model_epoch_{epoch+1}.pth')

print("Model parameters after each epoch saved.")