Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import os
import shutil
import pandas as pd
# Preprocess images: sort the files listed in the CSV into one sub-folder per
# label underneath ``data_dir``.
data_dir = 'train_classified'


def organize_images(train_df: pd.DataFrame, data_dir: str,
                    filename_col: int = 1, label_col: int = 3) -> int:
    """Move every image listed in *train_df* into a per-label sub-folder.

    Args:
        train_df: Table with one row per image.
        data_dir: Folder that currently holds the images; the label
            sub-folders are created inside it.
        filename_col: Positional index of the column holding file names
            (defaults to the second column).
        label_col: Positional index of the column holding the labels
            (defaults to the fourth column).

    Returns:
        The number of rows whose file was not found in *data_dir*.
    """
    num_skipped = 0

    # Select the columns by position once, instead of indexing each row
    # Series with an integer key (deprecated positional fallback) and paying
    # for a Series construction per row via ``iterrows``.
    filenames = train_df.iloc[:, filename_col]
    labels = train_df.iloc[:, label_col]

    for raw_filename, raw_label in zip(filenames, labels):
        filename = str(raw_filename)
        label = str(raw_label)

        # Add a leading zero if the folder name has only one character.
        if len(label) == 1:
            label = f'0{label}'

        # The label folder is created even when the image turns out to be
        # missing; ``exist_ok`` avoids a separate existence check.
        label_dir = os.path.join(data_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        # Only regular files are moved. ``isfile`` (rather than ``exists``)
        # keeps a name that collides with a label folder from being treated
        # as an image and moved into itself.
        source_path = os.path.join(data_dir, filename)
        if os.path.isfile(source_path):
            shutil.move(source_path, os.path.join(label_dir, filename))
        else:
            print(f"Warning: File '{filename}' mentioned in the CSV was not found.")
            num_skipped += 1

    return num_skipped


def main() -> None:
    """Load the CSV of file names and labels, sort the images, and report."""
    train_df = pd.read_csv('train_updated.csv')
    num_skipped = organize_images(train_df, data_dir)
    print("Images have been moved to their respective folders.")
    print(f"Skipped {num_skipped} images.")


if __name__ == "__main__":
    main()