I have folder called preprocessed_data_png where the npy file of the images and annotations are. When I try to train the model I get the below error.
RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[32, 416, 416, 3] to have 3 channels, but got 416 channels instead
Convert DICOM to PNG
def preprocess_data(scan_dir, annotation_dir, output_dir):
dict = getUID_path(scan_dir)
annotation_paths = [os.path.join(annotation_dir, f) for f in os.listdir(annotation_dir) if f.endswith('.xml')]
dicom_paths = []
dicom_names = []
for full_path in annotation_paths:
base_path = os.path.basename(full_path)
dcm_path, dcm_name = dict[base_path[:-4]]
_, ext = os.path.splitext(dcm_name)
if ext in ['.dcm']:
dicom_names.append(dcm_name)
dicom_paths.append(dcm_path)
for dcm_path, dcm_name, annotation_path in zip(dicom_paths,dicom_names,annotation_paths):
im = pydicom.dcmread(dcm_path)
im = im.pixel_array.astype(float)
rescaled_image = (np.maximum(im,0)/im.max())*255
final_image = np.uint8(rescaled_image)
final_image = Image.fromarray(final_image)
name_without_extension, ext = os.path.splitext(dcm_name)
if ext.lower() == '.dcm':
dcm_name = name_without_extension
# final_image.save(output_dir,dcm_name+'.png')
# Save the image in the specified folder location with the correct format
final_image.save(os.path.join(output_dir, dcm_name+'.png'), format='PNG')
# Copy annotation file to output directory with the same name as DICOM image
copyfile(annotation_path, os.path.join(output_dir, dcm_name+'.xml'))
# Preprocess data
scan_dir = "/Dataset/Scans/Lung_Dx-B0001"
annotation_dir = "/Dataset/Annotations/B0001"
# Ensure the output directory exists, create it if not
output_dir = "/Dataset/preprocessed_png/"
if not os.path.exists(output_dir):
os.makedirs(output_dir)
preprocess_data(scan_dir, annotation_dir, output_dir)
Preprocess Code
import os
import cv2
import numpy as np
from xml.etree import ElementTree as ET
def read_png_file(png_path):
# Read PNG image using OpenCV
image = cv2.imread(png_path, cv2.IMREAD_GRAYSCALE)
return image
def parse_annotation(annotation_file):
tree = ET.parse(annotation_file)
root = tree.getroot()
bounding_boxes = []
for obj in root.findall('object'):
xmin = int(obj.find('bndbox').find('xmin').text)
ymin = int(obj.find('bndbox').find('ymin').text)
xmax = int(obj.find('bndbox').find('xmax').text)
ymax = int(obj.find('bndbox').find('ymax').text)
bounding_boxes.append([xmin, ymin, xmax, ymax])
return np.array(bounding_boxes)
def resize_and_normalize(scan, annotation):
# Resize scan to 416x416 and normalize pixel values to [0, 1]
resized_scan = cv2.resize(scan, (416, 416))
normalized_scan = resized_scan / 255.0
# Normalize bounding box coordinates
normalized_annotation = annotation / np.array([scan.shape[1], scan.shape[0], scan.shape[1], scan.shape[0]])
return normalized_scan, normalized_annotation
def convert_to_yolo_labels(annotations):
yolo_labels = []
for annotation in annotations:
x_center = (annotation[0] + annotation[2]) / 2.0
y_center = (annotation[1] + annotation[3]) / 2.0
width = annotation[2] - annotation[0]
height = annotation[3] - annotation[1]
yolo_label = [0, x_center, y_center, width, height]
yolo_labels.append(yolo_label)
return np.array(yolo_labels)
def preprocess_data(scan_dir, output_dir):
for filename in os.listdir(scan_dir):
if filename.endswith('.png'):
base_name = os.path.splitext(filename)[0]
png_path = os.path.join(scan_dir, filename)
annotation_path = os.path.join(scan_dir, base_name + '.xml')
# Read PNG scan
scan = read_png_file(png_path)
# Parse annotation XML file
annotations = parse_annotation(annotation_path)
# Resize and normalize scan and bounding boxes
resized_scan, resized_annotations = resize_and_normalize(scan, annotations)
# Convert to YOLO-style labels
yolo_labels = convert_to_yolo_labels(resized_annotations)
# Save preprocessed data
np.save(os.path.join(output_dir, f"{base_name}_scan.npy"), resized_scan)
np.save(os.path.join(output_dir, f"{base_name}_labels.npy"), yolo_labels)
# Set directories
scan_dir = "/Dataset/preprocessed_png" // folder where png and xml files are
output_dir = "/Dataset/preprocessed_data_png"
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)
# Preprocess data
preprocess_data(scan_dir, output_dir)
Model Code
class YOLOv7(nn.Module):
def __init__(self, num_classes):
super(YOLOv7, self).__init__()
self.num_classes = num_classes
# Define convolutional layers for feature extraction
self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
# Define fully connected layers for classification and bounding box regression
self.fc1 = nn.Linear(128 * 64 * 64, 1024)
self.fc2 = nn.Linear(1024, 256)
self.fc3 = nn.Linear(256, num_classes + 5) # 5 for bounding box coordinates
def forward(self, x):
# Feature extraction
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, kernel_size=2, stride=2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, kernel_size=2, stride=2)
x = F.relu(self.conv3(x))
x = F.max_pool2d(x, kernel_size=2, stride=2)
# Flatten the feature map
x = x.view(-1, 128 * 64 * 64)
# Fully connected layers for classification and bounding box regression
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
main.py
# Define the dataset class to load preprocessed data
class CustomDataset(Dataset):
def __init__(self, data_dir):
self.data_dir = data_dir
self.image_files = [f for f in os.listdir(data_dir) if f.endswith('_scan.npy')]
self.annotation_files = [f for f in os.listdir(data_dir) if f.endswith('_labels.npy')]
def __len__(self):
return len(self.image_files)
def __getitem__(self, idx):
image_file = os.path.join(self.data_dir, self.image_files[idx])
annotation_file = os.path.join(self.data_dir, self.annotation_files[idx])
image = np.load(image_file)
annotation = np.load(annotation_file)
# Convert grayscale image to 3 channels (if needed)
if len(image.shape) == 2:
image = np.stack((image,) * 3, axis=-1)
print(image.shape)
# Convert to tensor
image = torch.from_numpy(image).float()
annotation = torch.from_numpy(annotation).float()
return image, annotation
# Define training parameters
batch_size = 32
num_classes = 1
lr = 0.001
num_epochs = 10
# Create dataset and dataloader
train_dataset = CustomDataset("/content/drive/MyDrive/Dataset/preprocessed_data_png/")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Initialize model
model = YOLOv7(num_classes)
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Training loop
for epoch in range(num_epochs):
model.train()
total_loss = 0.0
for images, targets in train_loader:
# Forward pass
outputs = model(images)
# Compute loss
loss = criterion(outputs, targets)
# Backward pass and optimization
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_loss += loss.item()
print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")
# Evaluation
model.eval()
eval_loss = 0.0
eval_accuracy = 0.0
y_true = []
y_pred = []
with torch.no_grad():
for images, targets in train_loader:
outputs = model(images)
eval_loss += criterion(outputs, targets).item()
# Convert outputs and targets to numpy arrays
outputs_np = outputs.detach().cpu().numpy().round()
targets_np = targets.detach().cpu().numpy().round()
# Flatten arrays
outputs_flat = outputs_np.flatten()
targets_flat = targets_np.flatten()
# Calculate accuracy
eval_accuracy += calculate_accuracy(outputs_flat, targets_flat)
# Collect true and predicted labels for F1 score calculation
y_true.extend(targets_flat)
y_pred.extend(outputs_flat)
eval_loss /= len(train_loader)
eval_accuracy /= len(train_loader)
f1 = f1_score(y_true, y_pred)
print(f"Epoch [{epoch+1}/{num_epochs}], Evaluation Loss: {eval_loss:.4f}, Accuracy: {eval_accuracy:.4f}, F1 Score: {f1:.4f}")
I basically have DICOM files and xml annotation. When I tried with the dicom file got the same error. Then converted the files into png and made the .npy in the main dataset folder but still getting the same error. Trying to run on YOLOv7