profile pic
⌘ '
raccourcis clavier

See also the Jupyter notebook and Kaggle.

Task 1: SVHN Image Classification Using CNN

class SVHNClassifier(nn.Module, PretrainedMixin):
  """CNN classifier mapping 3-channel images to 10 class logits.

  Three Conv -> BatchNorm -> ReLU -> MaxPool stages (32, 64 and 128 channels)
  feed a two-layer fully connected head. The head is sized for 32x32 inputs.
  ``forward`` returns raw logits; no softmax is applied in this module.
  """

  def __init__(self):
    super().__init__()

    # Each block applies BatchNorm2d right after its convolution. This is an
    # addition beyond the spec, kept for stability. Note that no dropout layer
    # is used anywhere in this model.
    self.convblock1 = nn.Sequential(
      nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
      nn.BatchNorm2d(32),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    self.convblock2 = nn.Sequential(
      nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
      nn.BatchNorm2d(64),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    self.convblock3 = nn.Sequential(
      nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
      nn.BatchNorm2d(128),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # Input size of the first fully connected layer:
    #   - the 3x3 / padding=1 / stride=1 convolutions keep the spatial size
    #   - each of the 3 max-pooling layers halves it: 32 -> 16 -> 8 -> 4
    #   - with 128 channels: 128 * 4 * 4 = 2048 features
    self.fc = nn.Sequential(nn.Linear(128 * 4 * 4, 128), nn.ReLU(), nn.Linear(128, 10))

  def forward(self, x):
    """Return class logits for a batch of images.

    Args:
      x: image batch of shape (N, 3, 32, 32); the channel count is fixed by the
        first convolution and the spatial size by the fully connected head.

    Returns:
      Tensor of shape (N, 10) holding unnormalised class scores.
    """
    x = self.convblock1(x)
    x = self.convblock2(x)
    x = self.convblock3(x)
    # flatten(1) keeps the batch dimension and copies only when required, so it
    # also handles non-contiguous activations (e.g. channels_last inputs), for
    # which view() would raise a RuntimeError.
    x = x.flatten(1)
    return self.fc(x)

Note that we include a small serialisation helper, PretrainedMixin, built on safetensors:

class PretrainedMixin:
  """Mixin that adds checkpoint load/save convenience methods to a model class.

  Relies on two names that are not defined in this class: ``load_model`` and a
  module-level ``save_pretrained`` function (presumably safetensors-based
  helpers -- confirm against the file's imports).
  """

  @classmethod
  def from_pretrained(cls, filepath, device='cuda'):
    """Build a model with no constructor arguments and load weights into it.

    Args:
      filepath: checkpoint location handed to ``load_model``.
      device: device the new instance is moved to before loading.

    Returns:
      The loaded instance, switched to evaluation mode.
    """
    instance = cls().to(device)
    load_model(instance, filepath)
    instance.eval()
    return instance

  def save_pretrained(self, base_path='./model'):
    """Save this model via the module-level ``save_pretrained`` helper.

    The class's qualified name is passed as ``name`` and ``base_path`` is
    forwarded unchanged.
    """
    # Inside a method body the bare name resolves to the module-level function,
    # not to this method, so the call below is not recursive.
    model_cls = self.__class__
    save_pretrained(self, name=model_cls.__qualname__, base_path=base_path)

Plots of the training metrics are shown below:

Accuracy over epochs for SVHN classifier
Accuracy over epochs for SVHN classifier
loss over epochs for SVHN classifier
loss over epochs for SVHN classifier

Task 2: CNN for Image Denoising

class ImageDenoisingCNN(nn.Module, PretrainedMixin):
  """Two-layer convolutional denoiser for 3-channel images.

  Both convolutions use 3x3 kernels with stride 1 and padding 1, so the spatial
  size of the input is preserved. The sigmoid at the end bounds every output
  value to the (0, 1) range.
  """

  def __init__(self):
    super().__init__()

    # Stage 1: expand 3 input channels to 30 feature maps, then ReLU.
    self.conv1 = nn.Conv2d(3, 30, kernel_size=3, stride=1, padding=1)
    self.relu = nn.ReLU()

    # Stage 2: project the 30 feature maps back to 3 channels, then sigmoid.
    self.conv2 = nn.Conv2d(30, 3, kernel_size=3, stride=1, padding=1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return the denoised estimate for a batch of noisy images.

    Args:
      x: batch of 3-channel images, shape (N, 3, H, W).

    Returns:
      Tensor with the same shape as ``x``.
    """
    features = self.relu(self.conv1(x))
    return self.sigmoid(self.conv2(features))

Training and evaluation loop:

def train(train_loader, test_loader, model, epochs, loss_function, optimizer, device='cuda'):
  """
  Train the model on the training dataset and evaluate it on the test dataset.

  Args:
    train_loader: iterable with a length, yielding ``(clean_images, noisy_images)`` batches.
    test_loader: loader passed to ``evaluate`` after every epoch.
    model: network that maps noisy images to denoised images.
    epochs: number of passes over ``train_loader``.
    loss_function: callable ``loss_function(denoised, clean)`` returning a scalar loss tensor.
    optimizer: optimizer updating the parameters of ``model``.
    device: device the model and every batch are moved to.

  Returns:
    Tuple ``(train_loss_epochs, test_loss_epochs)`` with one mean loss per epoch each.
  """
  # Move model to the specified device
  model = model.to(device)
  train_loss_epochs = []
  test_loss_epochs = []
  last_batch_idx = len(train_loader) - 1

  for epoch in range(epochs):
    # evaluate() leaves the model in eval mode, so re-enable training mode each epoch
    model.train()
    train_loss_batches = []

    # Use context manager for batch progress bar
    with tqdm(
      enumerate(train_loader), total=len(train_loader), desc=f'epoch {epoch + 1}/{epochs}', ncols=100
    ) as batch_pbar:
      for batch_idx, (clean_images, noisy_images) in batch_pbar:
        # Move data to device
        clean_images = clean_images.to(device)
        noisy_images = noisy_images.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: the model sees the noisy batch, the loss compares against the clean one
        denoised_images = model(noisy_images)
        loss = loss_function(denoised_images, clean_images)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Track batch loss (read the scalar once and reuse it)
        batch_loss = loss.item()
        train_loss_batches.append(batch_loss)
        batch_pbar.set_postfix({'batch_loss': batch_loss})

        # Display sample results every 5 epochs, at the last batch
        if epoch % 5 == 0 and batch_idx == last_batch_idx:
          show_images_grid2(clean_images[:5].detach().cpu(), title='Clean', cols=5)
          show_images_grid2(noisy_images[:5].detach().cpu(), title='Noisy', cols=5)
          show_images_grid2(denoised_images[:5].detach().cpu(), title='Denoised', cols=5)

    # Calculate average training loss for the epoch
    train_loss_epochs.append(np.mean(train_loss_batches))

    # Evaluate model on test set. Pass this function's own `epochs` argument as
    # the total; the previous code referenced an undefined name `num_epochs`.
    test_loss_epoch = evaluate(test_loader, model, loss_function, epoch + 1, epochs, device=device)
    test_loss_epochs.append(test_loss_epoch)

  return train_loss_epochs, test_loss_epochs
 
def evaluate(dataloader, model, loss_function, epoch, num_epochs, device='cuda'):
  """
  Compute the mean per-batch loss of the model over a dataloader.

  The model is switched to evaluation mode and left in that mode; gradients
  are disabled for the whole pass.

  Args:
    dataloader: iterable yielding ``(clean_images, noisy_images)`` batches.
    model: network that maps noisy images to denoised images.
    loss_function: callable ``loss_function(denoised, clean)`` returning a scalar loss tensor.
    epoch: current epoch number, used only in the progress-bar label.
    num_epochs: total number of epochs, used only in the progress-bar label.
    device: device every batch is moved to.

  Returns:
    The mean of the per-batch losses, as computed by ``np.mean``.
  """
  model.eval()
  batch_losses = []

  with torch.no_grad(), tqdm(dataloader, desc=f'eval  {epoch}/{num_epochs}', ncols=100) as progress:
    for clean_batch, noisy_batch in progress:
      clean_batch = clean_batch.to(device)
      noisy_batch = noisy_batch.to(device)

      # Score the reconstruction of the noisy batch against its clean target
      reconstruction = model(noisy_batch)
      batch_losses.append(loss_function(reconstruction, clean_batch).item())

  return np.mean(batch_losses)

Last samples from this training run:

Last sample of the training epochs
Last sample of the training epochs
epoch 96/100: 100%|██████████████████████████████| 24/24 [00:00<00:00, 34.90it/s, batch_loss=0.0027]
eval  96/100: 100%|█████████████████████████████████████████████████| 24/24 [00:00<00:00, 80.53it/s]
epoch 97/100: 100%|█████████████████████████████| 24/24 [00:00<00:00, 70.63it/s, batch_loss=0.00307]
eval  97/100: 100%|█████████████████████████████████████████████████| 24/24 [00:00<00:00, 78.39it/s]
epoch 98/100: 100%|█████████████████████████████| 24/24 [00:00<00:00, 69.79it/s, batch_loss=0.00271]
eval  98/100: 100%|█████████████████████████████████████████████████| 24/24 [00:00<00:00, 79.21it/s]
epoch 99/100: 100%|█████████████████████████████| 24/24 [00:00<00:00, 70.38it/s, batch_loss=0.00367]
eval  99/100: 100%|█████████████████████████████████████████████████| 24/24 [00:00<00:00, 79.09it/s]
epoch 100/100: 100%|████████████████████████████| 24/24 [00:00<00:00, 70.95it/s, batch_loss=0.00302]
eval  100/100: 100%|████████████████████████████████████████████████| 24/24 [00:00<00:00, 78.81it/s]

visualisation

# Plot the per-epoch training and test losses on a single figure, then print
# the final value of each. Expects `train_loss_epochs` and `test_loss_epochs`
# to be defined already (presumably the two lists returned by train()).

# Create the plot
plt.figure(figsize=(10, 6))

# Plot training and test losses
# The x axis is 1-based so it matches the epoch numbers, not the list indices
epochs = range(1, len(train_loss_epochs) + 1)
plt.plot(epochs, train_loss_epochs, label='Training Loss', color='blue', linestyle='-')
plt.plot(epochs, test_loss_epochs, label='Test Loss', color='red', linestyle='-')

# Customize the plot
# NOTE(review): the y label assumes the loss function was MSE -- confirm against the caller
plt.title('Training and Test Losses Over Time', fontsize=14, pad=15)
plt.xlabel('Epochs', fontsize=12)
plt.ylabel('Loss (MSE)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(fontsize=10)

# Add minor gridlines (minor ticks must be switched on for them to be drawn)
plt.minorticks_on()
plt.grid(True, which='minor', linestyle=':', alpha=0.4)

# Adjust layout and display
plt.tight_layout()
plt.show()

# Print final losses (last epoch's value of each series, 6 decimal places)
print(f'Final Training Loss: {train_loss_epochs[-1]:.6f}')
print(f'Final Test Loss: {test_loss_epochs[-1]:.6f}')

yields the following:

Final Training Loss: 0.003326
Final Test Loss: 0.003811
training and test loss of denoising image over time
training and test loss of denoising image over time

denoising last five samples

Average Test Loss on classes 5-9: 0.003754
denoising last five samples
denoising last five samples