Merge branch 'assignment-a5' into 'main'
Assignment a5

See merge request mmc-mmp/mmp_wise2526_franksim!4
@@ -255,7 +255,7 @@ def draw_positive_boxes(
 def main():
     anchor_grid = get_anchor_grid(
         anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
-        aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
+        aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
         num_rows=28,
         num_cols=28,
         scale_factor=8,
BIN  mmp/a5/document.pdf  (new file; binary file not shown)

mmp/a5/document.tex  (new file, 82 lines)
@@ -0,0 +1,82 @@
\documentclass[11pt,a4paper]{article}

% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}

% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing

% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}

% Other packages
\usepackage{enumitem}
\usepackage{graphicx}

% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{05} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-11-26} % <-- CHANGE DUE DATE

% Title formatting
\usepackage{titling}
\pretitle{
    \vspace*{2cm}
    \begin{center}
    \LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}

\begin{document}

\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}

\maketitle

\begin{center}
    \textbf{Course:} \course
\end{center}
\vspace{0.5cm}

%------------------ START OF ASSIGNMENT -----------------------

% Write your solutions below

\section*{Exercise 5.2 Training}
\begin{figure}[htp]
    \centering
    \includegraphics[width=14cm]{image0.png}
\end{figure}

\section*{Exercise 5.3 Negative Mining}
\begin{figure}[htp]
    \centering
    \includegraphics[width=14cm]{image1.jpg}
    \includegraphics[width=14cm]{image2.png}
\end{figure}

The idea of negative mining is to achieve a certain balance between positive and negative labels. This is especially important in our case, since there are far more negative boxes than positive ones. The balance is achieved by sampling a random subset of the negatives. In my case I observed that the loss was actually higher with negative mining, but the accuracy was better.
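% Editor's illustration (not in the committed file); the formula below is the
% one implemented by get_random_sampling_mask in main.py.
Concretely: with $P$ positive and $N$ negative anchors and negative ratio $r$
(here $r = 3$), the sampling mask keeps all $P$ positives and
\[
    n_{\mathrm{neg}} = \min\bigl(\lfloor r \cdot P \rfloor,\; N\bigr)
\]
randomly chosen negatives, so for $P = 40$ and $N = 50\,000$ only $120$ of the
negatives contribute to the loss.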

%------------------ END OF ASSIGNMENT -----------------------

\end{document}
BIN  mmp/a5/image0.png  (new file; binary file not shown; 34 KiB)
BIN  mmp/a5/image1.jpg  (new file; binary file not shown; 80 KiB)
BIN  mmp/a5/image2.png  (new file; binary file not shown; 95 KiB)

mmp/a5/main.py  (249 changed lines)
@@ -1,7 +1,16 @@
+import argparse
 import torch
 import torch.optim as optim
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from torch import Tensor
+from tqdm import tqdm
+import datetime

 from .model import MmpNet
+from ..a4.anchor_grid import get_anchor_grid
+from ..a4.dataset import get_dataloader
+from ..a2.main import get_criterion_optimizer


 def step(
@@ -11,11 +20,19 @@ def step(
     img_batch: torch.Tensor,
     lbl_batch: torch.Tensor,
 ) -> float:
-    """Performs one update step for the model
-
-    @return: The loss for the specified batch. Return a float and not a PyTorch tensor
-    """
-    raise NotImplementedError()
+    model.train()
+    optimizer.zero_grad()
+
+    device = next(model.parameters()).device
+    img_batch = img_batch.to(device)
+    lbl_batch = lbl_batch.to(device)
+
+    outputs = model(img_batch)
+    loss = criterion(outputs, lbl_batch)
+    loss.backward()
+    optimizer.step()
+
+    return loss.item()


 def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
@@ -26,13 +43,229 @@ def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
     Hint: after computing the mask, check if the neg_ratio is fulfilled.
     @return: A tensor with the same shape as labels
     """
-    assert labels.min() >= 0 and labels.max() <= 1  # remove this line if you want
-    raise NotImplementedError()
+    # Flatten for easier indexing
+    labels_flat = labels.view(-1)
+    pos_indices = (labels_flat == 1).nonzero(as_tuple=True)[0]
+    neg_indices = (labels_flat == 0).nonzero(as_tuple=True)[0]
+
+    num_pos = pos_indices.numel()
+    num_neg = neg_indices.numel()
+
+    num_neg_to_sample = min(int(neg_ratio * num_pos), num_neg)
+
+    perm = torch.randperm(num_neg, device=labels.device)
+    sampled_neg_indices = neg_indices[perm[:num_neg_to_sample]]
+
+    mask_flat = torch.zeros_like(labels_flat, dtype=torch.long)
+    mask_flat[pos_indices] = 1
+    mask_flat[sampled_neg_indices] = 1
+
+    # Reshape to original shape
+    mask = mask_flat.view_as(labels)
+    return mask
+
+
+def get_detection_metrics(
+    output: Tensor, labels: torch.Tensor, threshold: float
+) -> tuple[float, float, float, float]:
+    """
+    Returns precision, recall, f1 for the positive (human) class, and overall accuracy.
+    """
+    with torch.no_grad():
+        probs = torch.softmax(output, dim=-1)[..., 1]
+
+        preds = probs >= threshold
+
+        TP = ((preds == 1) & (labels == 1)).sum().item()
+        FP = ((preds == 1) & (labels == 0)).sum().item()
+        FN = ((preds == 0) & (labels == 1)).sum().item()
+        TN = ((preds == 0) & (labels == 0)).sum().item()
+
+        precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
+        recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
+        f1 = (
+            2 * precision * recall / (precision + recall)
+            if (precision + recall) > 0
+            else 0.0
+        )
+        accuracy = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) > 0 else 0.0
+
+    return (
+        precision,
+        recall,
+        f1,
+        accuracy,
+    )
+
+
+def evaluate(
+    model: MmpNet,
+    criterion,
+    dataloader: DataLoader,
+) -> tuple[float, float, float, float, float]:
+    device = next(model.parameters()).device
+    model.eval()
+    total_loss = 0.0
+    total_samples = 0
+    all_outputs = []
+    all_labels = []
+    with torch.no_grad():
+        for img_batch, lbl_batch, _ in dataloader:
+            img_batch = img_batch.to(device)
+            lbl_batch = lbl_batch.to(device)
+
+            outputs = model(img_batch)
+            loss = criterion(outputs, lbl_batch)
+            batch_size = img_batch.size(0)
+            total_loss += loss.item() * batch_size
+            total_samples += batch_size
+
+            all_outputs.append(outputs.cpu())
+            all_labels.append(lbl_batch.cpu())
+    avg_loss = total_loss / total_samples if total_samples > 0 else 0.0
+    if all_outputs and all_labels:
+        outputs_cat = torch.cat(all_outputs)
+        labels_cat = torch.cat(all_labels)
+        precision, recall, f1, acc = get_detection_metrics(
+            outputs_cat, labels_cat, threshold=0.5
+        )
+    else:
+        precision = recall = f1 = acc = 0.0  # acc must be initialized here too
+    return avg_loss, precision, recall, f1, acc
+
+
+def train(
+    model: MmpNet,
+    loader: DataLoader,
+    criterion: nn.Module,
+    optimizer: optim.Optimizer,
+):
+    model.train()
+    running_loss = 0.0
+    total_samples = 0
+
+    progress_bar = tqdm(loader, desc="Training", unit="batch")
+
+    for img_batch, lbl_batch, _ in progress_bar:
+        loss = step(
+            model=model,
+            criterion=criterion,
+            optimizer=optimizer,
+            img_batch=img_batch,
+            lbl_batch=lbl_batch,
+        )
+        batch_size = img_batch.size(0)
+        running_loss += loss * batch_size
+        total_samples += batch_size
+        progress_bar.set_postfix(
+            {"loss": running_loss / total_samples if total_samples > 0 else 0.0}
+        )
+
+    epoch_loss = running_loss / total_samples if total_samples > 0 else 0.0
+    progress_bar.close()
+    return epoch_loss
+
+
+class NegativeMiningCriterion(nn.Module):
+    def __init__(self, neg_ratio=3.0, enable_negative_mining: bool = True):
+        super().__init__()
+        self.backbone = nn.CrossEntropyLoss(reduction="none")
+        self.neg_ratio = neg_ratio
+        self.enable_negative_mining = enable_negative_mining
+
+    def forward(self, outputs, labels):
+        outputs_flat = outputs.view(-1, outputs.shape[-1])
+        labels_flat = labels.view(-1).long()
+        unfiltered = self.backbone(outputs_flat, labels_flat)
+        assert unfiltered.shape == labels_flat.shape
+
+        if not self.enable_negative_mining:
+            return unfiltered.mean()
+
+        mask = get_random_sampling_mask(labels_flat, self.neg_ratio)
+        filtered_loss = unfiltered[mask == 1]
+        return filtered_loss.mean()
+
+
 def main():
-    """Put your training code for exercises 5.2 and 5.3 here"""
-    raise NotImplementedError()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--tensorboard",
+        nargs="?",
+        const=True,
+        default=False,
+        help="Enable TensorBoard logging. If a label is provided, it will be used in the log directory name.",
+    )
+    args = parser.parse_args()
+
+    if args.tensorboard:
+        from torch.utils.tensorboard import SummaryWriter
+
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        if isinstance(args.tensorboard, str):
+            label = args.tensorboard
+            log_dir = f"runs/a5_mmpnet_{label}_{timestamp}"
+        else:
+            log_dir = f"runs/a5_mmpnet_{timestamp}"
+        writer = SummaryWriter(log_dir=log_dir)
+    else:
+        writer = None
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = MmpNet(num_aspect_ratios=8, num_widths=8).to(device)
+
+    anchor_grid = get_anchor_grid(
+        anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
+        aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
+        num_rows=7,
+        num_cols=7,
+        scale_factor=32,
+    )
+
+    dataloader_train = get_dataloader(
+        path_to_data=".data/mmp-public-3.2/train",
+        image_size=224,
+        batch_size=32,
+        num_workers=9,
+        is_test=False,
+        is_train=True,
+        anchor_grid=anchor_grid,
+    )
+    dataloader_val = get_dataloader(
+        path_to_data=".data/mmp-public-3.2/val",
+        image_size=224,
+        batch_size=32,
+        num_workers=9,
+        is_test=False,
+        is_train=False,
+        anchor_grid=anchor_grid,
+    )
+    _, optimizer = get_criterion_optimizer(model=model)
+    criterion = NegativeMiningCriterion(enable_negative_mining=True)
+    criterion_eval = NegativeMiningCriterion(enable_negative_mining=False)
+    num_epochs = 10
+
+    for epoch in range(num_epochs):
+        train_loss = train(
+            model=model,
+            loader=dataloader_train,
+            criterion=criterion,
+            optimizer=optimizer,
+        )
+        avg_loss, precision, recall, f1, acc = evaluate(
+            model=model, criterion=criterion_eval, dataloader=dataloader_val
+        )
+
+        if writer is not None:
+            writer.add_scalar("Loss/train_epoch", train_loss, epoch)
+            writer.add_scalar("Loss/eval_epoch", avg_loss, epoch)
+            writer.add_scalar("Acc/precision", precision, epoch)
+            writer.add_scalar("Acc/recall", recall, epoch)
+            writer.add_scalar("Acc/acc", acc, epoch)
+            writer.add_scalar("Acc/f1", f1, epoch)
+
+    if writer is not None:
+        writer.close()
+
+
 if __name__ == "__main__":
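A minimal usage sketch for the sampling mask (editor's illustration, not part of
the commit; the import path and the label shape of (batch, 8 widths, 8 aspect
ratios, 7, 7) are assumptions based on the configuration above):

import torch

from mmp.a5.main import get_random_sampling_mask  # assumed import path

# Toy labels shaped like the anchor label grid: (batch, widths, ratios, rows, cols)
labels = torch.zeros(2, 8, 8, 7, 7)
labels[0, 0, 0, 0, 0] = 1  # two positive anchors in total
labels[1, 3, 2, 4, 4] = 1

mask = get_random_sampling_mask(labels, neg_ratio=3.0)
assert mask.shape == labels.shape
assert bool((mask[labels == 1] == 1).all())  # every positive is kept
assert int(mask[labels == 0].sum()) == 6     # min(3 * 2 positives, num negatives)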

mmp/a5/model.py
@@ -1,9 +1,40 @@
 import torch
+from torchvision import models
+from torchvision.models import MobileNet_V2_Weights
+from torch import nn


 class MmpNet(torch.nn.Module):
-    def __init__(self, num_widths: int, num_aspect_ratios: int):
-        raise NotImplementedError()
+    def __init__(self, num_widths: int, num_aspect_ratios: int, num_classes: int = 2):
+        super().__init__()
+        self.backbone = models.mobilenet_v2(
+            weights=MobileNet_V2_Weights.DEFAULT
+        ).features
+        self.num_widths = num_widths
+        self.num_aspect_ratios = num_aspect_ratios
+        self.num_classes = num_classes

-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        raise NotImplementedError()
+        with torch.no_grad():
+            dummy = torch.zeros(1, 3, 224, 224)
+            backbone_out = self.backbone(dummy)
+            in_channels = backbone_out.shape[1]
+
+        self.head = nn.Conv2d(
+            in_channels=in_channels,
+            kernel_size=3,
+            out_channels=self.get_required_output_channels(),
+            stride=1,
+            padding=1,
+        )
+
+    def get_required_output_channels(self):
+        return self.num_widths * self.num_aspect_ratios * self.num_classes
+
+    def forward(self, x: torch.Tensor):
+        x = self.backbone(x)
+        x = self.head(x)
+        b, out_c, h, w = x.shape
+        x = x.view(b, self.num_widths, self.num_aspect_ratios, self.num_classes, h, w)
+        x = x.permute(0, 1, 2, 4, 5, 3).contiguous()
+        # Now: (batch, num_widths, num_aspect_ratios, h, w, num_classes)
+        return x
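And a quick shape check for the model head (again an editor's sketch, not part
of the commit; it assumes a 224x224 input, for which the MobileNetV2 feature map
is 7x7, and the pretrained weights are downloaded on first use):

import torch

from mmp.a5.model import MmpNet  # assumed import path

model = MmpNet(num_widths=8, num_aspect_ratios=8)
model.eval()
with torch.no_grad():
    out = model(torch.zeros(2, 3, 224, 224))
# 8 widths x 8 aspect ratios on a 7x7 grid, 2 classes per anchor
assert out.shape == (2, 8, 8, 7, 7, 2)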