Compare commits

...

10 Commits

Author SHA1 Message Date
franksim   a6f70005f2   adds nms and eval   2025-12-02 11:04:47 +01:00
franksim   3b6a588719   Merge branch 'assignment-a5' into 'main' (Assignment a5; see merge request mmc-mmp/mmp_wise2526_franksim!4)   2025-11-18 09:53:18 +01:00
franksim   c50d9e83b8   adapts metrics   2025-11-18 09:14:23 +01:00
franksim   f21fb57303   adds documentation   2025-11-16 16:39:17 +01:00
franksim   edbad414e2   adds solutions   2025-11-16 16:28:13 +01:00
franksim   721e46b768   undo renaming   2025-11-11 11:09:42 +01:00
franksim   56e21a1e54   renaming   2025-11-11 11:08:07 +01:00
franksim   7245042b54   performance improvements   2025-11-11 10:52:27 +01:00
franksim   a00ddedb23   adapts doc   2025-11-09 17:52:35 +01:00
franksim   5c8e06f62f   small impovements   2025-11-09 17:49:50 +01:00
54 changed files with 865 additions and 990 deletions


@@ -25,6 +25,7 @@ class AnnotationRect:
         self.x2 *= factor
         self.y1 *= factor
         self.y2 *= factor
+        return self

     @staticmethod
     def fromarray(arr: np.ndarray):

12 binary image files added, 24 binary image files removed (not shown).


@@ -5,7 +5,7 @@ import numpy as np
 import torch
 from torch.utils.data import DataLoader
 from ..a3.annotation import read_groundtruth_file, AnnotationRect
-from .label_grid import get_label_grid, draw_annotation_rects
+from .label_grid import get_label_grid
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 from .anchor_grid import get_anchor_grid
@@ -29,10 +29,11 @@ class MMP_Dataset(torch.utils.data.Dataset):
         @param is_test: Whether this is the test set (True) or the validation/training set (False)
         """
         self.image_size = image_size
-        self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = []
+        self.images: Sequence[Tuple[str, str | None]] = []
         self.anchor_grid = anchor_grid
         self.min_iou = min_iou
         self.is_test = is_test
+        self.path_to_data = path_to_data

         img_pattern = re.compile(r"^(\d+)\.jpg$")
         files = set(os.listdir(path_to_data))
@@ -42,11 +43,10 @@ class MMP_Dataset(torch.utils.data.Dataset):
             img_file = os.path.join(path_to_data, fname)
             if is_test:
                 self.images.append((img_file, None))
-            else:
-                annotations = read_groundtruth_file(
-                    os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
-                )
-                self.images.append((img_file, annotations))
+            annotation_file = os.path.join(
+                path_to_data, f"{match.group(1)}.gt_data.txt"
+            )
+            self.images.append((img_file, annotation_file))

         self.images.sort(
             key=lambda x: int(re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2))
@@ -73,13 +73,13 @@ class MMP_Dataset(torch.utils.data.Dataset):
         if self.is_test:
             return (img_tensor, torch.Tensor(), int(img_id))

-        scaled_annotations = []
-        for annotation in self.images[idx][1]:
-            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
-            scaled_annotations.append(annotation)
+        annotations = [
+            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
+            for annotation in read_groundtruth_file(self.images[idx][1])
+        ]

         label_grid = get_label_grid(
-            anchor_grid=self.anchor_grid, gts=scaled_annotations, min_iou=self.min_iou
+            anchor_grid=self.anchor_grid, gts=annotations, min_iou=self.min_iou
         )
         return (img_tensor, label_grid, int(img_id))
@@ -120,21 +120,93 @@ def get_dataloader(
     return dataloader

-def calculate_max_coverage(loader: DataLoader, min_iou: float) -> float:
+def calculate_max_coverage(loader, min_iou):
     """
-    @param loader: A DataLoader object, generated with the get_dataloader function.
-    @param min_iou: Minimum IoU overlap that is required to count a ground truth box as covered.
-    @return: Ratio of how mamy ground truth boxes are covered by a label grid box. Must be a value between 0 and 1.
+    @param loader: DataLoader object.
+    @param min_iou: Minimum IoU overlap to count a ground truth box as covered.
+    @return: Ratio of how many ground truth boxes are covered by a label grid box. Value between 0 and 1.
     """
-    raise NotImplementedError()
+    total_boxes = 0
+    covered_boxes = 0
+    dataset = loader.dataset
+    anchor_grid = dataset.anchor_grid  # Shape: (H, W, 4)
+    # Reshape anchor grid to (N, 4)
+    anchors = anchor_grid.reshape(-1, 4)
+    for img, _, img_id in loader:
+        for batch_index in range(len(img)):
+            gts_file = os.path.join(
+                dataset.path_to_data,
+                f"{str(img_id[batch_index].item()).zfill(8)}.gt_data.txt",
+            )
+            # Load and scale ground truth boxes if necessary
+            with Image.open(
+                os.path.join(
+                    dataset.path_to_data,
+                    f"{str(img_id[batch_index].item()).zfill(8)}.jpg",
+                )
+            ) as original_image:
+                original_w, original_h = original_image.size
+            # Assume square resize for model, get transform size from img tensor
+            transformed_size = img[batch_index].shape[-1]
+            scale = transformed_size / max(original_w, original_h)
+            annotations = [
+                annotation.scale(scale)
+                for annotation in read_groundtruth_file(gts_file)
+            ]
+            gt_boxes = np.stack(
+                [np.array(a) for a in annotations], axis=0
+            )  # shape (M, 4)
+            total_boxes += len(gt_boxes)
+            # Vectorized IoU calculation: (M, N)
+            ious = compute_ious_vectorized(gt_boxes, anchors)  # shape (M, N)
+            # Count ground truths for which any anchor box matches min_iou
+            covered = (ious >= min_iou).any(axis=1).sum()
+            covered_boxes += covered
+    return covered_boxes / total_boxes if total_boxes > 0 else 0.0

-def print_img_tensor_with_annotations(
-    img: torch.Tensor, annotations: Sequence["AnnotationRect"], output_file: str
+def compute_ious_vectorized(boxes1, boxes2):
+    """
+    Compute the IoU matrix between each box in boxes1 and each box in boxes2.
+    boxes1: (M, 4), boxes2: (N, 4) -- format [x1, y1, x2, y2]
+    Returns: (M, N) IoU
+    """
+    # Expand to (M, N, 4)
+    boxes1 = boxes1[:, None, :]  # (M, 1, 4)
+    boxes2 = boxes2[None, :, :]  # (1, N, 4)
+    # Intersection box
+    inter_x1 = np.maximum(boxes1[..., 0], boxes2[..., 0])
+    inter_y1 = np.maximum(boxes1[..., 1], boxes2[..., 1])
+    inter_x2 = np.minimum(boxes1[..., 2], boxes2[..., 2])
+    inter_y2 = np.minimum(boxes1[..., 3], boxes2[..., 3])
+    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
+    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
+    inter_area = inter_w * inter_h
+    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+    union_area = area1 + area2 - inter_area
+    return inter_area / (union_area + 1e-6)

+def draw_image_tensor_with_annotations(
+    img: torch.Tensor,
+    annotations: Sequence["AnnotationRect"] | None,
+    output_file: str,
 ):
     # Convert tensor to numpy, permute dimensions
-    img_np = img.permute(1, 2, 0).cpu().numpy()
-    img_np = img_np.astype(np.uint8)
+    img_np = img.permute(1, 2, 0).numpy()
+    img_np = np.clip(img_np, 0, 1)

     fig, ax = plt.subplots(1)
     ax.imshow(img_np)
@@ -152,40 +224,43 @@ def print_img_tensor_with_annotations(
     plt.close(fig)

-def print_positive_boxes(
+def denormalize_image_tensor(
+    img: torch.Tensor,
+    mean=torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1),
+    std=torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1),
+) -> torch.Tensor:
+    img_denormalized = img * std + mean
+    return img_denormalized

+def draw_positive_boxes(
     img_tensor: torch.Tensor,
     label_grid: np.ndarray,
     img_id: torch.Tensor,
     anchor_grid: np.ndarray,
-    path_to_data: str,
 ):
     annotations = [
         AnnotationRect.fromarray(anchor_grid[idx])
         for idx in np.ndindex(anchor_grid.shape[:-1])
         if label_grid[idx]
     ]
-    print_img_tensor_with_annotations(
+    draw_image_tensor_with_annotations(
         img_tensor,
         annotations=annotations,
-        output_file=f"mmp/a4/{img_id}_transformed.png",
-    )
-    draw_annotation_rects(
-        annotations=annotations,
-        image=f"{os.path.join(path_to_data, f'{str(img_id.item()).zfill(8)}.jpg')}",
-        output_path=f"mmp/a4/{img_id}_original.png",
+        output_file=f"mmp/a4/.output/{img_id}_transformed.png",
     )

 def main():
     anchor_grid = get_anchor_grid(
-        anchor_widths=[16, 32, 64, 96, 128, 144, 150, 160, 192, 224, 256],
-        aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
-        num_rows=32,
-        num_cols=32,
-        scale_factor=20,
+        anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
+        aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
+        num_rows=28,
+        num_cols=28,
+        scale_factor=8,
     )
     dataloader = get_dataloader(
-        num_workers=6,
+        num_workers=9,
         is_train=True,
         is_test=False,
         batch_size=8,
@@ -194,13 +269,14 @@ def main():
         anchor_grid=anchor_grid,
     )
-    # print(calculate_coverage(dataloader, 0.7))
     for img, label, img_id in islice(dataloader, 12):
-        print_positive_boxes(
-            img_tensor=img[5],
+        draw_positive_boxes(
+            img_tensor=denormalize_image_tensor(img=img[5]),
             label_grid=label[5],
             img_id=img_id[5],
             anchor_grid=anchor_grid,
-            path_to_data=".data/mmp-public-3.2/train",
         )

Binary file not shown.


@@ -47,7 +47,7 @@
 \maketitle
 \begin{center}
     \textbf{Course:} \course
 \end{center}
 \vspace{0.5cm}
@@ -57,14 +57,22 @@
 \section*{Exercise 4.2 Label Grid}
 \begin{enumerate}[label=\alph*)]
     \setcounter{enumi}{2}
     \item \begin{figure}[htp]
               \centering
               \includegraphics[width=4cm]{output.jpg}
               \caption{output.txt}
           \end{figure}
 \end{enumerate}
+
+\section*{Exercise 4.3 Finalizing the Data Pipeline}
+\begin{enumerate}[label=\alph*)]
+    \setcounter{enumi}{2}
+    \item The generated images can be found at `.output/`.
+\end{enumerate}
+
 %------------------ END OF ASSIGNMENT -----------------------
 \end{document}

mmp/a5/document.pdf (new binary file, not shown)

mmp/a5/document.tex (new file, 82 lines)

@@ -0,0 +1,82 @@
\documentclass[11pt,a4paper]{article}
% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing
% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}
% Other packages
\usepackage{enumitem}
\usepackage{graphicx}
% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{05} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-11-26} % <-- CHANGE DUE DATE
% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}
\begin{document}
\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}
\maketitle
\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}
%------------------ START OF ASSIGNMENT -----------------------
% Write your solutions below
\section*{Exercise 5.2 Training}
\begin{figure}[htp]
\centering
\includegraphics[width=14cm]{image0.png}
\end{figure}
\section*{Exercise 5.3 Negative Mining}
\begin{figure}[htp]
\centering
\includegraphics[width=14cm]{image1.jpg}
\includegraphics[width=14cm]{image2.png}
\end{figure}
The idea of negative mining is to restore a certain balance between positive and negative labels.
This is especially important in our case, since there are far more negative boxes than positive ones; the balance is achieved by sampling a random subset of the negatives.
In my experiments the loss was actually higher with negative mining, but the accuracy was better.
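As an illustration, a minimal sketch of this sampling idea (a simplified variant of the \texttt{get\_random\_sampling\_mask} helper in the code; the name \texttt{sample\_negatives} is used here only for exposition):
\begin{verbatim}
import torch

def sample_negatives(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
    # Keep every positive; keep at most neg_ratio * #positives negatives,
    # chosen uniformly at random (labels are 0/1).
    flat = labels.reshape(-1)
    pos = (flat == 1).nonzero(as_tuple=True)[0]
    neg = (flat == 0).nonzero(as_tuple=True)[0]
    k = min(int(neg_ratio * pos.numel()), neg.numel())
    keep = neg[torch.randperm(neg.numel(), device=labels.device)[:k]]
    mask = torch.zeros_like(flat, dtype=torch.bool)
    mask[pos] = True
    mask[keep] = True
    return mask.reshape(labels.shape)
\end{verbatim}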
%------------------ END OF ASSIGNMENT -----------------------
\end{document}

mmp/a5/image0.png (new binary file, 34 KiB, not shown)
mmp/a5/image1.jpg (new binary file, 80 KiB, not shown)
mmp/a5/image2.png (new binary file, 95 KiB, not shown)


@@ -1,7 +1,16 @@
+import argparse
 import torch
 import torch.optim as optim
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+import datetime

 from .model import MmpNet
+from ..a4.anchor_grid import get_anchor_grid
+from ..a4.dataset import get_dataloader
+from ..a2.main import get_criterion_optimizer
+from ..a6.main import evaluate as evaluate_v2

 def step(
@@ -11,11 +20,19 @@ def step(
     img_batch: torch.Tensor,
     lbl_batch: torch.Tensor,
 ) -> float:
-    """Performs one update step for the model
-
-    @return: The loss for the specified batch. Return a float and not a PyTorch tensor
-    """
-    raise NotImplementedError()
+    model.train()
+    optimizer.zero_grad()
+    device = next(model.parameters()).device
+    img_batch = img_batch.to(device)
+    lbl_batch = lbl_batch.to(device)
+    outputs = model(img_batch)
+    loss = criterion(outputs, lbl_batch)
+    loss.backward()
+    optimizer.step()
+    return loss.item()

 def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
@@ -26,13 +43,187 @@ def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
     Hint: after computing the mask, check if the neg_ratio is fulfilled.
     @return: A tensor with the same shape as labels
     """
-    assert labels.min() >= 0 and labels.max() <= 1  # remove this line if you want
-    raise NotImplementedError()
+    # Flatten for easier indexing
+    labels_flat = labels.view(-1)
+    pos_indices = (labels_flat == 1).nonzero(as_tuple=True)[0]
+    neg_indices = (labels_flat == 0).nonzero(as_tuple=True)[0]
+    num_pos = pos_indices.numel()
+    num_neg = neg_indices.numel()
+    num_neg_to_sample = min(int(neg_ratio * num_pos), num_neg)
+    perm = torch.randperm(num_neg, device=labels.device)
+    sampled_neg_indices = neg_indices[perm[:num_neg_to_sample]]
+    mask_flat = torch.zeros_like(labels_flat, dtype=torch.long)
+    mask_flat[pos_indices] = 1
+    mask_flat[sampled_neg_indices] = 1
+    # Reshape to original shape
+    mask = mask_flat.view_as(labels)
+    return mask
+
+def evaluate(
+    model: MmpNet,
+    criterion,
+    dataloader: DataLoader,
+) -> float:
+    device = next(model.parameters()).device
+    model.eval()
+    total_loss = 0.0
+    total_samples = 0
+    all_outputs = []
+    all_labels = []
+    with torch.no_grad():
+        for img_batch, lbl_batch, _ in dataloader:
+            img_batch = img_batch.to(device)
+            lbl_batch = lbl_batch.to(device)
+            outputs = model(img_batch)
+            loss = criterion(outputs, lbl_batch)
+            batch_size = img_batch.size(0)
+            total_loss += loss.item() * batch_size
+            total_samples += batch_size
+            all_outputs.append(outputs.cpu())
+            all_labels.append(lbl_batch.cpu())
+    avg_loss = total_loss / total_samples if total_samples > 0 else 0.0
+    return avg_loss
+
+def train(
+    model: MmpNet,
+    loader: DataLoader,
+    criterion: nn.Module,
+    optimizer: optim.Optimizer,
+):
+    model.train()
+    running_loss = 0.0
+    total_samples = 0
+    progress_bar = tqdm(loader, desc="Training", unit="batch")
+    for img_batch, lbl_batch, _ in progress_bar:
+        loss = step(
+            model=model,
+            criterion=criterion,
+            optimizer=optimizer,
+            img_batch=img_batch,
+            lbl_batch=lbl_batch,
+        )
+        batch_size = img_batch.size(0)
+        running_loss += loss * batch_size
+        total_samples += batch_size
+        progress_bar.set_postfix(
+            {"loss": running_loss / total_samples if total_samples > 0 else 0.0}
+        )
+    epoch_loss = running_loss / total_samples if total_samples > 0 else 0.0
+    progress_bar.close()
+    return epoch_loss
+
+class NegativeMiningCriterion(nn.Module):
+    def __init__(self, neg_ratio=3.0, enable_negative_mining: bool = True):
+        super().__init__()
+        self.backbone = nn.CrossEntropyLoss(reduction="none")
+        self.neg_ratio = neg_ratio
+        self.enable_negative_mining = enable_negative_mining
+
+    def forward(self, outputs, labels):
+        outputs_flat = outputs.view(-1, outputs.shape[-1])
+        labels_flat = labels.view(-1).long()
+        unfiltered = self.backbone(outputs_flat, labels_flat)
+        assert unfiltered.shape == labels_flat.shape
+        if not self.enable_negative_mining:
+            return unfiltered.mean()
+        mask = get_random_sampling_mask(labels_flat, self.neg_ratio)
+        filtered_loss = unfiltered[mask == 1]
+        return filtered_loss.mean()

 def main():
-    """Put your training code for exercises 5.2 and 5.3 here"""
-    raise NotImplementedError()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--tensorboard",
+        nargs="?",
+        const=True,
+        default=False,
+        help="Enable TensorBoard logging. If a label is provided, it will be used in the log directory name.",
+    )
+    args = parser.parse_args()
+    if args.tensorboard:
+        from torch.utils.tensorboard import SummaryWriter
+
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        if isinstance(args.tensorboard, str):
+            label = args.tensorboard
+            log_dir = f"runs/a5_mmpnet_{label}_{timestamp}"
+        else:
+            log_dir = f"runs/a5_mmpnet_{timestamp}"
+        writer = SummaryWriter(log_dir=log_dir)
+    else:
+        writer = None
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = MmpNet(num_aspect_ratios=8, num_widths=8).to(device)
+    anchor_grid = get_anchor_grid(
+        anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
+        aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
+        num_rows=7,
+        num_cols=7,
+        scale_factor=32,
+    )
+    dataloader_train = get_dataloader(
+        path_to_data=".data/mmp-public-3.2/train",
+        image_size=224,
+        batch_size=32,
+        num_workers=9,
+        is_test=False,
+        is_train=True,
+        anchor_grid=anchor_grid,
+    )
+    dataloader_val = get_dataloader(
+        path_to_data=".data/mmp-public-3.2/val",
+        image_size=224,
+        batch_size=32,
+        num_workers=9,
+        is_test=False,
+        is_train=False,
+        anchor_grid=anchor_grid,
+    )
+    _, optimizer = get_criterion_optimizer(model=model)
+    criterion = NegativeMiningCriterion(enable_negative_mining=True)
+    criterion_eval = NegativeMiningCriterion(enable_negative_mining=False)
+    num_epochs = 5
+    for epoch in range(num_epochs):
+        train_loss = train(
+            model=model,
+            loader=dataloader_train,
+            criterion=criterion,
+            optimizer=optimizer,
+        )
+        avg_loss = evaluate(
+            model=model, criterion=criterion_eval, dataloader=dataloader_val
+        )
+        _ = evaluate_v2(
+            model=model, device=device, anchor_grid=anchor_grid, loader=dataloader_train
+        )
+        if writer is not None:
+            writer.add_scalar("Loss/train_epoch", train_loss, epoch)
+            writer.add_scalar("Loss/eval_epoch", avg_loss, epoch)
+    if writer is not None:
+        writer.close()

 if __name__ == "__main__":


@@ -1,9 +1,40 @@
 import torch
+from torchvision import models
+from torchvision.models import MobileNet_V2_Weights
+from torch import nn

 class MmpNet(torch.nn.Module):
-    def __init__(self, num_widths: int, num_aspect_ratios: int):
-        raise NotImplementedError()
+    def __init__(self, num_widths: int, num_aspect_ratios: int, num_classes: int = 2):
+        super().__init__()
+        self.backbone = models.mobilenet_v2(
+            weights=MobileNet_V2_Weights.DEFAULT
+        ).features
+        self.num_widths = num_widths
+        self.num_aspect_ratios = num_aspect_ratios
+        self.num_classes = num_classes

-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        raise NotImplementedError()
+        with torch.no_grad():
+            dummy = torch.zeros(1, 3, 224, 224)
+            backbone_out = self.backbone(dummy)
+            in_channels = backbone_out.shape[1]
+        self.head = nn.Conv2d(
+            in_channels=in_channels,
+            kernel_size=3,
+            out_channels=self.get_required_output_channels(),
+            stride=1,
+            padding=1,
+        )
+
+    def get_required_output_channels(self):
+        return self.num_widths * self.num_aspect_ratios * self.num_classes
+
+    def forward(self, x: torch.Tensor):
+        x = self.backbone(x)
+        x = self.head(x)
+        b, out_c, h, w = x.shape
+        x = x.view(b, self.num_widths, self.num_aspect_ratios, self.num_classes, h, w)
+        x = x.permute(0, 1, 2, 4, 5, 3).contiguous()
+        # Now: (batch, num_widths, num_aspect_ratios, h, w, num_classes)
+        return x
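As a quick sanity check of the head's output layout, a minimal sketch (assuming the same 224x224 input as the dummy pass above; MobileNetV2's feature extractor downsamples by a factor of 32, so the spatial grid is 7x7):

import torch
from mmp.a5.model import MmpNet

# 8 widths x 8 aspect ratios x 2 classes on a 7x7 grid
model = MmpNet(num_widths=8, num_aspect_ratios=8)
out = model(torch.zeros(2, 3, 224, 224))
print(out.shape)  # expected: torch.Size([2, 8, 8, 7, 7, 2])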

mmp/a6/__init__.py (new empty file)


@@ -1,25 +1,144 @@
 from typing import List, Tuple

 import torch
 import numpy as np
+from tqdm import tqdm
+import os
+from torch.utils.data import DataLoader
+from mmp.a6.evallib import calculate_ap_pr
+from ..a4.label_grid import iou
 from ..a5.model import MmpNet
-from ..a3.annotation import AnnotationRect
+from ..a3.annotation import AnnotationRect, read_groundtruth_file
+from .nms import non_maximum_suppression

 def batch_inference(
     model: MmpNet, images: torch.Tensor, device: torch.device, anchor_grid: np.ndarray
 ) -> List[List[Tuple[AnnotationRect, float]]]:
-    raise NotImplementedError()
+    score_thresh = 0.5
+    nms_thresh = 0.3
+    model = model.to(device)
+    model.eval()
+    images = images.to(device)
+    anchor_grid = anchor_grid  # shape [W, R, h, w, 4]
+    results = []
+    with torch.no_grad():
+        outputs = model(images)  # (B, W, R, h, w, 2)
+        probs = torch.softmax(outputs, dim=-1)[..., 1]  # (B, W, R, h, w)
+        probs_np = probs.cpu().numpy()
+        batch_size = outputs.shape[0]
+        for b in range(batch_size):
+            detections = []
+            for idx in np.ndindex(anchor_grid.shape[:-1]):
+                score = probs_np[b][idx]
+                # if score >= score_thresh:
+                box = anchor_grid[idx]
+                rect = AnnotationRect.fromarray(box)
+                detections.append((rect, float(score)))
+            detections_nms = non_maximum_suppression(detections, nms_thresh)
+            results.append(detections_nms)
+    return results

-def evaluate() -> float:  # feel free to change the arguments
+def evaluate(
+    model: MmpNet, loader: DataLoader, device: torch.device, anchor_grid: np.ndarray
+) -> float:
     """Evaluates a specified model on the whole validation dataset.

     @return: AP for the validation set as a float.
     You decide which arguments this function should receive
     """
-    raise NotImplementedError()
+    path_to_data = ".data/mmp-public-3.2/train"
+    progress_bar = tqdm(loader, desc="Evaluation", unit="batch")
+    image_count = 0
+    ap_total = 0
+    for img_batch, _, id_batch in progress_bar:
+        inference = batch_inference(
+            anchor_grid=anchor_grid, device=device, images=img_batch, model=model
+        )
+        gts = get_gts_for_batch(id_batch=id_batch, gt_base_path=path_to_data)
+        dict_detections = {
+            img_id.item(): inference[idx] for idx, img_id in enumerate(id_batch)
+        }
+        dict_gt = {img_id.item(): gts[idx] for idx, img_id in enumerate(id_batch)}
+        average_precision, precision, recall = calculate_ap_pr(dict_detections, dict_gt)
+        ap_total = (ap_total * image_count + average_precision) / (
+            image_count + id_batch.shape[0]
+        )
+        image_count += id_batch.shape[0]
+        progress_bar.set_postfix(
+            {
+                "ap": ap_total,
+            }
+        )
+    return ap_total
+
+def get_gts_for_batch(
+    id_batch: torch.Tensor, gt_base_path: str
+) -> List[List[AnnotationRect]]:
+    return [
+        read_groundtruth_file(
+            os.path.join(gt_base_path, f"{str(img_id.item()).zfill(8)}.gt_data.txt")
+        )
+        for img_id in id_batch
+    ]
+
+def calc_tp_fp_fn(
+    detections: List[Tuple[AnnotationRect, float]],
+    gts: List[AnnotationRect],
+    iou_threshold: float = 0.5,
+    confidence_threshold: float = 0.5,
+) -> tuple[int, int, int]:
+    """
+    Calculates true positive, false positive, and false negative counts for object detection results on a single image.
+
+    Args:
+        detections: List of (AnnotationRect, confidence) tuples representing predicted boxes and scores. Should be sorted by descending confidence.
+        gts: List of AnnotationRect for ground truth.
+        iou_threshold: Minimum IoU to consider a detection a true positive.
+        confidence_threshold: Minimum confidence required to include a detection.
+
+    Returns:
+        num_tp: Number of true positives (int).
+        num_fp: Number of false positives (int).
+        num_fn: Number of false negatives (int).
+    """
+    detections = [det for det in detections if det[1] >= confidence_threshold]
+    detections.sort(key=lambda x: x[1], reverse=True)
+    matches = set()
+    fp = 0
+    tp = 0
+    for det_rect, _ in detections:
+        iou_map = [iou(det_rect, gt_rect) for gt_rect in gts]
+        if len(iou_map) == 0:
+            fp += 1
+            continue
+        max_idx = np.argmax(iou_map)
+        if max_idx in matches or iou_map[max_idx] < iou_threshold:
+            fp += 1
+            continue
+        matches.add(max_idx)
+        tp += 1
+    fn = len(gts) - len(matches)
+    return tp, fp, fn

 def evaluate_test():  # feel free to change the arguments

File diff suppressed because it is too large.


@@ -1,6 +1,9 @@
+import os
 from typing import List, Sequence, Tuple

 from ..a3.annotation import AnnotationRect
+from ..a4.label_grid import iou, draw_annotation_rects
+from collections import defaultdict

 def non_maximum_suppression(
@@ -12,4 +15,68 @@ def non_maximum_suppression(
     @return: A list of tuples of the remaining boxes after NMS together with their scores
     """
-    raise NotImplementedError()
+    if not boxes_scores:
+        return []
+    # Sort the boxes by score in descending order
+    boxes_scores_sorted = sorted(boxes_scores, key=lambda bs: bs[1], reverse=True)
+    result = []
+    while boxes_scores_sorted:
+        # Select the box with the highest score and remove it from the list
+        curr_box, curr_score = boxes_scores_sorted.pop(0)
+        result.append((curr_box, curr_score))
+        # Remove boxes with IoU > threshold
+        new_boxes = []
+        for box, score in boxes_scores_sorted:
+            if iou(curr_box, box) <= threshold:
+                new_boxes.append((box, score))
+        boxes_scores_sorted = new_boxes
+    return result
+
+def read_boxes_from_file(filepath: str) -> List[Tuple[str, AnnotationRect, float]]:
+    """
+    Reads a file containing bounding boxes and scores in the format:
+    {image_number} {x1} {y1} {x2} {y2} {score}
+    Returns a list of tuples: (image_number, AnnotationRect, score)
+    """
+    boxes: List[Tuple[str, AnnotationRect, float]] = []
+    with open(filepath, "r") as f:
+        for line in f:
+            parts = line.strip().split()
+            if len(parts) != 6:
+                continue
+            img_id = parts[0]
+            x1, y1, x2, y2 = map(int, parts[1:5])
+            annotation_rect = AnnotationRect(x1, y1, x2, y2)
+            score = float(parts[5])
+            boxes.append((img_id, annotation_rect, score))
+    return boxes
+
+def main():
+    boxes = read_boxes_from_file("mmp/a6/model_output.txt")
+    grouped = defaultdict(list)
+    for image_id, rect, score in boxes:
+        grouped[image_id].append((rect, score))
+    for image_id, rects_scores in grouped.items():
+        filtered_boxes = non_maximum_suppression(rects_scores, 0.3)
+        annotation_rects = [rect for rect, score in filtered_boxes if score > 0.5]
+        input_path = f".data/mmp-public-3.2/test/{image_id}.jpg"
+        output_path = f"mmp/a6/nms_output_{image_id}.png"
+        if not os.path.exists(input_path):
+            continue
+        draw_annotation_rects(
+            input_path,
+            annotation_rects,
+            rect_color=(255, 0, 0),
+            rect_width=2,
+            output_path=output_path,
+        )
+
+if __name__ == "__main__":
+    main()
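To see the suppression above in action, a tiny worked example with hypothetical boxes (assuming iou uses the plain (x2 - x1) * (y2 - y1) area convention):

a = AnnotationRect(0, 0, 10, 10)    # score 0.9
b = AnnotationRect(1, 1, 10, 10)    # score 0.8; IoU(a, b) = 81 / 100 = 0.81
c = AnnotationRect(20, 20, 30, 30)  # score 0.7; no overlap with a
kept = non_maximum_suppression([(a, 0.9), (b, 0.8), (c, 0.7)], 0.5)
# kept == [(a, 0.9), (c, 0.7)]: b is suppressed by a, the distant c survives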

3 binary image files added (not shown).