Merge branch 'assignment-a3' into 'main'

assignment-a3: add Assignment 3 solution (dataset parsing, dataloader, and classification training)

See merge request mmc-mmp/mmp_wise2526_franksim!3
This commit is contained in:
franksim
2025-10-31 15:10:24 +01:00
7 changed files with 243 additions and 11 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.venv/ .venv/
.data .data
runs/
__pycache__/ __pycache__/
*.code-workspace *.code-workspace
.vscode/ .vscode/

View File

@@ -1,27 +1,80 @@
import os
import re
from typing import List from typing import List
import numpy as np import numpy as np
from PIL import Image, ImageDraw
class AnnotationRect:
    """Exercise 3.1: an axis-aligned bounding box.

    Coordinates follow image convention: (x1, y1) is the top-left and
    (x2, y2) the bottom-right corner.
    """

    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

    def area(self):
        """Return width * height of the box."""
        width = self.x2 - self.x1
        height = self.y2 - self.y1
        return width * height

    def __array__(self) -> np.ndarray:
        """Support np.asarray(rect) -> array([x1, y1, x2, y2])."""
        return np.array([self.x1, self.y1, self.x2, self.y2])

    @staticmethod
    def fromarray(arr: np.ndarray):
        """Inverse of __array__: build a rect from an [x1, y1, x2, y2] array."""
        x1, y1, x2, y2 = arr[0], arr[1], arr[2], arr[3]
        return AnnotationRect(x1, y1, x2, y2)
def read_groundtruth_file(path: str) -> List[AnnotationRect]:
    """Exercise 3.1b: parse a groundtruth text file into AnnotationRects.

    Each non-empty line is expected to contain four whitespace-separated
    numbers: x1 y1 x2 y2. Blank lines are skipped.
    """
    rects = []
    with open(path, 'r') as file:
        for raw_line in file:
            fields = raw_line.strip().split()
            if not fields:
                continue
            x1, y1, x2, y2 = (float(v) for v in fields[:4])
            rects.append(AnnotationRect(x1, y1, x2, y2))
    return rects
# put your solution for exercise 3.1c wherever you deem it right
def get_image_with_max_annotations(dir_path: str) -> str:
    """Exercise 3.1c: return the path of the image with the most annotations.

    Scans dir_path for files named <digits>.jpg, counts the boxes in the
    matching <digits>.gt_data.txt file, and returns the .jpg path with the
    highest count (None when no image has any annotations).
    """
    numbered_jpg = re.compile(r'^(\d+)\.jpg$')
    best_path = None
    best_count = 0
    for entry in set(os.listdir(dir_path)):
        m = numbered_jpg.match(entry)
        if m is None:
            continue
        gt_path = os.path.join(dir_path, f"{m.group(1)}.gt_data.txt")
        count = len(read_groundtruth_file(gt_path))
        if count > best_count:
            best_path = os.path.join(dir_path, entry)
            best_count = count
    return best_path
def visualize_image(image_path: str, output_path='output.jpg', rect_color=(255, 0, 0), width=2):
    """Draw the groundtruth boxes of an image and save the result.

    The annotation file is expected next to the image, named
    <basename>.gt_data.txt.

    @param image_path: path to the source image
    @param output_path: where the annotated copy is written
    @param rect_color: RGB outline color of the drawn rectangles
    @param width: outline width in pixels
    """
    # Bug fix: the previous regex r'(.*)(\.jpg)' returned None for any path
    # not ending in ".jpg" and then crashed on match.group(1). splitext keeps
    # the exact .jpg behavior and also works for other extensions.
    base, _ext = os.path.splitext(image_path)
    annotations = read_groundtruth_file(f"{base}.gt_data.txt")
    img = Image.open(image_path).convert('RGB')
    draw = ImageDraw.Draw(img)
    for annotation in annotations:
        draw.rectangle(
            [annotation.x1, annotation.y1, annotation.x2, annotation.y2],
            outline=rect_color, width=width)
    img.save(output_path)
def main(data_dir: str = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2/train"):
    """Exercise 3.1 driver: visualize the image with the most annotations.

    @param data_dir: dataset split directory to scan. Defaults to the
        previously hard-coded machine-specific path so existing callers
        keep working, but can now be pointed at any location.
    """
    image_file = get_image_with_max_annotations(data_dir)
    visualize_image(image_file)


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,11 @@
import os
import re
from PIL import Image
from typing import Tuple from typing import Tuple
import torch import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from .annotation import read_groundtruth_file
from torchvision import transforms
class MMP_Dataset(torch.utils.data.Dataset): class MMP_Dataset(torch.utils.data.Dataset):
@@ -11,19 +16,61 @@ class MMP_Dataset(torch.utils.data.Dataset):
def __init__(self, path_to_data: str, image_size: int):
    """
    @param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
    @param image_size: Desired image size that this dataset should return
    """
    self.image_size = image_size
    numbered_jpg = re.compile(r'^(\d+)\.jpg$')
    entries = []
    for fname in os.listdir(path_to_data):
        match = numbered_jpg.match(fname)
        if match is None:
            continue
        img_file = os.path.join(path_to_data, fname)
        annotations = read_groundtruth_file(
            os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt"))
        # Keep the numeric id captured above so the sort below does not have
        # to re-parse the full path with a regex (the previous key pattern
        # r"(.*/)(\d+)(\.jpg)" assumed '/' separators and re-matched per
        # element on every comparison key computation).
        entries.append((int(match.group(1)), img_file, annotations))
    entries.sort(key=lambda e: e[0])
    # Same public shape as before: list of (image_path, annotations) tuples
    # in ascending numeric image-id order.
    self.images = [(img, ann) for _, img, ann in entries]
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
    """
    @return: Tuple of image tensor and label. The label is 0 if there is one
        person and 1 if there are multiple people.
    """
    img_path, annotations = self.images[idx]
    img = Image.open(img_path).convert("RGB")
    # Pad to a square first so the subsequent resize does not distort the
    # aspect ratio; then normalize with ImageNet statistics.
    pipeline = transforms.Compose([
        transforms.Pad(self.__padding__(img), 0),
        transforms.Resize((self.image_size, self.image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    label = 1 if len(annotations) > 1 else 0
    return (pipeline(img), label)
def __padding__(self, img) -> Tuple[int, int, int, int]:
    """Return (left, top, right, bottom) padding that makes img square.

    Only the right and bottom edges are padded, out to the longer side.
    """
    width, height = img.size
    side = max(width, height)
    return (0, 0, side - width, side - height)
def __len__(self) -> int:
    """Return the number of (image, annotations) pairs that were found."""
    return len(self.images)
def get_dataloader(
    path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
) -> DataLoader:
    """Exercise 3.2d"""
    # The dataset root contains "train" and "val" subfolders; pick one.
    split = "train" if is_train else "val"
    dataset = MMP_Dataset(
        path_to_data=os.path.join(path_to_data, split),
        image_size=image_size)
    # Shuffle only while training; pinned memory speeds up host-to-GPU copies.
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=num_workers,
        pin_memory=True,
    )

BIN
mmp/a3/document.pdf Normal file

Binary file not shown.

80
mmp/a3/document.tex Normal file
View File

@@ -0,0 +1,80 @@
\documentclass[11pt,a4paper]{article}
% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing
% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}
% Other packages
\usepackage{enumitem}
\usepackage{graphicx}
% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{03} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-11-05} % <-- CHANGE DUE DATE
% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}
\begin{document}
\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}
\maketitle
\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}
%------------------ START OF ASSIGNMENT -----------------------
% Write your solutions below
\section*{Exercise 3.1 Dataset Parsing}
\begin{enumerate}[label=\alph*)]
\setcounter{enumi}{2}
\item \begin{figure}[htp]
\centering
\includegraphics[width=4cm]{output.jpg}
\caption{02254418.jpg with 18 annotations}
\end{figure}
\end{enumerate}
\section*{Exercise 3.3 Training}
\begin{tabular}{|c||c|}
\hline Batch size & 32 \\
\hline Training epochs & 10 \\
\hline Loss & 0.3719 \\
\hline Accuracy & 78.90 \% \\
\hline
\end{tabular}
%------------------ END OF ASSIGNMENT -----------------------
\end{document}

View File

@@ -1,6 +1,57 @@
import torch
import argparse
from ..a2.main import MmpNet, get_criterion_optimizer, train_epoch, eval_epoch
from .dataset import get_dataloader
def main():
    """Put your code for Exercise 3.3 in here.

    Trains MmpNet for a fixed number of epochs on the MMP dataset and prints
    per-epoch train loss and validation accuracy; optionally logs both to
    TensorBoard when --tensorboard is passed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--tensorboard', action='store_true',
                        help='Enable TensorBoard logging')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_epochs = 10
    # Hoisted: the same dataset root was previously hard-coded twice.
    data_root = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2"
    # NOTE(review): 244 looks like a typo for the common 224 input size —
    # kept as-is to preserve behavior; confirm against MmpNet's backbone.
    image_size = 244

    model = MmpNet(num_classes=2).to(device=device)
    dataloader_train = get_dataloader(
        path_to_data=data_root, image_size=image_size,
        batch_size=32, num_workers=6, is_train=True)
    dataloader_eval = get_dataloader(
        path_to_data=data_root, image_size=image_size,
        batch_size=32, num_workers=6, is_train=False)
    criterion, optimizer = get_criterion_optimizer(model=model)

    writer = None
    if args.tensorboard:
        # Imported lazily so TensorBoard stays an optional dependency.
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(log_dir="runs/a3_mmpnet")

    for epoch in range(train_epochs):
        train_loss = train_epoch(
            model=model,
            loader=dataloader_train,
            optimizer=optimizer,
            device=device,
            criterion=criterion,
        )
        val_acc = eval_epoch(
            model=model,
            loader=dataloader_eval,
            device=device,
        )
        print(
            f"Epoch [{epoch+1}/{train_epochs}] - Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")
        if writer is not None:
            writer.add_scalar("Loss/train", train_loss, epoch)
            writer.add_scalar("Accuracy/val", val_acc, epoch)
    if writer is not None:
        writer.close()
if __name__ == "__main__": if __name__ == "__main__":

BIN
mmp/a3/output.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB