Merge branch 'assignment-a3' into 'main'

assignment-a3: add Assignment 3 solution (dataset parsing, dataloader, and classification training)

See merge request mmc-mmp/mmp_wise2526_franksim!3
This commit is contained in:
franksim
2025-10-31 15:10:24 +01:00
7 changed files with 243 additions and 11 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.venv/ .venv/
.data .data
runs/
__pycache__/ __pycache__/
*.code-workspace *.code-workspace
.vscode/ .vscode/

View File

@@ -1,27 +1,80 @@
import os
import re
from typing import List from typing import List
import numpy as np import numpy as np
from PIL import Image, ImageDraw
class AnnotationRect:
    """Exercise 3.1: an axis-aligned bounding box.

    Coordinates follow image convention: (x1, y1) is the top-left and
    (x2, y2) the bottom-right corner.
    """

    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

    def area(self):
        """Return width * height of the box."""
        width = self.x2 - self.x1
        height = self.y2 - self.y1
        return width * height

    def __array__(self) -> np.ndarray:
        """Support np.asarray(rect) -> array([x1, y1, x2, y2])."""
        return np.array([self.x1, self.y1, self.x2, self.y2])

    @staticmethod
    def fromarray(arr: np.ndarray):
        """Inverse of __array__: build a rect from an [x1, y1, x2, y2] array."""
        x1, y1, x2, y2 = arr[0], arr[1], arr[2], arr[3]
        return AnnotationRect(x1, y1, x2, y2)
def read_groundtruth_file(path: str) -> List[AnnotationRect]:
    """Exercise 3.1b: parse a groundtruth text file into AnnotationRects.

    Each non-empty line is expected to contain four whitespace-separated
    numbers: x1 y1 x2 y2. Blank lines are skipped.
    """
    rects = []
    with open(path, 'r') as file:
        for raw_line in file:
            fields = raw_line.strip().split()
            if not fields:
                continue
            x1, y1, x2, y2 = (float(v) for v in fields[:4])
            rects.append(AnnotationRect(x1, y1, x2, y2))
    return rects
# put your solution for exercise 3.1c wherever you deem it right
def get_image_with_max_annotations(dir_path: str) -> str:
    """Exercise 3.1c: return the path of the image with the most annotations.

    Scans dir_path for files named <digits>.jpg, counts the boxes in the
    matching <digits>.gt_data.txt file, and returns the .jpg path with the
    highest count (None when no image has any annotations).
    """
    numbered_jpg = re.compile(r'^(\d+)\.jpg$')
    best_path = None
    best_count = 0
    for entry in set(os.listdir(dir_path)):
        m = numbered_jpg.match(entry)
        if m is None:
            continue
        gt_path = os.path.join(dir_path, f"{m.group(1)}.gt_data.txt")
        count = len(read_groundtruth_file(gt_path))
        if count > best_count:
            best_path = os.path.join(dir_path, entry)
            best_count = count
    return best_path
def visualize_image(image_path: str, output_path='output.jpg', rect_color=(255, 0, 0), width=2):
    """Draw the groundtruth boxes of an image and save the result.

    The annotation file is expected next to the image, named
    <basename>.gt_data.txt.

    @param image_path: path to the source image
    @param output_path: where the annotated copy is written
    @param rect_color: RGB outline color of the drawn rectangles
    @param width: outline width in pixels
    """
    # Bug fix: the previous regex r'(.*)(\.jpg)' returned None for any path
    # not ending in ".jpg" and then crashed on match.group(1). splitext keeps
    # the exact .jpg behavior and also works for other extensions.
    base, _ext = os.path.splitext(image_path)
    annotations = read_groundtruth_file(f"{base}.gt_data.txt")
    img = Image.open(image_path).convert('RGB')
    draw = ImageDraw.Draw(img)
    for annotation in annotations:
        draw.rectangle(
            [annotation.x1, annotation.y1, annotation.x2, annotation.y2],
            outline=rect_color, width=width)
    img.save(output_path)
def main(data_dir: str = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2/train"):
    """Exercise 3.1 driver: visualize the image with the most annotations.

    @param data_dir: dataset split directory to scan. Defaults to the
        previously hard-coded machine-specific path so existing callers
        keep working, but can now be pointed at any location.
    """
    image_file = get_image_with_max_annotations(data_dir)
    visualize_image(image_file)


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,11 @@
import os
import re
from PIL import Image
from typing import Tuple from typing import Tuple
import torch import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from .annotation import read_groundtruth_file
from torchvision import transforms
class MMP_Dataset(torch.utils.data.Dataset): class MMP_Dataset(torch.utils.data.Dataset):
@@ -11,19 +16,61 @@ class MMP_Dataset(torch.utils.data.Dataset):
def __init__(self, path_to_data: str, image_size: int):
    """
    @param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
    @param image_size: Desired image size that this dataset should return
    """
    self.image_size = image_size
    numbered_jpg = re.compile(r'^(\d+)\.jpg$')
    entries = []
    for fname in os.listdir(path_to_data):
        match = numbered_jpg.match(fname)
        if match is None:
            continue
        img_file = os.path.join(path_to_data, fname)
        annotations = read_groundtruth_file(
            os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt"))
        # Keep the numeric id captured above so the sort below does not have
        # to re-parse the full path with a regex (the previous key pattern
        # r"(.*/)(\d+)(\.jpg)" assumed '/' separators and re-matched per
        # element on every comparison key computation).
        entries.append((int(match.group(1)), img_file, annotations))
    entries.sort(key=lambda e: e[0])
    # Same public shape as before: list of (image_path, annotations) tuples
    # in ascending numeric image-id order.
    self.images = [(img, ann) for _, img, ann in entries]
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
    """
    @return: Tuple of image tensor and label. The label is 0 if there is one
        person and 1 if there are multiple people.
    """
    img_path, annotations = self.images[idx]
    img = Image.open(img_path).convert("RGB")
    # Pad to a square first so the subsequent resize does not distort the
    # aspect ratio; then normalize with ImageNet statistics.
    pipeline = transforms.Compose([
        transforms.Pad(self.__padding__(img), 0),
        transforms.Resize((self.image_size, self.image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    label = 1 if len(annotations) > 1 else 0
    return (pipeline(img), label)
def __padding__(self, img) -> Tuple[int, int, int, int]:
    """Return (left, top, right, bottom) padding that makes img square.

    Only the right and bottom edges are padded, out to the longer side.
    """
    width, height = img.size
    side = max(width, height)
    return (0, 0, side - width, side - height)
def __len__(self) -> int:
    """Return the number of (image, annotations) pairs that were found."""
    return len(self.images)
def get_dataloader(
    path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
) -> DataLoader:
    """Exercise 3.2d"""
    # The dataset root contains "train" and "val" subfolders; pick one.
    split = "train" if is_train else "val"
    dataset = MMP_Dataset(
        path_to_data=os.path.join(path_to_data, split),
        image_size=image_size)
    # Shuffle only while training; pinned memory speeds up host-to-GPU copies.
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=num_workers,
        pin_memory=True,
    )

BIN
mmp/a3/document.pdf Normal file

Binary file not shown.

80
mmp/a3/document.tex Normal file
View File

@@ -0,0 +1,80 @@
\documentclass[11pt,a4paper]{article}
% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing
% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}
% Other packages
\usepackage{enumitem}
\usepackage{graphicx}
% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{03} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-11-05} % <-- CHANGE DUE DATE
% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}
\begin{document}
\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}
\maketitle
\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}
%------------------ START OF ASSIGNMENT -----------------------
% Write your solutions below
\section*{Exercise 3.1 Dataset Parsing}
\begin{enumerate}[label=\alph*)]
\setcounter{enumi}{2}
\item \begin{figure}[htp]
\centering
\includegraphics[width=4cm]{output.jpg}
\caption{02254418.jpg with 18 annotations}
\end{figure}
\end{enumerate}
\section*{Exercise 3.3 Training}
\begin{tabular}{|c||c|}
\hline Batch size & 32 \\
\hline Training epochs & 10 \\
\hline Loss & 0.3719 \\
\hline Accuracy & 78.90 \% \\
\hline
\end{tabular}
%------------------ END OF ASSIGNMENT -----------------------
\end{document}

View File

@@ -1,6 +1,57 @@
import torch
import argparse
from ..a2.main import MmpNet, get_criterion_optimizer, train_epoch, eval_epoch
from .dataset import get_dataloader
def main():
    """Put your code for Exercise 3.3 in here.

    Trains MmpNet for a fixed number of epochs on the MMP dataset and prints
    per-epoch train loss and validation accuracy; optionally logs both to
    TensorBoard when --tensorboard is passed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--tensorboard', action='store_true',
                        help='Enable TensorBoard logging')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_epochs = 10
    # Hoisted: the same dataset root was previously hard-coded twice.
    data_root = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2"
    # NOTE(review): 244 looks like a typo for the common 224 input size —
    # kept as-is to preserve behavior; confirm against MmpNet's backbone.
    image_size = 244

    model = MmpNet(num_classes=2).to(device=device)
    dataloader_train = get_dataloader(
        path_to_data=data_root, image_size=image_size,
        batch_size=32, num_workers=6, is_train=True)
    dataloader_eval = get_dataloader(
        path_to_data=data_root, image_size=image_size,
        batch_size=32, num_workers=6, is_train=False)
    criterion, optimizer = get_criterion_optimizer(model=model)

    writer = None
    if args.tensorboard:
        # Imported lazily so TensorBoard stays an optional dependency.
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(log_dir="runs/a3_mmpnet")

    for epoch in range(train_epochs):
        train_loss = train_epoch(
            model=model,
            loader=dataloader_train,
            optimizer=optimizer,
            device=device,
            criterion=criterion,
        )
        val_acc = eval_epoch(
            model=model,
            loader=dataloader_eval,
            device=device,
        )
        print(
            f"Epoch [{epoch+1}/{train_epochs}] - Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")
        if writer is not None:
            writer.add_scalar("Loss/train", train_loss, epoch)
            writer.add_scalar("Accuracy/val", val_acc, epoch)
    if writer is not None:
        writer.close()
if __name__ == "__main__": if __name__ == "__main__":

BIN
mmp/a3/output.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB