Merge branch 'assignment-a3' into 'main'

assignment-a3: adds code

See merge request mmc-mmp/mmp_wise2526_franksim!3
This commit is contained in:
franksim
2025-10-31 15:10:24 +01:00
7 changed files with 243 additions and 11 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.venv/
.data
runs/
__pycache__/
*.code-workspace
.vscode/

View File

@@ -1,27 +1,80 @@
import os
import re
from typing import List
import numpy as np
from PIL import Image, ImageDraw
class AnnotationRect:
    """Exercise 3.1: axis-aligned rectangle given by two corner points.

    The corners ``(x1, y1)`` and ``(x2, y2)`` are stored exactly as passed
    in; the coordinates are neither reordered nor validated.
    """

    def __init__(self, x1, y1, x2, y2):
        """Store the four corner coordinates as attributes."""
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2

    def area(self):
        """Return ``(x2 - x1) * (y2 - y1)``.

        No absolute value is taken, so the result is negative when exactly
        one of the two differences is negative.
        """
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """Return the rectangle as a 1-D array ``[x1, y1, x2, y2]``.

        ``dtype`` and ``copy`` are accepted because NumPy passes them when
        converting objects through the ``__array__`` protocol. A fresh array
        is built on every call, so ``copy`` is accepted but has no effect.
        """
        return np.array([self.x1, self.y1, self.x2, self.y2], dtype=dtype)

    @staticmethod
    def fromarray(arr: np.ndarray):
        """Build a rectangle from the first four entries of ``arr``.

        The entries are read in the order ``x1, y1, x2, y2``, i.e. the same
        layout that ``__array__`` produces.
        """
        return AnnotationRect(arr[0], arr[1], arr[2], arr[3])
def read_groundtruth_file(path: str) -> List[AnnotationRect]:
    """Exercise 3.1b: parse an annotation file into a list of rectangles.

    Every non-blank line is split on whitespace and its first four fields
    are converted to ``float`` and interpreted as ``x1 y1 x2 y2``. Any
    further fields on the line are ignored.

    @param path: Path to the ground-truth text file.
    @return: One ``AnnotationRect`` per non-blank line, in file order.
    @raises IndexError: If a non-blank line has fewer than four fields.
    @raises ValueError: If one of the first four fields is not a number.
    """
    annotation_rects = []
    with open(path, 'r') as file:
        for line in file:
            values = line.split()
            # split() on a blank / whitespace-only line yields an empty list.
            if not values:
                continue
            annotation_rects.append(AnnotationRect(
                float(values[0]), float(values[1]),
                float(values[2]), float(values[3])))
    return annotation_rects
# put your solution for exercise 3.1c wherever you deem it right
def get_image_with_max_annotations(dir_path: str) -> str:
    """Exercise 3.1c: find the image with the most annotations.

    Only files named ``<digits>.jpg`` are considered. For each of them the
    sibling file ``<digits>.gt_data.txt`` in the same directory is parsed
    with ``read_groundtruth_file`` and its rectangles are counted.

    @param dir_path: Directory containing the images and annotation files.
    @return: Path (``dir_path`` joined with the file name) of the image with
        the highest annotation count, or ``None`` if no matching image has
        at least one annotation. On ties the file name that sorts first
        wins.
    """
    img_pattern = re.compile(r'^(\d+)\.jpg$')
    max_file = None
    max_annotations = 0
    # Iterate in sorted order so that ties are resolved the same way on
    # every run; iterating a set of strings depends on hash ordering.
    for fname in sorted(os.listdir(dir_path)):
        match = img_pattern.match(fname)
        if not match:
            continue
        gt_file = os.path.join(dir_path, f"{match.group(1)}.gt_data.txt")
        annotations_number = len(read_groundtruth_file(gt_file))
        if annotations_number > max_annotations:
            max_file = os.path.join(dir_path, fname)
            max_annotations = annotations_number
    return max_file
def visualize_image(image_path: str, output_path='output.jpg', rect_color=(255, 0, 0), width=2):
    """Draw all annotation rectangles onto an image and save the result.

    The annotations are read from the file that shares the image's path
    without its extension, with ``.gt_data.txt`` appended.

    @param image_path: Path to the image file.
    @param output_path: Where the annotated image is written.
    @param rect_color: Outline colour passed to ``ImageDraw.rectangle``.
    @param width: Outline width passed to ``ImageDraw.rectangle``.
    """
    # splitext handles any extension; the previous unchecked regex match
    # failed with AttributeError on paths that did not contain ".jpg".
    stem, _ = os.path.splitext(image_path)
    annotations = read_groundtruth_file(f"{stem}.gt_data.txt")
    # convert() returns an independent in-memory image, so the file handle
    # can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        img = source.convert('RGB')
    draw = ImageDraw.Draw(img)
    for annotation in annotations:
        draw.rectangle(
            [annotation.x1, annotation.y1, annotation.x2, annotation.y2],
            outline=rect_color, width=width)
    img.save(output_path)
def main(dir_path: str = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2/train"):
    """Visualise the image of ``dir_path`` that has the most annotations.

    @param dir_path: Directory with images and annotation files. Defaults to
        the training split location used by the original script.
    @raises FileNotFoundError: If the directory holds no annotated image.
    """
    image_file = get_image_with_max_annotations(dir_path)
    if image_file is None:
        # Fail with a clear message instead of an opaque error further down.
        raise FileNotFoundError(
            f"no annotated .jpg image found in {dir_path!r}")
    visualize_image(image_file)


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,11 @@
import os
import re
from PIL import Image
from typing import Tuple
import torch
from torch.utils.data import DataLoader
from .annotation import read_groundtruth_file
from torchvision import transforms
class MMP_Dataset(torch.utils.data.Dataset):
@@ -11,19 +16,61 @@ class MMP_Dataset(torch.utils.data.Dataset):
@param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
@param image_size: Desired image size that this dataset should return
"""
raise NotImplementedError()
self.image_size = image_size
img_pattern = re.compile(r'^(\d+)\.jpg$')
files = set(os.listdir(path_to_data))
self.images = []
for fname in files:
match = img_pattern.match(fname)
if match:
img_file = os.path.join(path_to_data, fname)
annotations = read_groundtruth_file(os.path.join(
path_to_data, f"{match.group(1)}.gt_data.txt"))
self.images.append((img_file, annotations))
self.images.sort(key=lambda x: int(
re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2)))
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
    """
    Load the image at position ``idx``, pad it to a square, resize it to
    ``image_size`` x ``image_size`` and normalise it.

    @param idx: Index into ``self.images``.
    @return: Tuple of image tensor and label. The label is 1 if the image
        has more than one annotation (multiple people) and 0 otherwise.
    """
    img_file, annotations = self.images[idx]
    # convert() yields an in-memory copy, so the file can be closed right
    # away instead of staying open until garbage collection.
    with Image.open(img_file) as source:
        img = source.convert("RGB")
    padding = self.__padding__(img)
    transform = transforms.Compose([
        # Pad with black on the right/bottom so the resize below does not
        # distort the aspect ratio.
        transforms.Pad(padding, 0),
        transforms.Resize((self.image_size, self.image_size)),
        transforms.ToTensor(),
        # Standard ImageNet channel statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    return (transform(img), 1 if len(annotations) > 1 else 0)
def __padding__(self, img) -> Tuple[int, int, int, int]:
w, h = img.size
size = max(w, h)
right_pad = size - w
bottom_pad = size - h
return (0, 0, right_pad, bottom_pad)
def __len__(self) -> int:
raise NotImplementedError()
return len(self.images)
def get_dataloader(
    path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
) -> DataLoader:
    """Exercise 3.2d: build a ``DataLoader`` over one split of the dataset.

    @param path_to_data: Directory that contains the ``train`` and ``val`` sub-folders.
    @param image_size: Edge length forwarded to ``MMP_Dataset``.
    @param batch_size: Number of samples per batch.
    @param num_workers: Number of loader worker processes.
    @param is_train: Selects the ``train`` split with shuffling when true,
        otherwise the ``val`` split without shuffling.
    @return: The configured ``DataLoader`` (memory pinning is always enabled).
    """
    split_name = "train" if is_train else "val"
    split_dir = os.path.join(path_to_data, split_name)
    return DataLoader(
        MMP_Dataset(path_to_data=split_dir, image_size=image_size),
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=num_workers,
        pin_memory=True,
    )

BIN
mmp/a3/document.pdf Normal file

Binary file not shown.

80
mmp/a3/document.tex Normal file
View File

@@ -0,0 +1,80 @@
\documentclass[11pt,a4paper]{article}
% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing
% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}
% Other packages
\usepackage{enumitem}
\usepackage{graphicx}
% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{03} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-11-05} % <-- CHANGE DUE DATE
% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}
\begin{document}
\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}
\maketitle
\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}
%------------------ START OF ASSIGNMENT -----------------------
% Write your solutions below
\section*{Exercise 3.1 Dataset Parsing}
\begin{enumerate}[label=\alph*)]
\setcounter{enumi}{2}
\item \begin{figure}[htp]
\centering
\includegraphics[width=4cm]{output.jpg}
\caption{02254418.jpg with 18 annotations}
\end{figure}
\end{enumerate}
\section*{Exercise 3.3 Training}
\begin{tabular}{|c||c|}
\hline Batch size & 32 \\
\hline Training epochs & 10 \\
\hline Loss & 0.3719 \\
\hline Accuracy & 78.90 \% \\
\hline
\end{tabular}
%------------------ END OF ASSIGNMENT -----------------------
\end{document}

View File

@@ -1,6 +1,57 @@
import torch
import argparse
from ..a2.main import MmpNet, get_criterion_optimizer, train_epoch, eval_epoch
from .dataset import get_dataloader
def main():
    """Exercise 3.3: train ``MmpNet`` on the MMP dataset and report metrics.

    Command-line options:
      --tensorboard  log training loss and validation accuracy to
                     ``runs/a3_mmpnet``.
      --data-path    dataset root; defaults to the location used by the
                     original script.

    For each of the 10 epochs the model is trained once on the training
    split and evaluated once on the validation split; both values are
    printed and, if enabled, written to TensorBoard.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--tensorboard', action='store_true',
                        help='Enable TensorBoard logging')
    parser.add_argument(
        '--data-path',
        default="/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2",
        help='Dataset root directory')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_epochs = 10
    model = MmpNet(num_classes=2).to(device=device)

    # Settings shared by the training and validation loaders.
    # NOTE(review): image_size is 244 here; 224 is the more common input
    # size for ImageNet-style networks -- confirm this is intentional.
    loader_kwargs = dict(
        path_to_data=args.data_path,
        image_size=244, batch_size=32, num_workers=6,
    )
    dataloader_train = get_dataloader(is_train=True, **loader_kwargs)
    dataloader_eval = get_dataloader(is_train=False, **loader_kwargs)

    criterion, optimizer = get_criterion_optimizer(model=model)

    writer = None
    if args.tensorboard:
        # Imported lazily so TensorBoard is only required when requested.
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(log_dir="runs/a3_mmpnet")

    try:
        for epoch in range(train_epochs):
            train_loss = train_epoch(
                model=model,
                loader=dataloader_train,
                optimizer=optimizer,
                device=device,
                criterion=criterion,
            )
            val_acc = eval_epoch(
                model=model,
                loader=dataloader_eval,
                device=device
            )
            print(
                f"Epoch [{epoch+1}/{train_epochs}] - Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")
            if writer is not None:
                writer.add_scalar("Loss/train", train_loss, epoch)
                writer.add_scalar("Accuracy/val", val_acc, epoch)
    finally:
        # Close the writer even if training is interrupted or fails, so
        # that already-logged scalars are written out.
        if writer is not None:
            writer.close()
if __name__ == "__main__":

BIN
mmp/a3/output.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB