diff --git a/.gitignore b/.gitignore
index c5ee606..8f7c7d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .venv/
 .data
+runs/
 __pycache__/
 *.code-workspace
 .vscode/
diff --git a/mmp/a3/annotation.py b/mmp/a3/annotation.py
index 68f2be1..4977868 100644
--- a/mmp/a3/annotation.py
+++ b/mmp/a3/annotation.py
@@ -1,27 +1,80 @@
+import os
+import re
 from typing import List
 import numpy as np
+from PIL import Image, ImageDraw
 
 
 class AnnotationRect:
     """Exercise 3.1"""
 
     def __init__(self, x1, y1, x2, y2):
-        raise NotImplementedError()
+        self.x1 = x1
+        self.x2 = x2
+        self.y1 = y1
+        self.y2 = y2
 
     def area(self):
-        raise NotImplementedError()
+        return (self.x2 - self.x1) * (self.y2 - self.y1)
 
     def __array__(self) -> np.ndarray:
-        raise NotImplementedError()
+        return np.array([self.x1, self.y1, self.x2, self.y2])
 
     @staticmethod
     def fromarray(arr: np.ndarray):
-        raise NotImplementedError()
+        return AnnotationRect(arr[0], arr[1], arr[2], arr[3])
 
 
 def read_groundtruth_file(path: str) -> List[AnnotationRect]:
     """Exercise 3.1b"""
-    raise NotImplementedError()
+    annotationRects = []
+    with open(path, 'r') as file:
+        for line in file:
+            if line.strip():
+                values = line.strip().split()
+                annotationRects.append(AnnotationRect(float(values[0]), float(
+                    values[1]), float(values[2]), float(values[3])))
+    return annotationRects
 
 
-# put your solution for exercise 3.1c wherever you deem it right
+def get_image_with_max_annotations(dir_path: str) -> str:
+    img_pattern = re.compile(r'^(\d+)\.jpg$')
+    files = set(os.listdir(dir_path))
+    max_file = None
+    max_annotations = 0
+
+    for fname in files:
+        match = img_pattern.match(fname)
+        if match:
+            img_file = os.path.join(dir_path, fname)
+            annotations_number = len(read_groundtruth_file(os.path.join(
+                dir_path, f"{match.group(1)}.gt_data.txt")))
+            if (annotations_number > max_annotations):
+                max_file = img_file
+                max_annotations = annotations_number
+    return max_file
+
+
+def visualize_image(image_path: str, output_path='output.jpg',
+                    rect_color=(255, 0, 0), width=2):
+    img_pattern = re.compile(r'(.*)(\.jpg)')
+    match = img_pattern.match(image_path)
+    annotations = read_groundtruth_file(f"{match.group(1)}.gt_data.txt")
+
+    img = Image.open(image_path).convert('RGB')
+    draw = ImageDraw.Draw(img)
+
+    for annotation in annotations:
+        draw.rectangle([annotation.x1, annotation.y1, annotation.x2, annotation.y2],
+                       outline=rect_color, width=width)
+
+    img.save(output_path)
+
+
+def main():
+    image_file = get_image_with_max_annotations(
+        "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2/train")
+    visualize_image(image_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mmp/a3/dataset.py b/mmp/a3/dataset.py
index 75a9f5b..cd38a1c 100644
--- a/mmp/a3/dataset.py
+++ b/mmp/a3/dataset.py
@@ -1,6 +1,11 @@
+import os
+import re
+from PIL import Image
 from typing import Tuple
 import torch
 from torch.utils.data import DataLoader
+from .annotation import read_groundtruth_file
+from torchvision import transforms
 
 
 class MMP_Dataset(torch.utils.data.Dataset):
@@ -11,19 +16,61 @@ class MMP_Dataset(torch.utils.data.Dataset):
         @param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
         @param image_size: Desired image size that this dataset should return
         """
-        raise NotImplementedError()
+        self.image_size = image_size
+        img_pattern = re.compile(r'^(\d+)\.jpg$')
+        files = set(os.listdir(path_to_data))
+        self.images = []
+
+        for fname in files:
+            match = img_pattern.match(fname)
+            if match:
+                img_file = os.path.join(path_to_data, fname)
+                annotations = read_groundtruth_file(os.path.join(
+                    path_to_data, f"{match.group(1)}.gt_data.txt"))
+                self.images.append((img_file, annotations))
+
+        self.images.sort(key=lambda x: int(
+            re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2)))
 
     def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
         """
         @return: Tuple of image tensor and label.
         The label is 0 if there is one person and 1 if there a multiple people.
         """
-        raise NotImplementedError()
+        img = Image.open(self.images[idx][0]).convert("RGB")
+        padding = self.__padding__(img)
+        transform = transforms.Compose([
+            transforms.Pad(padding, 0),
+            transforms.Resize((self.image_size, self.image_size)),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            )
+        ])
+        return (transform(img), 1 if len(self.images[idx][1]) > 1 else 0)
+
+    def __padding__(self, img) -> Tuple[int, int, int, int]:
+        w, h = img.size
+        size = max(w, h)
+        right_pad = size - w
+        bottom_pad = size - h
+        return (0, 0, right_pad, bottom_pad)
 
     def __len__(self) -> int:
-        raise NotImplementedError()
+        return len(self.images)
+
 
 def get_dataloader(
     path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
 ) -> DataLoader:
-    """Exercise 3.2d"""
+    path = os.path.join(path_to_data, "train") if is_train else os.path.join(
+        path_to_data, "val")
+    dataset = MMP_Dataset(path_to_data=path, image_size=image_size)
+    dataloader = DataLoader(
+        dataset, batch_size=batch_size,
+        shuffle=is_train,
+        num_workers=num_workers,
+        pin_memory=True
+    )
+    return dataloader
diff --git a/mmp/a3/document.pdf b/mmp/a3/document.pdf
new file mode 100644
index 0000000..050dd4c
Binary files /dev/null and b/mmp/a3/document.pdf differ
diff --git a/mmp/a3/document.tex b/mmp/a3/document.tex
new file mode 100644
index 0000000..095db31
--- /dev/null
+++ b/mmp/a3/document.tex
@@ -0,0 +1,80 @@
+\documentclass[11pt,a4paper]{article}
+
+% Language and encoding settings
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+
+% Page formatting
+\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
+\usepackage{setspace}
+\onehalfspacing
+
+% Header/Footer
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\fancyhf{} % clear all header and footer fields
+\fancyhead[L]{\textbf{\course}}
+\fancyhead[C]{Assignment \assignmentnumber}
+\fancyhead[R]{\name}
+\fancyfoot[C]{\thepage}
+
+% Other packages
+\usepackage{enumitem}
+\usepackage{graphicx}
+
+% Custom commands for easy detail insertion
+\newcommand{\assignmentnumber}{03} % <-- CHANGE Assignment Number
+\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
+\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
+\newcommand{\duedate}{2025-11-05} % <-- CHANGE DUE DATE
+
+% Title formatting
+\usepackage{titling}
+\pretitle{
+    \vspace*{2cm}
+    \begin{center}
+    \LARGE\bfseries
+}
+\posttitle{\par\end{center}\vspace{1cm}}
+
+\begin{document}
+
+\title{Assignment \assignmentnumber}
+\author{\name}
+\date{\duedate}
+
+\maketitle
+
+\begin{center}
+    \textbf{Course:} \course
+\end{center}
+\vspace{0.5cm}
+
+%------------------ START OF ASSIGNMENT -----------------------
+
+% Write your solutions below
+
+\section*{Exercise 3.1 Dataset Parsing}
+
+\begin{enumerate}[label=\alph*)]
+    \setcounter{enumi}{2}
+    \item \begin{figure}[htp]
+        \centering
+        \includegraphics[width=4cm]{output.jpg}
+        \caption{02254418.jpg with 18 annotations}
+    \end{figure}
+\end{enumerate}
+
+\section*{Exercise 3.3 Training}
+\begin{tabular}{|c||c|}
+    \hline Batch size & 32 \\
+    \hline Training epochs & 10 \\
+    \hline Loss & 0.3719 \\
+    \hline Accuracy & 78.90 \% \\
+    \hline
+\end{tabular}
+
+%------------------ END OF ASSIGNMENT -----------------------
+
+\end{document}
\ No newline at end of file
diff --git a/mmp/a3/main.py b/mmp/a3/main.py
index e2959fe..bcdaf9c 100644
--- a/mmp/a3/main.py
+++ b/mmp/a3/main.py
@@ -1,6 +1,57 @@
+import torch
+import argparse
+from ..a2.main import MmpNet, get_criterion_optimizer, train_epoch, eval_epoch
+from .dataset import get_dataloader
+
+
 def main():
     """Put your code for Exercise 3.3 in here"""
-    raise NotImplementedError()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--tensorboard', action='store_true',
+                        help='Enable TensorBoard logging')
+    args = parser.parse_args()
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    train_epochs = 10
+    model = MmpNet(num_classes=2).to(device=device)
+    dataloader_train = get_dataloader(
+        path_to_data="/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2",
+        image_size=244, batch_size=32, num_workers=6, is_train=True
+    )
+    dataloader_eval = get_dataloader(
+        path_to_data="/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2",
+        image_size=244, batch_size=32, num_workers=6, is_train=False
+    )
+    criterion, optimizer = get_criterion_optimizer(model=model)
+
+    writer = None
+    if args.tensorboard:
+        from torch.utils.tensorboard import SummaryWriter
+        writer = SummaryWriter(log_dir="runs/a3_mmpnet")
+
+    for epoch in range(train_epochs):
+        train_loss = train_epoch(
+            model=model,
+            loader=dataloader_train,
+            optimizer=optimizer,
+            device=device,
+            criterion=criterion,
+        )
+        val_acc = eval_epoch(
+            model=model,
+            loader=dataloader_eval,
+            device=device
+        )
+
+        print(
+            f"Epoch [{epoch+1}/{train_epochs}] - Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")
+
+        if writer is not None:
+            writer.add_scalar("Loss/train", train_loss, epoch)
+            writer.add_scalar("Accuracy/val", val_acc, epoch)
+
+    if writer is not None:
+        writer.close()
 
 
 if __name__ == "__main__":
diff --git a/mmp/a3/output.jpg b/mmp/a3/output.jpg
new file mode 100644
index 0000000..d6bfc61
Binary files /dev/null and b/mmp/a3/output.jpg differ