From 5696de6e0493d8cc6982a40887431adee946f04d Mon Sep 17 00:00:00 2001 From: franksim Date: Tue, 28 Oct 2025 16:03:53 +0000 Subject: [PATCH] assignment-a3: adds code --- mmp/a3/annotation.py | 23 +++++++++++++----- mmp/a3/dataset.py | 55 ++++++++++++++++++++++++++++++++++++++++---- mmp/a3/main.py | 31 ++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 11 deletions(-) diff --git a/mmp/a3/annotation.py b/mmp/a3/annotation.py index 68f2be1..7199413 100644 --- a/mmp/a3/annotation.py +++ b/mmp/a3/annotation.py @@ -1,3 +1,5 @@ +import glob +import os from typing import List import numpy as np @@ -6,22 +8,31 @@ class AnnotationRect: """Exercise 3.1""" def __init__(self, x1, y1, x2, y2): - raise NotImplementedError() + self.x1 = x1 + self.x2 = x2 + self.y1 = y1 + self.y2 = y2 def area(self): - raise NotImplementedError() + return (self.x2 - self.x1) * (self.y2 - self.y1) def __array__(self) -> np.ndarray: - raise NotImplementedError() + return np.array([self.x1, self.y1, self.x2, self.y2]) @staticmethod def fromarray(arr: np.ndarray): - raise NotImplementedError() + return AnnotationRect(arr[0], arr[1], arr[2], arr[3]) def read_groundtruth_file(path: str) -> List[AnnotationRect]: """Exercise 3.1b""" - raise NotImplementedError() + annotationRects = [] + with open(path, 'r') as file: + for line in file: + if line.strip(): + values = line.strip().split() + annotationRects.append(AnnotationRect(float(values[0]), float( + values[1]), float(values[2]), float(values[3]))) + return annotationRects -# put your solution for exercise 3.1c wherever you deem it right diff --git a/mmp/a3/dataset.py b/mmp/a3/dataset.py index 75a9f5b..4db8e0c 100644 --- a/mmp/a3/dataset.py +++ b/mmp/a3/dataset.py @@ -1,6 +1,11 @@ +import os +import re +from PIL import Image from typing import Tuple import torch from torch.utils.data import DataLoader +from a3.annotation import read_groundtruth_file +from torchvision import transforms class MMP_Dataset(torch.utils.data.Dataset): @@ -11,19 +16,61 
class MMP_Dataset(torch.utils.data.Dataset):
    """Dataset over numbered .jpg images with per-image ground-truth boxes."""

    def __init__(self, path_to_data: str, image_size: int):
        """
        @param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
        @param image_size: Desired image size that this dataset should return
        """
        self.image_size = image_size
        img_pattern = re.compile(r"^(\d+)\.jpg$")

        # Keep the numeric image id alongside each entry so we can sort by it
        # directly.  (Re-parsing the joined path with r"(.*/)(\d+)(\.jpg)"
        # assumed '/' separators and would break with os.path.join on Windows.)
        entries = []
        for fname in os.listdir(path_to_data):
            match = img_pattern.match(fname)
            if not match:
                continue
            img_id = int(match.group(1))
            img_file = os.path.join(path_to_data, fname)
            gt_file = os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
            entries.append((img_id, img_file, read_groundtruth_file(gt_file)))

        entries.sort(key=lambda entry: entry[0])
        # self.images: list of (image path, list of AnnotationRect), id-sorted.
        self.images = [(img_file, annots) for _, img_file, annots in entries]

        # The geometry-independent tail of the pipeline is identical for every
        # image, so build it once here instead of on every __getitem__ call.
        # Only the square padding varies per image.
        self._post_pad = transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        """
        @return: Tuple of image tensor and label. The label is 0 if there is one person and 1 if there a multiple people.
        """
        img_path, annotations = self.images[idx]
        img = Image.open(img_path).convert("RGB")
        # Pad to a square first so the subsequent resize does not distort.
        padded = transforms.Pad(self.__padding__(img), 0)(img)
        label = 1 if len(annotations) > 1 else 0
        return (self._post_pad(padded), label)

    def __padding__(self, img) -> Tuple[int, int, int, int]:
        """Return (left, top, right, bottom) padding that makes img square."""
        w, h = img.size
        side = max(w, h)
        return (0, 0, side - w, side - h)

    def __len__(self) -> int:
        """Number of (image, annotations) pairs found in the data folder."""
        return len(self.images)


def get_dataloader(
    path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
) -> DataLoader:
    """Exercise 3.2d — build a DataLoader over the train or val split.

    @param path_to_data: Root folder containing "train" and "val" subfolders.
    @param is_train: Selects the "train" split and enables shuffling.
    """
    split = "train" if is_train else "val"
    dataset = MMP_Dataset(
        path_to_data=os.path.join(path_to_data, split), image_size=image_size
    )
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_train,  # shuffle only during training
        num_workers=num_workers,
        pin_memory=True,
    )
def main():
    """Exercise 3.3 — train and evaluate MmpNet on the MMP dataset."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data_root = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2"
    train_epochs = 10

    # The dataset yields binary labels (0 = one person, 1 = multiple people),
    # so the classifier head needs exactly 2 outputs — num_classes=10 left
    # 8 dead logits the loss could never target correctly.
    model = MmpNet(num_classes=2).to(device=device)

    # NOTE(review): image_size=244 looks like a typo for the conventional 224;
    # kept as-is since the dataset resizes to whatever is requested — confirm.
    dataloader_train = get_dataloader(
        path_to_data=data_root,
        image_size=244,
        batch_size=32,
        num_workers=6,
        is_train=True,
    )
    dataloader_eval = get_dataloader(
        path_to_data=data_root,
        image_size=244,
        batch_size=32,
        num_workers=6,
        is_train=False,
    )
    criterion, optimizer = get_criterion_optimizer(model=model)

    for epoch in range(train_epochs):
        log_epoch_progress(epoch, train_epochs, "start")
        train_epoch(
            model=model,
            loader=dataloader_train,
            optimizer=optimizer,
            device=device,
            criterion=criterion,
        )
        eval_epoch(
            model=model,
            loader=dataloader_eval,
            device=device,
        )
        log_epoch_progress(epoch, train_epochs, "end")


if __name__ == "__main__":
    main()