diff --git a/.gitignore b/.gitignore index a230a78..c5ee606 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,11 @@ .venv/ +.data __pycache__/ +*.code-workspace +.vscode/ +.idea/ +*.aux +*.fdb_latexmk +*.fls +*.log +*.synctex.gz \ No newline at end of file diff --git a/mmp/a1/document.pdf b/mmp/a1/document.pdf new file mode 100644 index 0000000..1e95e57 Binary files /dev/null and b/mmp/a1/document.pdf differ diff --git a/mmp/a1/document.tex b/mmp/a1/document.tex new file mode 100644 index 0000000..8d0df3d --- /dev/null +++ b/mmp/a1/document.tex @@ -0,0 +1,125 @@ +\documentclass[11pt,a4paper]{article} + +% Language and encoding settings +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[english]{babel} + +% Page formatting +\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry} +\usepackage{setspace} +\onehalfspacing + +% Header/Footer +\usepackage{fancyhdr} +\pagestyle{fancy} +\fancyhf{} % clear all header and footer fields +\fancyhead[L]{\textbf{\course}} +\fancyhead[C]{Assignment \assignmentnumber} +\fancyhead[R]{\name} +\fancyfoot[C]{\thepage} + +% Other packages +\usepackage{enumitem} + +% Custom commands for easy detail insertion +\newcommand{\assignmentnumber}{01} % <-- CHANGE Assignment Number +\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME +\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME +\newcommand{\duedate}{2025-10-22} % <-- CHANGE DUE DATE + +% Title formatting +\usepackage{titling} +\pretitle{ + \vspace*{2cm} + \begin{center} + \LARGE\bfseries +} +\posttitle{\par\end{center}\vspace{1cm}} + +\begin{document} + +\title{Assignment \assignmentnumber} +\author{\name} +\date{\duedate} + +\maketitle + +\begin{center} + \textbf{Course:} \course +\end{center} +\vspace{0.5cm} + +%------------------ START OF ASSIGNMENT ----------------------- + +% Write your solutions below + +\section*{Exercise 1.3 Forward Pass} + +\begin{enumerate}[label=\alph*)] + \item + Two of the results were entirely
correct. Some of the incorrect results were at least in the right direction, such as “llama” for “zoo.jpg” and “bathing cup” for “rubber duck sculpture.jpg”. \\ + \begin{tabular}{|c|c|c|} + \hline Image & Model Output & Result \\ + \hline + \hline + "golden retriever.jpg" & golden retriever & Correct \\ + \hline + "koala.jpg" & koala & Correct \\ + \hline + "pacifier.jpg" & Petri dish & Incorrect \\ + \hline + "rubber duck sculpture.jpg" & bathing cup & Incorrect \\ + \hline + "rubber ducks.jpg" & frying pan & Incorrect \\ + \hline + "shoehorn.jpg" & wine bottle & Incorrect \\ + \hline + "zoo.jpg" & llama & Incorrect, but close \\ + \hline + \end{tabular} + \item The resizing to a lower resolution resulted in none of the answers being entirely accurate. However, for the animals, the answers were quite close. This can be attributed to the loss of detail during the downsizing process.\\ + \begin{tabular}{|c|c|c|} + \hline Image & Model Output & Result \\ + \hline + \hline + "golden retriever.jpg" & Saluki & Incorrect, but close \\ + \hline + "koala.jpg" & Madagascar cat & Incorrect, but close \\ + \hline + "pacifier.jpg" & maze & Incorrect \\ + \hline + "rubber duck sculpture.jpg" & goldfinch & Incorrect \\ + \hline + "rubber ducks.jpg" & confectionery & Incorrect \\ + \hline + "shoehorn.jpg" & banana & Incorrect \\ + \hline + "zoo.jpg" & fountain & Incorrect \\ + \hline + \end{tabular} + \item The vertical flip posed a significant challenge to the model. While none of the results were entirely accurate, a few were moving in the right direction. 
It’s likely that the model was trained using images that were not upside down, which led to those results.\\ + \begin{tabular}{|c|c|c|} + \hline Image & Model Output & Result \\ + \hline + \hline + "golden retriever.jpg" & fox squirrel & Incorrect, right direction \\ + \hline + "koala.jpg" & Madagascar cat & Incorrect, but close \\ + \hline + "pacifier.jpg" & nipple & Incorrect \\ + \hline + "rubber duck sculpture.jpg" & stage & Incorrect \\ + \hline + "rubber ducks.jpg" & frying pan & Incorrect \\ + \hline + "shoehorn.jpg" & punching bag & Incorrect \\ + \hline + "zoo.jpg" & shield & Incorrect \\ + \hline + \end{tabular} +\end{enumerate} + +%------------------ END OF ASSIGNMENT ----------------------- + +\end{document} \ No newline at end of file diff --git a/mmp/a1/main.py b/mmp/a1/main.py index c1d0eda..cb10388 100644 --- a/mmp/a1/main.py +++ b/mmp/a1/main.py @@ -1,5 +1,17 @@ from typing import Sequence import torch +import torchvision +from torchvision.transforms import functional as F +from torchvision import models, transforms +from PIL import Image + + +def pad_to_square(img): + w, h = img.size + max_wh = max(w, h) + pad = ((max_wh - w) // 2, (max_wh - h) // 2) + padding = (pad[0], pad[1], max_wh - w - pad[0], max_wh - h - pad[1]) + return F.pad(img, padding, fill=0, padding_mode='constant') def build_batch(paths: Sequence[str], transform=None) -> torch.Tensor: @@ -9,7 +21,21 @@ def build_batch(paths: Sequence[str], transform=None) -> torch.Tensor: @param transform: One or multiple image transformations for augmenting the batch images. @return: Returns one single tensor that contains every image.
""" - raise NotImplementedError() + preprocess = transforms.Compose([ + transforms.Lambda(pad_to_square), + transforms.Resize((224, 224)), + *([transform] if transform is not None else []), + transforms.ToTensor() + ] + ) + imgs = [] + + for path in paths: + img = Image.open(path).convert('RGB') + img = preprocess(img) + imgs.append(img) + batch = torch.stack(imgs) + return batch def get_model() -> torch.nn.Module: @@ -17,7 +43,24 @@ def get_model() -> torch.nn.Module: @return: Returns a neural network, initialised with pretrained weights. """ - raise NotImplementedError() + model = models.resnet18( + weights=models.ResNet18_Weights.DEFAULT) + return model + + +def forward_pass(paths, batch, model): + with torch.no_grad(): + outputs = model(batch) + + max_scores, preds = outputs.max(dim=1) + + class_names = torchvision.models.ResNet18_Weights.DEFAULT.meta["categories"] + + for i, (p, s) in enumerate(zip(preds, max_scores)): + print(f"Image: {paths[i]}") + print(f"Model output score: {s.item():.4f}") + print(f"Predicted class: {class_names[p.item()]}") + print() def main(): @@ -25,7 +68,28 @@ def main(): Put all your code for exercise 1.3 here. 
""" - raise NotImplementedError() + + paths = [ + "./images/golden retriever.jpg", + "./images/koala.jpg", + "./images/pacifier.jpg", + "./images/rubber duck sculpture.jpg", + "./images/rubber ducks.jpg", + "./images/shoehorn.jpg", + "./images/zoo.jpg", + ] + batch_a = build_batch(paths) + model = get_model() + print("Batch A:") + forward_pass(paths, batch_a, model) + + print("Batch B:") + batch_b = build_batch(paths, transforms.Resize((100, 100))) + forward_pass(paths, batch_b, model) + + print("Batch C:") + batch_c = build_batch(paths, transforms.RandomVerticalFlip(1)) + forward_pass(paths, batch_c, model) if __name__ == "__main__": diff --git a/mmp/a1/tensors.py b/mmp/a1/tensors.py index 4574de1..3d87203 100644 --- a/mmp/a1/tensors.py +++ b/mmp/a1/tensors.py @@ -1,10 +1,14 @@ import torch + def avg_color(img: torch.Tensor): - raise NotImplementedError() + return img.mean(dim=(1, 2)) + def mask(foreground: torch.Tensor, background: torch.Tensor, mask_tensor: torch.Tensor, threshold: float): - raise NotImplementedError() + mask = mask_tensor > threshold + return torch.where(mask, foreground, background) + def add_matrix_vector(matrix: torch.Tensor, vector: torch.Tensor): - raise NotImplementedError() + return matrix.add(vector)