Merge branch 'assignment-a1' into 'main'
assignment-a1: init

See merge request mmc-mmp/mmp_wise2526_franksim!1
.gitignore (vendored, 9 changed lines)
@@ -1,2 +1,11 @@
.venv/
.data
__pycache__/
*.code-workspace
.vscode/
.idea/
*.aux
*.fdb_latexmk
*.fls
*.log
*.synctex.gz
mmp/a1/document.pdf (new binary file, not shown)
mmp/a1/document.tex (new file, 125 lines)
@@ -0,0 +1,125 @@
\documentclass[11pt,a4paper]{article}

% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}

% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing

% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}

% Other packages
\usepackage{enumitem}

% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{01} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-10-22} % <-- CHANGE DUE DATE

% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}

\begin{document}

\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}

\maketitle

\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}

%------------------ START OF ASSIGNMENT -----------------------

% Write your solutions below

\section*{Exercise 1.3: Forward Pass}

\begin{enumerate}[label=\alph*)]
\item
Two of the results were entirely correct. Some of the incorrect results were at least in the right direction, such as ``llama'' for ``zoo.jpg'' and ``bathing cup'' for ``rubber duck sculpture.jpg''. \\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & golden retriever & Correct \\
\hline
"koala.jpg" & koala & Correct \\
\hline
"pacifier.jpg" & Petri dish & Incorrect \\
\hline
"rubber duck sculpture.jpg" & bathing cup & Incorrect \\
\hline
"rubber ducks.jpg" & frying pan & Incorrect \\
\hline
"shoehorn.jpg" & wine bottle & Incorrect \\
\hline
"zoo.jpg" & llama & Incorrect, but close \\
\hline
\end{tabular}
\item Resizing to a lower resolution left none of the answers entirely accurate. For the animal images, however, the predictions were quite close. The errors can be attributed to the loss of detail during downsizing.\\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & Saluki & Incorrect, but close \\
\hline
"koala.jpg" & Madagascar cat & Incorrect, but close \\
\hline
"pacifier.jpg" & maze & Incorrect \\
\hline
"rubber duck sculpture.jpg" & goldfinch & Incorrect \\
\hline
"rubber ducks.jpg" & confectionery & Incorrect \\
\hline
"shoehorn.jpg" & banana & Incorrect \\
\hline
"zoo.jpg" & fountain & Incorrect \\
\hline
\end{tabular}
\item The vertical flip posed a significant challenge to the model. While none of the results were entirely accurate, a few pointed in the right direction. The model was most likely trained on images that were not upside down, which led to these results.\\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & fox squirrel & Incorrect, right direction \\
\hline
"koala.jpg" & Madagascar cat & Incorrect, but close \\
\hline
"pacifier.jpg" & nipple & Incorrect \\
\hline
"rubber duck sculpture.jpg" & stage & Incorrect \\
\hline
"rubber ducks.jpg" & frying pan & Incorrect \\
\hline
"shoehorn.jpg" & punching bag & Incorrect \\
\hline
"zoo.jpg" & shield & Incorrect \\
\hline
\end{tabular}
\end{enumerate}

%------------------ END OF ASSIGNMENT -----------------------

\end{document}
@@ -1,5 +1,17 @@
from typing import Sequence
import torch
import torchvision
from torchvision.transforms import functional as F
from torchvision import models, transforms
from PIL import Image


def pad_to_square(img):
    w, h = img.size
    max_wh = max(w, h)
    pad = ((max_wh - w) // 2, (max_wh - h) // 2)
    padding = (pad[0], pad[1], max_wh - w - pad[0], max_wh - h - pad[1])
    return F.pad(img, padding, fill=0, padding_mode='constant')


def build_batch(paths: Sequence[str], transform=None) -> torch.Tensor:
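As a quick illustration of pad_to_square above (the 300x200 input size is purely hypothetical): the shorter side is padded symmetrically with black pixels until the image is square, so the content stays centred.

from PIL import Image

# Hypothetical 300x200 (width x height) test image, only to illustrate pad_to_square.
example = Image.new('RGB', (300, 200))
padded = pad_to_square(example)
# pad = (0, 50) and padding = (0, 50, 0, 50), i.e. 50 black rows above and below.
print(padded.size)  # (300, 300)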
@@ -9,7 +21,21 @@ def build_batch(paths: Sequence[str], transform=None) -> torch.Tensor:
    @param transform: One or multiple image transformations for augmenting the batch images.
    @return: Returns one single tensor that contains every image.
    """
    raise NotImplementedError()
    preprocess = transforms.Compose([
        transforms.Lambda(pad_to_square),
        transforms.Resize((224, 224)),
        *([transform] if transform is not None else []),
        transforms.ToTensor()
    ]
    )
    imgs = []

    for path in paths:
        img = Image.open(path).convert('RGB')
        img = preprocess(img)
        imgs.append(img)
    batch = torch.stack(imgs)
    return batch


def get_model() -> torch.nn.Module:
@@ -17,7 +43,24 @@ def get_model() -> torch.nn.Module:

    @return: Returns a neural network, initialised with pretrained weights.
    """
    raise NotImplementedError()
    model = models.resnet18(
        weights=models.ResNet18_Weights.DEFAULT)
    return model


def forward_pass(paths, batch, model):
    with torch.no_grad():
        outputs = model(batch)

    max_scores, preds = outputs.max(dim=1)

    class_names = torchvision.models.ResNet18_Weights.DEFAULT.meta["categories"]

    for i, (p, s) in enumerate(zip(preds, max_scores)):
        print(f"Image: {paths[i]}")
        print(f"Model output score: {s.item():.4f}")
        print(f"Predicted class: {class_names[p.item()]}")
        print()


def main():
@@ -25,7 +68,28 @@ def main():

    Put all your code for exercise 1.3 here.
    """
    raise NotImplementedError()

    paths = [
        "./images/golden retriever.jpg",
        "./images/koala.jpg",
        "./images/pacifier.jpg",
        "./images/rubber duck sculpture.jpg",
        "./images/rubber ducks.jpg",
        "./images/shoehorn.jpg",
        "./images/zoo.jpg",
    ]
    batch_a = build_batch(paths)
    model = get_model()
    print("Batch A:")
    forward_pass(paths, batch_a, model)

    print("Batch B:")
    batch_b = build_batch(paths, transforms.Resize((100, 100)))
    forward_pass(paths, batch_b, model)

    print("Batch C:")
    batch_c = build_batch(paths, transforms.RandomVerticalFlip(1))
    forward_pass(paths, batch_c, model)


if __name__ == "__main__":
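The scores that forward_pass prints are raw logits rather than probabilities. If calibrated confidences were wanted, they could be obtained with a softmax over the class dimension; a minimal sketch, reusing the model and batch built above (the helper name forward_pass_with_probs is only illustrative, and model.eval() is added so the batch-norm layers run in inference mode):

import torch
import torchvision


def forward_pass_with_probs(paths, batch, model):
    model.eval()  # fix batch-norm statistics for inference
    with torch.no_grad():
        logits = model(batch)  # shape: (N, 1000) for the ImageNet classes
    probs = torch.softmax(logits, dim=1)
    top_probs, preds = probs.max(dim=1)
    class_names = torchvision.models.ResNet18_Weights.DEFAULT.meta["categories"]
    for path, pred, prob in zip(paths, preds, top_probs):
        print(f"{path}: {class_names[pred.item()]} ({prob.item():.1%})")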
@@ -1,10 +1,14 @@
import torch


def avg_color(img: torch.Tensor):
    raise NotImplementedError()
    return img.mean(dim=(1, 2))


def mask(foreground: torch.Tensor, background: torch.Tensor, mask_tensor: torch.Tensor, threshold: float):
    raise NotImplementedError()
    mask = mask_tensor > threshold
    return torch.where(mask, foreground, background)


def add_matrix_vector(matrix: torch.Tensor, vector: torch.Tensor):
    raise NotImplementedError()
    return matrix.add(vector)
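For reference, a minimal sketch of how the three helpers could be exercised once the old NotImplementedError stubs are gone (the tensor shapes below are only illustrative): avg_color averages over the spatial dimensions of a CxHxW image, mask picks foreground pixels wherever mask_tensor exceeds the threshold, and add_matrix_vector relies on broadcasting to add the vector to every row.

import torch

img = torch.rand(3, 4, 4)              # CxHxW image with random values
print(avg_color(img))                   # one mean per channel, shape (3,)

fg = torch.ones(3, 4, 4)                # all-white foreground
bg = torch.zeros(3, 4, 4)               # all-black background
m = torch.rand(3, 4, 4)                 # mask values in [0, 1)
print(mask(fg, bg, m, threshold=0.5))   # foreground where m > 0.5, else background

matrix = torch.arange(6.0).reshape(2, 3)
vector = torch.tensor([10.0, 20.0, 30.0])
print(add_matrix_vector(matrix, vector))  # the vector is broadcast across both rows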