Merge branch 'assignment-a1' into 'main'

assignment-a1: init

See merge request mmc-mmp/mmp_wise2526_franksim!1
This commit is contained in:
franksim
2025-10-18 11:06:51 +02:00
5 changed files with 208 additions and 6 deletions

9
.gitignore vendored
View File

@@ -1,2 +1,11 @@
.venv/
.data
__pycache__/
*.code-workspace
.vscode/
.idea/
*.aux
*.fdb_latexmk
*.fls
*.log
*.synctex.gz

BIN
mmp/a1/document.pdf Normal file

Binary file not shown.

125
mmp/a1/document.tex Normal file
View File

@@ -0,0 +1,125 @@
\documentclass[11pt,a4paper]{article}
% Language and encoding settings
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
% Page formatting
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
\usepackage{setspace}
\onehalfspacing
% Header/Footer
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead[L]{\textbf{\course}}
\fancyhead[C]{Assignment \assignmentnumber}
\fancyhead[R]{\name}
\fancyfoot[C]{\thepage}
% Other packages
\usepackage{enumitem}
% Custom commands for easy detail insertion
\newcommand{\assignmentnumber}{01} % <-- CHANGE Assignment Number
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
\newcommand{\duedate}{2025-10-22} % <-- CHANGE DUE DATE
% Title formatting
\usepackage{titling}
\pretitle{
\vspace*{2cm}
\begin{center}
\LARGE\bfseries
}
\posttitle{\par\end{center}\vspace{1cm}}
\begin{document}
\title{Assignment \assignmentnumber}
\author{\name}
\date{\duedate}
\maketitle
\begin{center}
\textbf{Course:} \course
\end{center}
\vspace{0.5cm}
%------------------ START OF ASSIGNMENT -----------------------
% Write your solutions below
\section*{Exercise 1.3 Forward Pass}
\begin{enumerate}[label=\alph*)]
\item
Two of the results were entirely correct. Some of the incorrect results were at least in the right direction, such as “llama” for “zoo.jpg” and “bathing cup” for “rubber duck sculpture.jpg”. \\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & golden retriever & Correct \\
\hline
"koala.jpg" & koala & Correct \\
\hline
"pacifier.jpg" & Petri dish & Incorrect \\
\hline
"rubber duck sculpture.jpg" & bathing cup & Incorrect \\
\hline
"rubber ducks.jpg" & frying pan & Incorrect \\
\hline
"shoehorn.jpg" & wine bottle & Incorrect \\
\hline
"zoo.jpg" & llama & Incorrect, but close \\
\hline
\end{tabular}
\item The resizing to a lower resolution resulted in none of the answers being entirely accurate. However, for the animals, the answers were quite close. This can be attributed to the loss of detail during the downsizing process.\\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & Saluki & Incorrect, but close \\
\hline
"koala.jpg" & Madagascar cat & Incorrect, but close \\
\hline
"pacifier.jpg" & maze & Incorrect \\
\hline
"rubber duck sculpture.jpg" & goldfinch & Incorrect \\
\hline
"rubber ducks.jpg" & confectionery & Incorrect \\
\hline
"shoehorn.jpg" & banana & Incorrect \\
\hline
"zoo.jpg" & fountain & Incorrect \\
\hline
\end{tabular}
\item The vertical flip posed a significant challenge to the model. While none of the results were entirely accurate, a few were moving in the right direction. It's likely that the model was trained using images that were not upside down, which led to those results.\\
\begin{tabular}{|c|c|c|}
\hline Image & Model Output & Result \\
\hline
\hline
"golden retriever.jpg" & fox squirrel & Incorrect, right direction \\
\hline
"koala.jpg" & Madagascar cat & Incorrect, but close \\
\hline
"pacifier.jpg" & nipple & Incorrect \\
\hline
"rubber duck sculpture.jpg" & stage & Incorrect \\
\hline
"rubber ducks.jpg" & frying pan & Incorrect \\
\hline
"shoehorn.jpg" & punching bag & Incorrect \\
\hline
"zoo.jpg" & shield & Incorrect \\
\hline
\end{tabular}
\end{enumerate}
%------------------ END OF ASSIGNMENT -----------------------
\end{document}

View File

@@ -1,5 +1,17 @@
from typing import Sequence
import torch
import torchvision
from torchvision.transforms import functional as F
from torchvision import models, transforms
from PIL import Image
def pad_to_square(img):
    """Pad a PIL image symmetrically with black pixels until it is square."""
    width, height = img.size
    side = max(width, height)
    # Split the total padding as evenly as possible between the two edges;
    # any odd leftover pixel goes to the right/bottom.
    left = (side - width) // 2
    top = (side - height) // 2
    right = side - width - left
    bottom = side - height - top
    return F.pad(img, (left, top, right, bottom), fill=0, padding_mode='constant')
def build_batch(paths: Sequence[str], transform=None) -> torch.Tensor:
    """Load images from disk and stack them into one batch tensor.

    Each image is padded to a square, resized to 224x224, optionally run
    through an extra augmentation transform, and converted to a tensor.

    @param paths: File paths of the images to load.
    @param transform: One or multiple image transformations for augmenting the batch images.
    @return: Returns one single tensor that contains every image.
    """
    preprocess = transforms.Compose([
        transforms.Lambda(pad_to_square),
        transforms.Resize((224, 224)),
        # The optional transform runs after the resize, so it may change the
        # final image size (e.g. a Resize((100, 100)) augmentation).
        *([transform] if transform is not None else []),
        transforms.ToTensor(),
    ])
    # Force RGB so grayscale/RGBA files all yield 3-channel tensors.
    imgs = [preprocess(Image.open(path).convert('RGB')) for path in paths]
    # Stack along a new leading batch dimension: (N, C, H, W).
    return torch.stack(imgs)
def get_model() -> torch.nn.Module:
    """Build the pretrained classifier used for the forward-pass experiments.

    @return: Returns a neural network, initialised with pretrained weights.
    """
    # ResNet-18 with the default (ImageNet-1k) pretrained weights.
    return models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
def forward_pass(paths, batch, model):
    """Run the model on a batch and print the top-1 prediction for each image."""
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        outputs = model(batch)
        max_scores, preds = outputs.max(dim=1)
    # Human-readable ImageNet class labels shipped with the weights.
    class_names = torchvision.models.ResNet18_Weights.DEFAULT.meta["categories"]
    for path, pred, score in zip(paths, preds, max_scores):
        print(f"Image: {path}")
        print(f"Model output score: {score.item():.4f}")
        print(f"Predicted class: {class_names[pred.item()]}")
        print()
def main():
    """
    Put all your code for exercise 1.3 here.

    Classifies the test images under three preprocessing variants:
    batch A is the plain pipeline, batch B additionally downsamples to
    100x100, and batch C vertically flips every image.
    """
    paths = [
        "./images/golden retriever.jpg",
        "./images/koala.jpg",
        "./images/pacifier.jpg",
        "./images/rubber duck sculpture.jpg",
        "./images/rubber ducks.jpg",
        "./images/shoehorn.jpg",
        "./images/zoo.jpg",
    ]
    batch_a = build_batch(paths)
    model = get_model()
    print("Batch A:")
    forward_pass(paths, batch_a, model)
    print("Batch B:")
    # Extra downsampling destroys fine detail before the model sees the image.
    batch_b = build_batch(paths, transforms.Resize((100, 100)))
    forward_pass(paths, batch_b, model)
    print("Batch C:")
    # p=1 makes the "random" vertical flip deterministic.
    batch_c = build_batch(paths, transforms.RandomVerticalFlip(1))
    forward_pass(paths, batch_c, model)
if __name__ == "__main__":

View File

@@ -1,10 +1,14 @@
import torch
def avg_color(img: torch.Tensor) -> torch.Tensor:
    """Return the per-channel mean colour of an image tensor.

    @param img: Image tensor, assumed channels-first (C, H, W) — TODO confirm
        layout against callers.
    @return: 1-D tensor with one mean value per channel.
    """
    # Average over the two spatial dimensions, keeping the channel axis.
    return img.mean(dim=(1, 2))
def mask(foreground: torch.Tensor, background: torch.Tensor, mask_tensor: torch.Tensor, threshold: float) -> torch.Tensor:
    """Composite two tensors: foreground where the mask exceeds the threshold.

    @param foreground: Values used where ``mask_tensor > threshold``.
    @param background: Values used everywhere else.
    @param mask_tensor: Tensor broadcastable against foreground/background.
    @param threshold: Cut-off applied to ``mask_tensor`` (strict greater-than,
        so entries exactly equal to the threshold come from the background).
    @return: Element-wise selection of foreground or background.
    """
    # Renamed from `mask` to avoid shadowing the enclosing function's name.
    selection = mask_tensor > threshold
    return torch.where(selection, foreground, background)
def add_matrix_vector(matrix: torch.Tensor, vector: torch.Tensor) -> torch.Tensor:
    """Add a vector to every row of a matrix via broadcasting.

    @param matrix: 2-D tensor of shape (n, m) — presumably; broadcasting also
        permits other compatible shapes.
    @param vector: Tensor broadcastable against ``matrix`` (e.g. shape (m,)).
    @return: A new tensor; neither input is modified in place.
    """
    return matrix + vector