Merge branch 'assignment-a3' into 'main'
assignment-a3: adds code See merge request mmc-mmp/mmp_wise2526_franksim!3
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,6 @@
|
|||||||
.venv/
|
.venv/
|
||||||
.data
|
.data
|
||||||
|
runs/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.code-workspace
|
*.code-workspace
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|||||||
@@ -1,27 +1,80 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
from typing import List
|
from typing import List
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
|
||||||
class AnnotationRect:
    """Exercise 3.1: axis-aligned rectangle described by two corner points.

    The four coordinates are stored exactly as given; the constructor does
    not reorder or validate them.
    """

    def __init__(self, x1, y1, x2, y2):
        """Store the corner coordinates ``(x1, y1)`` and ``(x2, y2)``."""
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2

    def __repr__(self):
        return (
            f"{type(self).__name__}"
            f"({self.x1!r}, {self.y1!r}, {self.x2!r}, {self.y2!r})"
        )

    def area(self):
        """Return ``(x2 - x1) * (y2 - y1)``.

        The result is signed: it is negative when exactly one of the two
        differences is negative.
        """
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """Return the rectangle as the 1-D array ``[x1, y1, x2, y2]``.

        ``dtype`` and ``copy`` are part of NumPy's ``__array__`` protocol.
        Without them ``np.asarray(rect, dtype=...)`` fails and newer NumPy
        versions emit a deprecation warning.

        @param dtype: Optional dtype requested by NumPy for the result.
        @param copy: Copy flag passed by NumPy. A fresh array is always
                     built, so ``copy=False`` cannot be honoured.
        @raise ValueError: If ``copy`` is ``False``.
        """
        if copy is False:
            raise ValueError(
                "AnnotationRect cannot be converted to an array without copying"
            )
        return np.array([self.x1, self.y1, self.x2, self.y2], dtype=dtype)

    @staticmethod
    def fromarray(arr: np.ndarray):
        """Build a rectangle from the first four entries of ``arr``.

        @param arr: Indexable whose entries 0..3 are ``x1, y1, x2, y2``.
        """
        return AnnotationRect(arr[0], arr[1], arr[2], arr[3])
|
||||||
|
|
||||||
|
|
||||||
def read_groundtruth_file(path: str) -> List[AnnotationRect]:
    """Exercise 3.1b: parse an annotation file into a list of rectangles.

    Every non-blank line is split on whitespace; its first four fields are
    converted to ``float`` and used as ``x1, y1, x2, y2``. Any further fields
    on the line are ignored.

    @param path: Path of the annotation text file.
    @return: One ``AnnotationRect`` per non-blank line, in file order.
    @raise IndexError: If a non-blank line has fewer than four fields.
    @raise ValueError: If one of the first four fields is not a number.
    """
    annotation_rects = []
    # Explicit encoding keeps parsing independent of the host locale.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()  # split() already drops surrounding whitespace
            if not values:
                continue
            annotation_rects.append(AnnotationRect(
                float(values[0]), float(values[1]),
                float(values[2]), float(values[3])))
    return annotation_rects
|
||||||
|
|
||||||
|
|
||||||
# put your solution for exercise 3.1c wherever you deem it right
|
def get_image_with_max_annotations(dir_path: str) -> str:
    """Exercise 3.1c: find the image in ``dir_path`` with the most annotations.

    Only files named ``<digits>.jpg`` are considered. For each of them the
    annotation file ``<digits>.gt_data.txt`` in the same directory is parsed
    and its rectangles are counted.

    @param dir_path: Directory holding the images and annotation files.
    @return: Path of the image with the highest annotation count, or ``None``
             when no matching image has at least one annotation.
    """
    img_pattern = re.compile(r'^(\d+)\.jpg$')
    max_file = None
    max_annotations = 0

    # Iterate in sorted order: the strict ``>`` below keeps the first image
    # seen among ties, so a stable order makes the result reproducible
    # (set iteration order of strings can change between interpreter runs).
    for fname in sorted(os.listdir(dir_path)):
        match = img_pattern.match(fname)
        if match is None:
            continue
        gt_path = os.path.join(dir_path, f"{match.group(1)}.gt_data.txt")
        annotations_number = len(read_groundtruth_file(gt_path))
        if annotations_number > max_annotations:
            max_file = os.path.join(dir_path, fname)
            max_annotations = annotations_number
    return max_file
|
||||||
|
|
||||||
|
|
||||||
|
def visualize_image(image_path: str, output_path='output.jpg', rect_color=(255, 0, 0), width=2):
    """Draw every annotation rectangle onto an image and save the result.

    The annotation file is looked up next to the image: the image's file
    extension is replaced by ``.gt_data.txt``.

    @param image_path: Path of the image to annotate.
    @param output_path: Where the annotated image is written.
    @param rect_color: Outline colour passed to ``ImageDraw.rectangle``.
    @param width: Outline width passed to ``ImageDraw.rectangle``.
    """
    # splitext never fails, unlike an unchecked regex match that returns
    # None for paths not containing ".jpg".
    stem, _extension = os.path.splitext(image_path)
    annotations = read_groundtruth_file(f"{stem}.gt_data.txt")

    # convert() returns an independent image, so the source file can be
    # closed as soon as the conversion is done.
    with Image.open(image_path) as source:
        img = source.convert('RGB')
    draw = ImageDraw.Draw(img)

    for annotation in annotations:
        draw.rectangle(
            [annotation.x1, annotation.y1, annotation.x2, annotation.y2],
            outline=rect_color, width=width)

    img.save(output_path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Visualize the training image that has the most annotations.

    The result is written to the default output path of ``visualize_image``.
    """
    train_dir = "/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2/train"
    visualize_image(get_image_with_max_annotations(train_dir))


if __name__ == "__main__":
    main()
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
from PIL import Image
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
from .annotation import read_groundtruth_file
|
||||||
|
from torchvision import transforms
|
||||||
|
|
||||||
|
|
||||||
class MMP_Dataset(torch.utils.data.Dataset):
|
class MMP_Dataset(torch.utils.data.Dataset):
|
||||||
@@ -11,19 +16,61 @@ class MMP_Dataset(torch.utils.data.Dataset):
|
|||||||
@param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
|
@param path_to_data: Path to the folder that contains the images and annotation files, e.g. dataset_mmp/train
|
||||||
@param image_size: Desired image size that this dataset should return
|
@param image_size: Desired image size that this dataset should return
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
self.image_size = image_size
|
||||||
|
img_pattern = re.compile(r'^(\d+)\.jpg$')
|
||||||
|
files = set(os.listdir(path_to_data))
|
||||||
|
self.images = []
|
||||||
|
|
||||||
|
for fname in files:
|
||||||
|
match = img_pattern.match(fname)
|
||||||
|
if match:
|
||||||
|
img_file = os.path.join(path_to_data, fname)
|
||||||
|
annotations = read_groundtruth_file(os.path.join(
|
||||||
|
path_to_data, f"{match.group(1)}.gt_data.txt"))
|
||||||
|
self.images.append((img_file, annotations))
|
||||||
|
|
||||||
|
self.images.sort(key=lambda x: int(
|
||||||
|
re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2)))
|
||||||
|
|
||||||
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
    """
    Load, pad, resize and normalize the image stored at position ``idx``.

    @param idx: Index into ``self.images``.
    @return: Tuple of image tensor and label. The label is 1 if the image has
             more than one annotation (multiple people) and 0 otherwise.
    """
    img_path, annotations = self.images[idx]
    # Close the file handle immediately instead of waiting for garbage
    # collection; convert() returns an independent image.
    with Image.open(img_path) as source:
        img = source.convert("RGB")

    # The padding depends on the individual image size, so the pipeline is
    # assembled per sample.
    padding = self.__padding__(img)
    transform = transforms.Compose([
        transforms.Pad(padding, 0),
        transforms.Resize((self.image_size, self.image_size)),
        transforms.ToTensor(),
        # NOTE(review): these look like the usual ImageNet channel
        # statistics - confirm they fit the model being trained.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    label = 1 if len(annotations) > 1 else 0
    return (transform(img), label)
|
||||||
|
|
||||||
|
def __padding__(self, img) -> Tuple[int, int, int, int]:
|
||||||
|
w, h = img.size
|
||||||
|
size = max(w, h)
|
||||||
|
right_pad = size - w
|
||||||
|
bottom_pad = size - h
|
||||||
|
return (0, 0, right_pad, bottom_pad)
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
raise NotImplementedError()
|
return len(self.images)
|
||||||
|
|
||||||
|
|
||||||
def get_dataloader(
    path_to_data: str, image_size: int, batch_size: int, num_workers: int, is_train: bool = True
) -> DataLoader:
    """Exercise 3.2d: build a DataLoader for the train or validation split.

    @param path_to_data: Dataset root that contains the ``train`` and ``val`` folders.
    @param image_size: Edge length of the square images the dataset returns.
    @param batch_size: Number of samples per batch.
    @param num_workers: Number of DataLoader worker processes.
    @param is_train: Selects the ``train`` split and enables shuffling when
                     True; selects ``val`` without shuffling when False.
    @return: DataLoader over an ``MMP_Dataset`` of the chosen split.
    """
    split = "train" if is_train else "val"
    dataset = MMP_Dataset(
        path_to_data=os.path.join(path_to_data, split), image_size=image_size)
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=num_workers,
        # Pinned memory only helps host-to-GPU copies; requesting it on a
        # machine without CUDA just triggers a warning.
        pin_memory=torch.cuda.is_available(),
    )
|
||||||
|
|||||||
BIN
mmp/a3/document.pdf
Normal file
BIN
mmp/a3/document.pdf
Normal file
Binary file not shown.
80
mmp/a3/document.tex
Normal file
80
mmp/a3/document.tex
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
\documentclass[11pt,a4paper]{article}
|
||||||
|
|
||||||
|
% Language and encoding settings
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage[english]{babel}
|
||||||
|
|
||||||
|
% Page formatting
|
||||||
|
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
|
||||||
|
\usepackage{setspace}
|
||||||
|
\onehalfspacing
|
||||||
|
|
||||||
|
% Header/Footer
|
||||||
|
\usepackage{fancyhdr}
|
||||||
|
\pagestyle{fancy}
|
||||||
|
\fancyhf{} % clear all header and footer fields
|
||||||
|
\fancyhead[L]{\textbf{\course}}
|
||||||
|
\fancyhead[C]{Assignment \assignmentnumber}
|
||||||
|
\fancyhead[R]{\name}
|
||||||
|
\fancyfoot[C]{\thepage}
|
||||||
|
|
||||||
|
% Other packages
|
||||||
|
\usepackage{enumitem}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
|
||||||
|
% Custom commands for easy detail insertion
|
||||||
|
\newcommand{\assignmentnumber}{03} % <-- CHANGE Assignment Number
|
||||||
|
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
|
||||||
|
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
|
||||||
|
\newcommand{\duedate}{2025-11-05} % <-- CHANGE DUE DATE
|
||||||
|
|
||||||
|
% Title formatting
|
||||||
|
\usepackage{titling}
|
||||||
|
\pretitle{
|
||||||
|
\vspace*{2cm}
|
||||||
|
\begin{center}
|
||||||
|
\LARGE\bfseries
|
||||||
|
}
|
||||||
|
\posttitle{\par\end{center}\vspace{1cm}}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\title{Assignment \assignmentnumber}
|
||||||
|
\author{\name}
|
||||||
|
\date{\duedate}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\textbf{Course:} \course
|
||||||
|
\end{center}
|
||||||
|
\vspace{0.5cm}
|
||||||
|
|
||||||
|
%------------------ START OF ASSIGNMENT -----------------------
|
||||||
|
|
||||||
|
% Write your solutions below
|
||||||
|
|
||||||
|
\section*{Exercise 3.1 Dataset Parsing}
|
||||||
|
|
||||||
|
\begin{enumerate}[label=\alph*)]
|
||||||
|
\setcounter{enumi}{2}
|
||||||
|
\item \begin{figure}[htp]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=4cm]{output.jpg}
|
||||||
|
\caption{02254418.jpg with 18 annotations}
|
||||||
|
\end{figure}
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
\section*{Exercise 3.3 Training}
|
||||||
|
\begin{tabular}{|c||c|}
|
||||||
|
\hline Batch size & 32 \\
|
||||||
|
\hline Training epochs & 10 \\
|
||||||
|
\hline Loss & 0.3719 \\
|
||||||
|
\hline Accuracy & 78.90 \% \\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
|
||||||
|
%------------------ END OF ASSIGNMENT -----------------------
|
||||||
|
|
||||||
|
\end{document}
|
||||||
@@ -1,6 +1,57 @@
|
|||||||
|
import torch
|
||||||
|
import argparse
|
||||||
|
from ..a2.main import MmpNet, get_criterion_optimizer, train_epoch, eval_epoch
|
||||||
|
from .dataset import get_dataloader
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Exercise 3.3: train MmpNet on the MMP dataset and report validation accuracy.

    All settings can be overridden on the command line; the defaults
    reproduce the previously hard-coded configuration. With ``--tensorboard``
    the per-epoch training loss and validation accuracy are also written to
    ``runs/a3_mmpnet``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--tensorboard', action='store_true',
                        help='Enable TensorBoard logging')
    parser.add_argument('--data-dir',
                        default="/home/ubuntu/mmp_wise2526_franksim/.data/mmp-public-3.2",
                        help='Dataset root containing the train/ and val/ folders')
    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of training epochs')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='Samples per batch')
    parser.add_argument('--num-workers', type=int, default=6,
                        help='DataLoader worker processes')
    # NOTE(review): 244 is kept from the original code; 224 is the more
    # common input size - confirm which one is intended.
    parser.add_argument('--image-size', type=int, default=244,
                        help='Edge length of the square network input')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_epochs = args.epochs
    model = MmpNet(num_classes=2).to(device=device)
    dataloader_train = get_dataloader(
        path_to_data=args.data_dir,
        image_size=args.image_size, batch_size=args.batch_size,
        num_workers=args.num_workers, is_train=True
    )
    dataloader_eval = get_dataloader(
        path_to_data=args.data_dir,
        image_size=args.image_size, batch_size=args.batch_size,
        num_workers=args.num_workers, is_train=False
    )
    criterion, optimizer = get_criterion_optimizer(model=model)

    writer = None
    if args.tensorboard:
        # Imported lazily so TensorBoard is only required when requested.
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(log_dir="runs/a3_mmpnet")

    try:
        for epoch in range(train_epochs):
            train_loss = train_epoch(
                model=model,
                loader=dataloader_train,
                optimizer=optimizer,
                device=device,
                criterion=criterion,
            )
            val_acc = eval_epoch(
                model=model,
                loader=dataloader_eval,
                device=device
            )

            print(
                f"Epoch [{epoch+1}/{train_epochs}] - Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")

            if writer is not None:
                writer.add_scalar("Loss/train", train_loss, epoch)
                writer.add_scalar("Accuracy/val", val_acc, epoch)
    finally:
        # Close the writer even when training is interrupted or fails, so
        # events that were already logged are not lost.
        if writer is not None:
            writer.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
BIN
mmp/a3/output.jpg
Normal file
BIN
mmp/a3/output.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 54 KiB |
Reference in New Issue
Block a user