Compare commits
10 Commits
93d752209f
...
a6f70005f2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6f70005f2 | ||
|
|
3b6a588719 | ||
|
|
c50d9e83b8 | ||
|
|
f21fb57303 | ||
|
|
edbad414e2 | ||
|
|
721e46b768 | ||
|
|
56e21a1e54 | ||
|
|
7245042b54 | ||
|
|
a00ddedb23 | ||
|
|
5c8e06f62f |
@@ -25,6 +25,7 @@ class AnnotationRect:
|
||||
self.x2 *= factor
|
||||
self.y1 *= factor
|
||||
self.y2 *= factor
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def fromarray(arr: np.ndarray):
|
||||
|
||||
BIN
mmp/a4/.output/2242079_transformed.png
Normal file
|
After Width: | Height: | Size: 180 KiB |
BIN
mmp/a4/.output/2245201_transformed.png
Normal file
|
After Width: | Height: | Size: 141 KiB |
BIN
mmp/a4/.output/2245364_transformed.png
Normal file
|
After Width: | Height: | Size: 140 KiB |
BIN
mmp/a4/.output/2246825_transformed.png
Normal file
|
After Width: | Height: | Size: 241 KiB |
BIN
mmp/a4/.output/2247504_transformed.png
Normal file
|
After Width: | Height: | Size: 202 KiB |
BIN
mmp/a4/.output/2251738_transformed.png
Normal file
|
After Width: | Height: | Size: 159 KiB |
BIN
mmp/a4/.output/2254896_transformed.png
Normal file
|
After Width: | Height: | Size: 152 KiB |
BIN
mmp/a4/.output/2258469_transformed.png
Normal file
|
After Width: | Height: | Size: 260 KiB |
BIN
mmp/a4/.output/2259787_transformed.png
Normal file
|
After Width: | Height: | Size: 226 KiB |
BIN
mmp/a4/.output/2262766_transformed.png
Normal file
|
After Width: | Height: | Size: 214 KiB |
BIN
mmp/a4/.output/2265095_transformed.png
Normal file
|
After Width: | Height: | Size: 238 KiB |
BIN
mmp/a4/.output/2265907_transformed.png
Normal file
|
After Width: | Height: | Size: 223 KiB |
|
Before Width: | Height: | Size: 285 KiB |
|
Before Width: | Height: | Size: 71 KiB |
|
Before Width: | Height: | Size: 429 KiB |
|
Before Width: | Height: | Size: 58 KiB |
|
Before Width: | Height: | Size: 141 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 252 KiB |
|
Before Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 192 KiB |
|
Before Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 150 KiB |
|
Before Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 243 KiB |
|
Before Width: | Height: | Size: 56 KiB |
|
Before Width: | Height: | Size: 432 KiB |
|
Before Width: | Height: | Size: 112 KiB |
|
Before Width: | Height: | Size: 234 KiB |
|
Before Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 274 KiB |
|
Before Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 349 KiB |
|
Before Width: | Height: | Size: 112 KiB |
|
Before Width: | Height: | Size: 257 KiB |
|
Before Width: | Height: | Size: 51 KiB |
@@ -5,7 +5,7 @@ import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
from ..a3.annotation import read_groundtruth_file, AnnotationRect
|
||||
from .label_grid import get_label_grid, draw_annotation_rects
|
||||
from .label_grid import get_label_grid
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
from .anchor_grid import get_anchor_grid
|
||||
@@ -29,10 +29,11 @@ class MMP_Dataset(torch.utils.data.Dataset):
|
||||
@param is_test: Whether this is the test set (True) or the validation/training set (False)
|
||||
"""
|
||||
self.image_size = image_size
|
||||
self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = []
|
||||
self.images: Sequence[Tuple[str, str | None]] = []
|
||||
self.anchor_grid = anchor_grid
|
||||
self.min_iou = min_iou
|
||||
self.is_test = is_test
|
||||
self.path_to_data = path_to_data
|
||||
img_pattern = re.compile(r"^(\d+)\.jpg$")
|
||||
files = set(os.listdir(path_to_data))
|
||||
|
||||
@@ -42,11 +43,10 @@ class MMP_Dataset(torch.utils.data.Dataset):
|
||||
img_file = os.path.join(path_to_data, fname)
|
||||
if is_test:
|
||||
self.images.append((img_file, None))
|
||||
else:
|
||||
annotations = read_groundtruth_file(
|
||||
os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
|
||||
annotation_file = os.path.join(
|
||||
path_to_data, f"{match.group(1)}.gt_data.txt"
|
||||
)
|
||||
self.images.append((img_file, annotations))
|
||||
self.images.append((img_file, annotation_file))
|
||||
|
||||
self.images.sort(
|
||||
key=lambda x: int(re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2))
|
||||
@@ -73,13 +73,13 @@ class MMP_Dataset(torch.utils.data.Dataset):
|
||||
if self.is_test:
|
||||
return (img_tensor, torch.Tensor(), int(img_id))
|
||||
|
||||
scaled_annotations = []
|
||||
for annotation in self.images[idx][1]:
|
||||
annotations = [
|
||||
annotation.scale(self.image_size / max(img.size[0], img.size[1]))
|
||||
scaled_annotations.append(annotation)
|
||||
for annotation in read_groundtruth_file(self.images[idx][1])
|
||||
]
|
||||
|
||||
label_grid = get_label_grid(
|
||||
anchor_grid=self.anchor_grid, gts=scaled_annotations, min_iou=self.min_iou
|
||||
anchor_grid=self.anchor_grid, gts=annotations, min_iou=self.min_iou
|
||||
)
|
||||
return (img_tensor, label_grid, int(img_id))
|
||||
|
||||
@@ -120,21 +120,93 @@ def get_dataloader(
|
||||
return dataloader
|
||||
|
||||
|
||||
def calculate_max_coverage(loader: DataLoader, min_iou: float) -> float:
    """Return the share of ground-truth boxes matched by at least one anchor box.

    For every image yielded by ``loader`` the ground-truth file and the original
    image are re-read from ``loader.dataset.path_to_data``. The boxes are scaled
    by ``tensor_width / max(original_width, original_height)`` and compared with
    every box of ``loader.dataset.anchor_grid``.

    @param loader: DataLoader whose dataset exposes ``anchor_grid`` and ``path_to_data``.
    @param min_iou: Minimum IoU overlap to count a ground truth box as covered.
    @return: Ratio of how many ground truth boxes are covered by an anchor box.
             Value between 0 and 1; 0.0 when there are no ground truth boxes.
    """
    dataset = loader.dataset
    # Flatten the anchor grid to (N, 4) once; it is identical for every image.
    anchors = dataset.anchor_grid.reshape(-1, 4)

    total_boxes = 0
    covered_boxes = 0

    for img, _, img_id in loader:
        for batch_index in range(len(img)):
            file_stem = str(img_id[batch_index].item()).zfill(8)
            gts_file = os.path.join(dataset.path_to_data, f"{file_stem}.gt_data.txt")
            image_file = os.path.join(dataset.path_to_data, f"{file_stem}.jpg")

            # The original image size is needed to reproduce the resize factor.
            with Image.open(image_file) as original_image:
                original_w, original_h = original_image.size

            # Assume square resize for the model; take the size from the tensor.
            transformed_size = img[batch_index].shape[-1]
            scale = transformed_size / max(original_w, original_h)

            annotations = [
                annotation.scale(scale)
                for annotation in read_groundtruth_file(gts_file)
            ]
            if not annotations:
                # np.stack raises on an empty list; an image without boxes
                # contributes nothing to either counter.
                continue

            gt_boxes = np.stack([np.array(a) for a in annotations], axis=0)  # (M, 4)
            total_boxes += len(gt_boxes)

            ious = compute_ious_vectorized(gt_boxes, anchors)  # (M, N)

            # A ground truth is covered when any anchor reaches min_iou.
            covered_boxes += int((ious >= min_iou).any(axis=1).sum())

    return covered_boxes / total_boxes if total_boxes > 0 else 0.0
|
||||
|
||||
|
||||
def print_img_tensor_with_annotations(
|
||||
img: torch.Tensor, annotations: Sequence["AnnotationRect"], output_file: str
|
||||
def compute_ious_vectorized(boxes1, boxes2):
    """Compute the IoU matrix between each box in boxes1 and each box in boxes2.

    boxes1: (M, 4), boxes2: (N, 4) -- format [x1, y1, x2, y2]
    Returns: (M, N) array of IoU values. Pairs whose union area is zero
    (both boxes degenerate) get an IoU of 0.
    """
    # Expand so that broadcasting yields every (boxes1, boxes2) pair.
    boxes1 = np.asarray(boxes1, dtype=float)[:, None, :]  # (M, 1, 4)
    boxes2 = np.asarray(boxes2, dtype=float)[None, :, :]  # (1, N, 4)

    # Intersection box; negative extents mean "no overlap" and are clipped to 0.
    inter_x1 = np.maximum(boxes1[..., 0], boxes2[..., 0])
    inter_y1 = np.maximum(boxes1[..., 1], boxes2[..., 1])
    inter_x2 = np.minimum(boxes1[..., 2], boxes2[..., 2])
    inter_y2 = np.minimum(boxes1[..., 3], boxes2[..., 3])
    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
    inter_area = inter_w * inter_h

    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
    union_area = area1 + area2 - inter_area

    # Divide only where the union is positive instead of adding an epsilon:
    # an epsilon makes identical boxes score slightly below 1.0, so a
    # threshold of exactly 1.0 could never be reached.
    ious = np.zeros_like(union_area)
    np.divide(inter_area, union_area, out=ious, where=union_area > 0)
    return ious
|
||||
|
||||
|
||||
def draw_image_tensor_with_annotations(
|
||||
img: torch.Tensor,
|
||||
annotations: Sequence["AnnotationRect"] | None,
|
||||
output_file: str,
|
||||
):
|
||||
# Convert tensor to numpy, permute dimensions
|
||||
img_np = img.permute(1, 2, 0).cpu().numpy()
|
||||
img_np = img_np.astype(np.uint8)
|
||||
img_np = img.permute(1, 2, 0).numpy()
|
||||
img_np = np.clip(img_np, 0, 1)
|
||||
|
||||
fig, ax = plt.subplots(1)
|
||||
ax.imshow(img_np)
|
||||
@@ -152,40 +224,43 @@ def print_img_tensor_with_annotations(
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def print_positive_boxes(
|
||||
def denormalize_image_tensor(
    img: torch.Tensor,
    mean=torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1),
    std=torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1),
) -> torch.Tensor:
    """Undo a per-channel normalization: return ``img * std + mean``.

    @param img: Normalized image tensor; must broadcast against ``mean``/``std``
                (the defaults have shape (3, 1, 1)).
    @param mean: Per-channel mean that was subtracted during normalization.
    @param std: Per-channel standard deviation that was divided by.
    @return: A new tensor on the same device as ``img``.
    """
    # The default tensors are built on the CPU when the function is defined;
    # move them to the image's device so GPU tensors do not raise a device
    # mismatch error.
    mean = torch.as_tensor(mean, device=img.device)
    std = torch.as_tensor(std, device=img.device)
    return img * std + mean
|
||||
|
||||
|
||||
def draw_positive_boxes(
    img_tensor: torch.Tensor,
    label_grid: np.ndarray,
    img_id: torch.Tensor,
    anchor_grid: np.ndarray,
    path_to_data: str,
):
    """Draw every anchor box whose label-grid entry is truthy onto the image.

    The result is written to ``mmp/a4/.output/{img_id}_transformed.png``.
    ``path_to_data`` is accepted for interface compatibility but is not used.
    """
    positive_rects = []
    # Walk all anchor positions (every axis except the trailing coordinate axis).
    for cell in np.ndindex(anchor_grid.shape[:-1]):
        if label_grid[cell]:
            positive_rects.append(AnnotationRect.fromarray(anchor_grid[cell]))

    target_file = f"mmp/a4/.output/{img_id}_transformed.png"
    draw_image_tensor_with_annotations(
        img_tensor,
        annotations=positive_rects,
        output_file=target_file,
    )
|
||||
|
||||
|
||||
def main():
|
||||
anchor_grid = get_anchor_grid(
|
||||
anchor_widths=[16, 32, 64, 96, 128, 144, 150, 160, 192, 224, 256],
|
||||
aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
|
||||
num_rows=32,
|
||||
num_cols=32,
|
||||
scale_factor=20,
|
||||
anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
|
||||
aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
|
||||
num_rows=28,
|
||||
num_cols=28,
|
||||
scale_factor=8,
|
||||
)
|
||||
dataloader = get_dataloader(
|
||||
num_workers=6,
|
||||
num_workers=9,
|
||||
is_train=True,
|
||||
is_test=False,
|
||||
batch_size=8,
|
||||
@@ -194,13 +269,14 @@ def main():
|
||||
anchor_grid=anchor_grid,
|
||||
)
|
||||
|
||||
# print(calculate_coverage(dataloader, 0.7))
|
||||
|
||||
for img, label, img_id in islice(dataloader, 12):
|
||||
print_positive_boxes(
|
||||
img_tensor=img[5],
|
||||
draw_positive_boxes(
|
||||
img_tensor=denormalize_image_tensor(img=img[5]),
|
||||
label_grid=label[5],
|
||||
img_id=img_id[5],
|
||||
anchor_grid=anchor_grid,
|
||||
path_to_data=".data/mmp-public-3.2/train",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -65,6 +65,14 @@
|
||||
\end{figure}
|
||||
\end{enumerate}
|
||||
|
||||
\section*{Exercise 4.3 Finalizing the Data Pipeline}
|
||||
\begin{enumerate}[label=\alph*)]
|
||||
\setcounter{enumi}{2}
|
||||
\item The generated images can be found in \texttt{.output/}.
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
|
||||
%------------------ END OF ASSIGNMENT -----------------------
|
||||
|
||||
\end{document}
|
||||
BIN
mmp/a5/document.pdf
Normal file
82
mmp/a5/document.tex
Normal file
@@ -0,0 +1,82 @@
|
||||
\documentclass[11pt,a4paper]{article}
|
||||
|
||||
% Language and encoding settings
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[english]{babel}
|
||||
|
||||
% Page formatting
|
||||
\usepackage[left=1in, right=1in, top=1in, bottom=1in]{geometry}
|
||||
\usepackage{setspace}
|
||||
\onehalfspacing
|
||||
|
||||
% Header/Footer
|
||||
\usepackage{fancyhdr}
|
||||
\pagestyle{fancy}
|
||||
\fancyhf{} % clear all header and footer fields
|
||||
\fancyhead[L]{\textbf{\course}}
|
||||
\fancyhead[C]{Assignment \assignmentnumber}
|
||||
\fancyhead[R]{\name}
|
||||
\fancyfoot[C]{\thepage}
|
||||
|
||||
% Other packages
|
||||
\usepackage{enumitem}
|
||||
\usepackage{graphicx}
|
||||
|
||||
% Custom commands for easy detail insertion
|
||||
\newcommand{\assignmentnumber}{05} % <-- CHANGE Assignment Number
|
||||
\newcommand{\name}{Simon Franken} % <-- CHANGE YOUR NAME
|
||||
\newcommand{\course}{Multimedia Project WiSe 2526} % <-- CHANGE COURSE NAME
|
||||
\newcommand{\duedate}{2025-11-26} % <-- CHANGE DUE DATE
|
||||
|
||||
% Title formatting
|
||||
\usepackage{titling}
|
||||
\pretitle{
|
||||
\vspace*{2cm}
|
||||
\begin{center}
|
||||
\LARGE\bfseries
|
||||
}
|
||||
\posttitle{\par\end{center}\vspace{1cm}}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\title{Assignment \assignmentnumber}
|
||||
\author{\name}
|
||||
\date{\duedate}
|
||||
|
||||
\maketitle
|
||||
|
||||
\begin{center}
|
||||
\textbf{Course:} \course
|
||||
\end{center}
|
||||
\vspace{0.5cm}
|
||||
|
||||
%------------------ START OF ASSIGNMENT -----------------------
|
||||
|
||||
% Write your solutions below
|
||||
|
||||
\section*{Exercise 5.2 Training}
|
||||
\begin{figure}[htp]
|
||||
\centering
|
||||
\includegraphics[width=14cm]{image0.png}
|
||||
\end{figure}
|
||||
|
||||
\section*{Exercise 5.3 Negative Mining}
|
||||
\begin{figure}[htp]
|
||||
\centering
|
||||
\includegraphics[width=14cm]{image1.jpg}
|
||||
\includegraphics[width=14cm]{image2.png}
|
||||
|
||||
\end{figure}
|
||||
|
||||
The idea of negative mining is to achieve a certain balance between positive and negative labels.
|
||||
This is especially important in our case, since there are far more negative boxes than positive ones.
|
||||
The balance is achieved by sampling a random subset of the negatives.
|
||||
In my case, I observed that the loss was actually higher with negative mining, but the accuracy was better.
|
||||
|
||||
|
||||
|
||||
|
||||
%------------------ END OF ASSIGNMENT -----------------------
|
||||
|
||||
\end{document}
|
||||
BIN
mmp/a5/image0.png
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
mmp/a5/image1.jpg
Normal file
|
After Width: | Height: | Size: 80 KiB |
BIN
mmp/a5/image2.png
Normal file
|
After Width: | Height: | Size: 95 KiB |
207
mmp/a5/main.py
@@ -1,7 +1,16 @@
|
||||
import argparse
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
import datetime
|
||||
|
||||
from .model import MmpNet
|
||||
from ..a4.anchor_grid import get_anchor_grid
|
||||
from ..a4.dataset import get_dataloader
|
||||
from ..a2.main import get_criterion_optimizer
|
||||
from ..a6.main import evaluate as evaluate_v2
|
||||
|
||||
|
||||
def step(
|
||||
@@ -11,11 +20,19 @@ def step(
|
||||
img_batch: torch.Tensor,
|
||||
lbl_batch: torch.Tensor,
|
||||
) -> float:
|
||||
"""Performs one update step for the model
|
||||
model.train()
|
||||
optimizer.zero_grad()
|
||||
|
||||
@return: The loss for the specified batch. Return a float and not a PyTorch tensor
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
device = next(model.parameters()).device
|
||||
img_batch = img_batch.to(device)
|
||||
lbl_batch = lbl_batch.to(device)
|
||||
|
||||
outputs = model(img_batch)
|
||||
loss = criterion(outputs, lbl_batch)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
return loss.item()
|
||||
|
||||
|
||||
def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Tensor:
|
||||
@@ -26,13 +43,187 @@ def get_random_sampling_mask(labels: torch.Tensor, neg_ratio: float) -> torch.Te
|
||||
Hint: after computing the mask, check if the neg_ratio is fulfilled.
|
||||
@return: A tensor with the same shape as labels
|
||||
"""
|
||||
assert labels.min() >= 0 and labels.max() <= 1 # remove this line if you want
|
||||
raise NotImplementedError()
|
||||
# Flatten for easier indexing
|
||||
labels_flat = labels.view(-1)
|
||||
pos_indices = (labels_flat == 1).nonzero(as_tuple=True)[0]
|
||||
neg_indices = (labels_flat == 0).nonzero(as_tuple=True)[0]
|
||||
|
||||
num_pos = pos_indices.numel()
|
||||
num_neg = neg_indices.numel()
|
||||
|
||||
num_neg_to_sample = min(int(neg_ratio * num_pos), num_neg)
|
||||
|
||||
perm = torch.randperm(num_neg, device=labels.device)
|
||||
sampled_neg_indices = neg_indices[perm[:num_neg_to_sample]]
|
||||
|
||||
mask_flat = torch.zeros_like(labels_flat, dtype=torch.long)
|
||||
mask_flat[pos_indices] = 1
|
||||
mask_flat[sampled_neg_indices] = 1
|
||||
|
||||
# Reshape to original shape
|
||||
mask = mask_flat.view_as(labels)
|
||||
return mask
|
||||
|
||||
|
||||
def evaluate(
    model: "MmpNet",
    criterion,
    dataloader: DataLoader,
) -> float:
    """Return the sample-weighted mean loss of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled. Each batch
    is moved to the device of the model's first parameter.

    @param model: Network to evaluate.
    @param criterion: Callable ``criterion(outputs, labels)`` returning a scalar tensor.
    @param dataloader: Iterable yielding ``(images, labels, ids)`` triples.
    @return: Average loss per sample; 0.0 when the loader yields no samples.
    """
    device = next(model.parameters()).device
    model.eval()

    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for img_batch, lbl_batch, _ in dataloader:
            img_batch = img_batch.to(device)
            lbl_batch = lbl_batch.to(device)

            loss = criterion(model(img_batch), lbl_batch)

            # Weight by batch size so a smaller final batch does not skew the mean.
            # Outputs and labels are deliberately not kept: nothing reads them
            # and holding every batch would grow memory with the dataset size.
            batch_size = img_batch.size(0)
            total_loss += loss.item() * batch_size
            total_samples += batch_size

    return total_loss / total_samples if total_samples > 0 else 0.0
|
||||
|
||||
|
||||
def train(
    model: MmpNet,
    loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
):
    """Run one training epoch and return the sample-weighted mean loss.

    Each batch is passed to ``step``; the running mean loss is shown in the
    tqdm progress bar. Returns 0.0 when the loader yields no samples.
    """
    model.train()

    loss_sum = 0.0
    seen = 0

    def mean_loss():
        # Guard against division by zero before the first batch is processed.
        return loss_sum / seen if seen > 0 else 0.0

    progress_bar = tqdm(loader, desc="Training", unit="batch")
    for img_batch, lbl_batch, _ in progress_bar:
        batch_loss = step(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            img_batch=img_batch,
            lbl_batch=lbl_batch,
        )
        n_images = img_batch.size(0)
        loss_sum += batch_loss * n_images
        seen += n_images
        progress_bar.set_postfix({"loss": mean_loss()})

    epoch_loss = mean_loss()
    progress_bar.close()
    return epoch_loss
|
||||
|
||||
|
||||
class NegativeMiningCriterion(nn.Module):
    """Cross-entropy loss with optional random negative sampling.

    With negative mining enabled, the per-element loss is averaged only over
    the elements selected by ``get_random_sampling_mask`` (mask value 1).
    With it disabled, the plain mean over all elements is returned.
    """

    def __init__(self, neg_ratio=3.0, enable_negative_mining: bool = True):
        super().__init__()
        # reduction="none" keeps one loss value per element so it can be masked.
        self.backbone = nn.CrossEntropyLoss(reduction="none")
        self.neg_ratio = neg_ratio
        self.enable_negative_mining = enable_negative_mining

    def forward(self, outputs, labels):
        """Return the scalar loss for ``outputs`` (class scores on the last axis).

        @param outputs: Tensor whose last dimension holds the class scores.
        @param labels: Tensor with one class index per leading position of ``outputs``.
        @return: Scalar loss tensor; zero (still attached to the graph) when
                 negative mining is enabled and the batch has no positive label.
        """
        outputs_flat = outputs.reshape(-1, outputs.shape[-1])
        labels_flat = labels.reshape(-1).long()
        unfiltered = self.backbone(outputs_flat, labels_flat)
        assert unfiltered.shape == labels_flat.shape

        if not self.enable_negative_mining:
            return unfiltered.mean()

        if not bool((labels_flat == 1).any()):
            # Without positives no negatives are sampled either, so the mask
            # would select nothing and .mean() of an empty tensor is NaN.
            # Return a zero that is still connected to the graph so that
            # backward() keeps working.
            return unfiltered.sum() * 0.0

        mask = get_random_sampling_mask(labels_flat, self.neg_ratio)
        return unfiltered[mask == 1].mean()
|
||||
|
||||
|
||||
def main():
    """Train MmpNet with negative mining (exercises 5.2 and 5.3).

    Command line:
        --tensorboard [LABEL]  Enable TensorBoard logging; an optional label is
                               included in the log directory name.

    Every epoch trains on the train split, computes the unfiltered loss on the
    validation split and runs the a6 ``evaluate`` on the train loader.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--tensorboard",
        nargs="?",
        const=True,
        default=False,
        help="Enable TensorBoard logging. If a label is provided, it will be used in the log directory name.",
    )
    args = parser.parse_args()

    if args.tensorboard:
        # Imported lazily so TensorBoard is only required when it is requested.
        from torch.utils.tensorboard import SummaryWriter

        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        if isinstance(args.tensorboard, str):
            label = args.tensorboard
            log_dir = f"runs/a5_mmpnet_{label}_{timestamp}"
        else:
            log_dir = f"runs/a5_mmpnet_{timestamp}"
        writer = SummaryWriter(log_dir=log_dir)
    else:
        writer = None

    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = MmpNet(num_aspect_ratios=8, num_widths=8).to(device)

        # 7x7 grid with scale factor 32 for 224x224 input images.
        anchor_grid = get_anchor_grid(
            anchor_widths=[8, 16, 32, 64, 96, 128, 160, 192],
            aspect_ratios=[1 / 2, 2 / 3, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
            num_rows=7,
            num_cols=7,
            scale_factor=32,
        )

        dataloader_train = get_dataloader(
            path_to_data=".data/mmp-public-3.2/train",
            image_size=224,
            batch_size=32,
            num_workers=9,
            is_test=False,
            is_train=True,
            anchor_grid=anchor_grid,
        )
        dataloader_val = get_dataloader(
            path_to_data=".data/mmp-public-3.2/val",
            image_size=224,
            batch_size=32,
            num_workers=9,
            is_test=False,
            is_train=False,
            anchor_grid=anchor_grid,
        )
        _, optimizer = get_criterion_optimizer(model=model)
        # Train with sampled negatives, but report the validation loss over all
        # elements so the numbers are comparable between runs.
        criterion = NegativeMiningCriterion(enable_negative_mining=True)
        criterion_eval = NegativeMiningCriterion(enable_negative_mining=False)
        num_epochs = 5

        for epoch in range(num_epochs):
            train_loss = train(
                model=model,
                loader=dataloader_train,
                criterion=criterion,
                optimizer=optimizer,
            )
            avg_loss = evaluate(
                model=model, criterion=criterion_eval, dataloader=dataloader_val
            )
            average_precision = evaluate_v2(
                model=model, device=device, anchor_grid=anchor_grid, loader=dataloader_train
            )

            if writer is not None:
                writer.add_scalar("Loss/train_epoch", train_loss, epoch)
                writer.add_scalar("Loss/eval_epoch", avg_loss, epoch)
                writer.add_scalar("AP/train_epoch", average_precision, epoch)
    finally:
        # Close the writer even when training is interrupted or raises.
        if writer is not None:
            writer.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,9 +1,40 @@
|
||||
import torch
|
||||
from torchvision import models
|
||||
from torchvision.models import MobileNet_V2_Weights
|
||||
from torch import nn
|
||||
|
||||
|
||||
class MmpNet(torch.nn.Module):
    """MobileNetV2 feature extractor followed by a 3x3 convolutional head.

    The head emits ``num_widths * num_aspect_ratios * num_classes`` channels
    per feature-map cell; ``forward`` reshapes them so that the class scores
    end up on the last axis.
    """

    def __init__(self, num_widths: int, num_aspect_ratios: int, num_classes: int = 2):
        super().__init__()
        mobilenet = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
        self.backbone = mobilenet.features
        self.num_widths = num_widths
        self.num_aspect_ratios = num_aspect_ratios
        self.num_classes = num_classes

        # Read the backbone's output width from the model instead of pushing a
        # dummy image through it: a forward pass in training mode would update
        # the pretrained BatchNorm running statistics with an all-zero input.
        self.head = nn.Conv2d(
            in_channels=mobilenet.last_channel,
            kernel_size=3,
            out_channels=self.get_required_output_channels(),
            stride=1,
            padding=1,
        )

    def get_required_output_channels(self):
        """Return the number of head channels: widths * aspect ratios * classes."""
        return self.num_widths * self.num_aspect_ratios * self.num_classes

    def forward(self, x: torch.Tensor):
        """Return scores of shape (batch, num_widths, num_aspect_ratios, h, w, num_classes)."""
        x = self.backbone(x)
        x = self.head(x)
        b, out_c, h, w = x.shape
        x = x.view(b, self.num_widths, self.num_aspect_ratios, self.num_classes, h, w)
        # Move the class axis to the end; contiguous() so callers can use .view().
        x = x.permute(0, 1, 2, 4, 5, 3).contiguous()
        return x
|
||||
|
||||
0
mmp/a6/__init__.py
Normal file
127
mmp/a6/main.py
@@ -1,25 +1,144 @@
|
||||
from typing import List, Tuple
|
||||
import torch
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from mmp.a6.evallib import calculate_ap_pr
|
||||
from ..a4.label_grid import iou
|
||||
|
||||
from ..a5.model import MmpNet
|
||||
from ..a3.annotation import AnnotationRect
|
||||
from ..a3.annotation import AnnotationRect, read_groundtruth_file
|
||||
|
||||
from .nms import non_maximum_suppression
|
||||
|
||||
|
||||
def batch_inference(
    model: MmpNet, images: torch.Tensor, device: torch.device, anchor_grid: np.ndarray
) -> List[List[Tuple[AnnotationRect, float]]]:
    """Run the model on a batch and return NMS-filtered detections per image.

    Every anchor box is paired with its softmax score for class index 1; no
    score threshold is applied before non-maximum suppression (threshold 0.3).
    """
    nms_thresh = 0.3

    model = model.to(device)
    model.eval()
    images = images.to(device)

    with torch.no_grad():
        outputs = model(images)
        # Probability of class index 1 for every anchor position.
        probs_np = torch.softmax(outputs, dim=-1)[..., 1].cpu().numpy()

    # All anchor positions, i.e. every axis of the grid except the box coordinates.
    anchor_positions = list(np.ndindex(anchor_grid.shape[:-1]))

    results = []
    for b in range(outputs.shape[0]):
        image_probs = probs_np[b]
        detections = [
            (AnnotationRect.fromarray(anchor_grid[pos]), float(image_probs[pos]))
            for pos in anchor_positions
        ]
        results.append(non_maximum_suppression(detections, nms_thresh))

    return results
|
||||
|
||||
|
||||
def evaluate(
    model: MmpNet, loader: DataLoader, device: torch.device, anchor_grid: np.ndarray
) -> float:
    """Evaluate a model on every batch of ``loader``.

    AP is computed per batch with ``calculate_ap_pr`` and combined into a mean
    weighted by the number of images in each batch.

    @return: The image-weighted mean of the per-batch AP values as a float.
    """
    # Read ground truths from the split the loader was built for; fall back to
    # the train split when the dataset does not expose its directory.
    path_to_data = getattr(
        loader.dataset, "path_to_data", ".data/mmp-public-3.2/train"
    )

    progress_bar = tqdm(loader, desc="Evaluation", unit="batch")
    image_count = 0
    ap_total = 0.0
    for img_batch, _, id_batch in progress_bar:
        inference = batch_inference(
            anchor_grid=anchor_grid, device=device, images=img_batch, model=model
        )
        # NOTE(review): detections use anchor_grid coordinates while the ground
        # truths are read from file without scaling -- confirm both are in the
        # same coordinate system.
        gts = get_gts_for_batch(id_batch=id_batch, gt_base_path=path_to_data)

        image_ids = [img_id.item() for img_id in id_batch]
        dict_detections = dict(zip(image_ids, inference))
        dict_gt = dict(zip(image_ids, gts))
        average_precision, _, _ = calculate_ap_pr(dict_detections, dict_gt)

        # Weight the batch AP by its image count; adding it unweighted while
        # dividing by the image count would shrink the mean.
        batch_images = id_batch.shape[0]
        ap_total = (ap_total * image_count + average_precision * batch_images) / (
            image_count + batch_images
        )
        image_count += batch_images

        progress_bar.set_postfix({"ap": ap_total})

    return ap_total
|
||||
|
||||
|
||||
def get_gts_for_batch(
    id_batch: torch.Tensor, gt_base_path: str
) -> List[List[AnnotationRect]]:
    """Read the ground-truth boxes for every image id in ``id_batch``.

    The file for an id is ``<gt_base_path>/<id zero-padded to 8 digits>.gt_data.txt``.
    The result has one list of boxes per id, in batch order.
    """
    ground_truths = []
    for img_id in id_batch:
        file_name = f"{str(img_id.item()).zfill(8)}.gt_data.txt"
        ground_truths.append(
            read_groundtruth_file(os.path.join(gt_base_path, file_name))
        )
    return ground_truths
|
||||
|
||||
|
||||
def calc_tp_fp_fn(
    detections: List[Tuple[AnnotationRect, float]],
    gts: List[AnnotationRect],
    iou_threshold: float = 0.5,
    confidence_threshhold: float = 0.5,
) -> tuple[int, int, int]:
    """
    Counts true positives, false positives and false negatives for one image.

    Detections below the confidence threshold are dropped; the rest are
    processed in order of descending confidence. Each detection is compared
    with its best-overlapping ground truth only.

    Args:
        detections: List of (AnnotationRect, confidence) tuples for the predicted boxes.
        gts: List of AnnotationRect for ground truth.
        iou_threshold: Minimum IoU to consider a detection a true positive.
        confidence_threshhold: Minimum confidence required to include a detection.
    Returns:
        num_tp: Number of true positives (int).
        num_fp: Number of false positives (int).
        num_fn: Number of false negatives (int).
    """
    ranked = sorted(
        (det for det in detections if det[1] >= confidence_threshhold),
        key=lambda det: det[1],
        reverse=True,
    )

    matched_gts = set()
    tp = 0
    fp = 0

    for det_rect, _ in ranked:
        overlaps = [iou(det_rect, gt_rect) for gt_rect in gts]
        best = np.argmax(overlaps) if overlaps else None
        # A detection is a false positive when there is no ground truth, its
        # best ground truth is already taken, or the overlap is too small.
        is_match = (
            best is not None
            and best not in matched_gts
            and not overlaps[best] < iou_threshold
        )
        if is_match:
            matched_gts.add(best)
            tp += 1
        else:
            fp += 1

    fn = len(gts) - len(matched_gts)
    return tp, fp, fn
|
||||
|
||||
|
||||
def evaluate_test(): # feel free to change the arguments
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import os
|
||||
from typing import List, Sequence, Tuple
|
||||
|
||||
from ..a3.annotation import AnnotationRect
|
||||
from ..a4.label_grid import iou, draw_annotation_rects
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def non_maximum_suppression(
|
||||
@@ -12,4 +15,68 @@ def non_maximum_suppression(
|
||||
|
||||
@return: A list of tuples of the remaining boxes after NMS together with their scores
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
if not boxes_scores:
|
||||
return []
|
||||
|
||||
# Sort the boxes by score in descending order
|
||||
boxes_scores_sorted = sorted(boxes_scores, key=lambda bs: bs[1], reverse=True)
|
||||
result = []
|
||||
while boxes_scores_sorted:
|
||||
# Select the box with highest score and remove it from the list
|
||||
curr_box, curr_score = boxes_scores_sorted.pop(0)
|
||||
result.append((curr_box, curr_score))
|
||||
# Remove boxes with IoU > threshold
|
||||
new_boxes = []
|
||||
for box, score in boxes_scores_sorted:
|
||||
if iou(curr_box, box) <= threshold:
|
||||
new_boxes.append((box, score))
|
||||
boxes_scores_sorted = new_boxes
|
||||
return result
|
||||
|
||||
|
||||
def read_boxes_from_file(filepath: str) -> List[Tuple[str, AnnotationRect, float]]:
    """
    Reads a file containing bounding boxes and scores in the format:
    {image_number} {x1} {y1} {x2} {y2} {score}

    Lines that do not consist of exactly six whitespace-separated fields are
    skipped. Coordinates are parsed as integers, the score as a float.

    Returns a list of tuples: (image_number, AnnotationRect(x1, y1, x2, y2), score),
    where image_number is kept as the string found in the file.
    """
    boxes: List[Tuple[str, AnnotationRect, float]] = []
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 6:
                continue
            img_id = parts[0]
            x1, y1, x2, y2 = map(int, parts[1:5])
            annotation_rect = AnnotationRect(x1, y1, x2, y2)
            score = float(parts[5])
            boxes.append((img_id, annotation_rect, score))
    return boxes
|
||||
|
||||
|
||||
def main():
    """Apply NMS to the boxes in ``mmp/a6/model_output.txt`` and draw the results.

    Boxes are grouped per image and filtered with non-maximum suppression
    (threshold 0.3); survivors with a score above 0.5 are drawn onto the test
    image. Images missing from the test directory are skipped.
    """
    per_image = defaultdict(list)
    for image_id, rect, score in read_boxes_from_file("mmp/a6/model_output.txt"):
        per_image[image_id].append((rect, score))

    for image_id, candidates in per_image.items():
        survivors = non_maximum_suppression(candidates, 0.3)
        confident_rects = [rect for rect, score in survivors if score > 0.5]
        input_path = f".data/mmp-public-3.2/test/{image_id}.jpg"
        output_path = f"mmp/a6/nms_output_{image_id}.png"
        if os.path.exists(input_path):
            draw_annotation_rects(
                input_path,
                confident_rects,
                rect_color=(255, 0, 0),
                rect_width=2,
                output_path=output_path,
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
BIN
mmp/a6/nms_output_02247421.png
Normal file
|
After Width: | Height: | Size: 359 KiB |
BIN
mmp/a6/nms_output_02249576.png
Normal file
|
After Width: | Height: | Size: 231 KiB |
BIN
mmp/a6/nms_output_02249614.png
Normal file
|
After Width: | Height: | Size: 232 KiB |