performance improvements

franksim
2025-11-11 10:52:27 +01:00
parent a00ddedb23
commit 7245042b54
26 changed files with 84 additions and 32 deletions

24 binary image diffs not shown (12 "Before" and 12 "After" previews; 84–238 KiB each).

@@ -29,7 +29,7 @@ class MMP_Dataset(torch.utils.data.Dataset):
         @param is_test: Whether this is the test set (True) or the validation/training set (False)
         """
         self.image_size = image_size
-        self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = []
+        self.images: Sequence[Tuple[str, str | None]] = []
         self.anchor_grid = anchor_grid
         self.min_iou = min_iou
         self.is_test = is_test
@@ -43,11 +43,10 @@ class MMP_Dataset(torch.utils.data.Dataset):
             img_file = os.path.join(path_to_data, fname)
             if is_test:
                 self.images.append((img_file, None))
             else:
-                annotations = read_groundtruth_file(
-                    os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
-                )
-                self.images.append((img_file, annotations))
+                annotation_file = os.path.join(
+                    path_to_data, f"{match.group(1)}.gt_data.txt"
+                )
+                self.images.append((img_file, annotation_file))
         self.images.sort(
             key=lambda x: int(re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2))
@@ -74,13 +73,13 @@ class MMP_Dataset(torch.utils.data.Dataset):
         if self.is_test:
             return (img_tensor, torch.Tensor(), int(img_id))
-        scaled_annotations = []
-        for annotation in self.images[idx][1]:
+        annotations = [
             annotation.scale(self.image_size / max(img.size[0], img.size[1]))
-            scaled_annotations.append(annotation)
+            for annotation in read_groundtruth_file(self.images[idx][1])
+        ]
         label_grid = get_label_grid(
-            anchor_grid=self.anchor_grid, gts=scaled_annotations, min_iou=self.min_iou
+            anchor_grid=self.anchor_grid, gts=annotations, min_iou=self.min_iou
         )
         return (img_tensor, label_grid, int(img_id))
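
Note: the two hunks above switch the dataset to lazy annotation loading — __init__ now stores only the path to each .gt_data.txt file, and read_groundtruth_file runs inside __getitem__, so parsing happens in the DataLoader worker processes instead of serially at construction time. A minimal sketch of that pattern, with a hypothetical parse_annotations standing in for read_groundtruth_file and an assumed "x1 y1 x2 y2 per line" file format:

from torch.utils.data import Dataset

def parse_annotations(path):
    # Hypothetical stand-in for read_groundtruth_file: one box per line.
    with open(path) as f:
        return [tuple(map(float, line.split())) for line in f if line.strip()]

class LazyAnnotationDataset(Dataset):
    def __init__(self, pairs):
        # pairs: [(image_file, annotation_file or None)]; no file I/O here,
        # so constructing the dataset stays cheap.
        self.pairs = pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img_file, ann_file = self.pairs[idx]
        # Parsing happens here, i.e. inside each DataLoader worker.
        anns = parse_annotations(ann_file) if ann_file else []
        return img_file, anns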
@@ -121,34 +120,84 @@ def get_dataloader(
     return dataloader
 
-def calculate_max_coverage(loader: DataLoader, min_iou: float) -> float:
+def calculate_max_coverage(loader, min_iou):
     """
-    @param loader: A DataLoader object, generated with the get_dataloader function.
-    @param min_iou: Minimum IoU overlap that is required to count a ground truth box as covered.
-    @return: Ratio of how mamy ground truth boxes are covered by a label grid box. Must be a value between 0 and 1.
+    @param loader: DataLoader object.
+    @param min_iou: Minimum IoU overlap to count a ground truth box as covered.
+    @return: Ratio of how many ground truth boxes are covered by a label grid box. Value between 0 and 1.
     """
     total_boxes = 0
     covered_boxes = 0
-    dataset: MMP_Dataset = loader.dataset
-    anchor_grid = dataset.anchor_grid
+    dataset = loader.dataset
+    anchor_grid = dataset.anchor_grid  # Shape: (H, W, 4)
-    for img, _, img_id in islice(loader, 4):
+    # Reshape anchor grid to (N, 4)
+    anchors = anchor_grid.reshape(-1, 4)
+    for img, _, img_id in loader:
         for batch_index in range(len(img)):
             gts_file = os.path.join(
                 dataset.path_to_data,
                 f"{str(img_id[batch_index].item()).zfill(8)}.gt_data.txt",
             )
+            # Load and scale ground truth boxes if necessary
+            with Image.open(
+                os.path.join(
+                    dataset.path_to_data,
+                    f"{str(img_id[batch_index].item()).zfill(8)}.jpg",
+                )
+            ) as original_image:
+                original_w, original_h = original_image.size
+            # Assume square resize for model, get transform size from img tensor
+            transformed_size = img[batch_index].shape[-1]
+            scale = transformed_size / max(original_w, original_h)
-            gts = read_groundtruth_file(gts_file)
-            total_boxes += len(gts)
-            for annotation in gts:
-                for box_idx in np.ndindex(anchor_grid.shape[:-1]):
-                    box_annotation = AnnotationRect.fromarray(anchor_grid[box_idx])
-                    calculated_iou = iou(annotation, box_annotation)
-                    if calculated_iou >= min_iou:
-                        covered_boxes += 1
-                        break
-    return covered_boxes / total_boxes
+            annotations = [
+                annotation.scale(scale)
+                for annotation in read_groundtruth_file(gts_file)
+            ]
+            gt_boxes = np.stack(
+                [np.array(a) for a in annotations], axis=0
+            )  # shape (M, 4)
+            total_boxes += len(gt_boxes)
+            # Vectorized IoU calculation: (M, N)
+            ious = compute_ious_vectorized(gt_boxes, anchors)  # shape (M, N)
+            # Count ground truths for which any anchor box matches min_iou
+            covered = (ious >= min_iou).any(axis=1).sum()
+            covered_boxes += covered
+    return covered_boxes / total_boxes if total_boxes > 0 else 0.0
 
+def compute_ious_vectorized(boxes1, boxes2):
+    """
+    Compute the IoU matrix between each box in boxes1 and each box in boxes2.
+    boxes1: (M, 4), boxes2: (N, 4) -- format [x1, y1, x2, y2]
+    Returns: (M, N) IoU
+    """
+    M, N = boxes1.shape[0], boxes2.shape[0]
+    # Expand to (M, N, 4)
+    boxes1 = boxes1[:, None, :]  # (M, 1, 4)
+    boxes2 = boxes2[None, :, :]  # (1, N, 4)
+    # Intersection box
+    inter_x1 = np.maximum(boxes1[..., 0], boxes2[..., 0])
+    inter_y1 = np.maximum(boxes1[..., 1], boxes2[..., 1])
+    inter_x2 = np.minimum(boxes1[..., 2], boxes2[..., 2])
+    inter_y2 = np.minimum(boxes1[..., 3], boxes2[..., 3])
+    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
+    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
+    inter_area = inter_w * inter_h
+    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+    union_area = area1 + area2 - inter_area
+    return inter_area / (union_area + 1e-6)
 
 def draw_image_tensor_with_annotations(
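
Note: compute_ious_vectorized replaces the old per-anchor Python loop (iou called once per ground truth × anchor pair) with a single broadcasted NumPy computation. A small self-contained sanity check of that technique — the function body mirrors the one added above, and the toy boxes are made up:

import numpy as np

def compute_ious_vectorized(boxes1, boxes2):
    # Same broadcasting scheme as above: (M, 1, 4) against (1, N, 4).
    b1, b2 = boxes1[:, None, :], boxes2[None, :, :]
    inter_w = np.clip(np.minimum(b1[..., 2], b2[..., 2]) - np.maximum(b1[..., 0], b2[..., 0]), 0, None)
    inter_h = np.clip(np.minimum(b1[..., 3], b2[..., 3]) - np.maximum(b1[..., 1], b2[..., 1]), 0, None)
    inter = inter_w * inter_h
    area1 = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1])
    area2 = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1])
    return inter / (area1 + area2 - inter + 1e-6)

gts = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]])  # (M=2, 4)
anchors = np.array([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0],
                    [100.0, 100.0, 110.0, 110.0]])                   # (N=3, 4)
ious = compute_ious_vectorized(gts, anchors)
print(ious.shape)                 # (2, 3)
print((ious >= 0.5).any(axis=1))  # [ True False]: only the first GT is covered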
@@ -205,14 +254,14 @@ def draw_positive_boxes(
 def main():
     anchor_grid = get_anchor_grid(
-        anchor_widths=[16, 32, 64, 96, 128, 144, 150, 160, 192, 224, 256],
+        anchor_widths=[8, 16, 35, 32, 64, 96, 128, 144, 150, 160, 192, 224],
         aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
-        num_rows=32,
-        num_cols=32,
-        scale_factor=20,
+        num_rows=28,
+        num_cols=28,
+        scale_factor=8,
     )
     dataloader = get_dataloader(
-        num_workers=6,
+        num_workers=9,
         is_train=True,
         is_test=False,
         batch_size=8,
@@ -221,6 +270,8 @@ def main():
         anchor_grid=anchor_grid,
     )
+    # print(calculate_max_coverage(dataloader, 0.7))
+    for img, label, img_id in islice(dataloader, 12):
         draw_positive_boxes(
             img_tensor=denormalize_image_tensor(img=img[5]),
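
Note on the main() changes: assuming get_anchor_grid places one anchor per (width, aspect ratio) pair at every grid cell, the new settings trade a denser width list for a smaller grid. A back-of-the-envelope count under that assumption:

n_ratios = 11
old = 11 * n_ratios * 32 * 32  # 11 widths on a 32x32 grid -> 123904 anchors
new = 12 * n_ratios * 28 * 28  # 12 widths on a 28x28 grid -> 103488 anchors
print(new / old)               # ~0.84, i.e. roughly 16% fewer IoU comparisons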