performance improvements
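This change set speeds up the data pipeline in three ways: the dataset stores the path to each ground truth file instead of eagerly parsing every annotation at construction time (parsing now happens lazily in `__getitem__`, inside the DataLoader workers); `calculate_max_coverage` replaces its per-anchor Python loops with one vectorized IoU computation against the flattened anchor grid and iterates the full loader instead of only the first four batches; and the anchor grid and DataLoader settings in `main()` are retuned. The regenerated sample images under `mmp/a4/.output/` are committed as binary files.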
@@ -25,6 +25,7 @@ class AnnotationRect:
         self.x2 *= factor
         self.y1 *= factor
         self.y2 *= factor
         return self
 
+    @staticmethod
     def fromarray(arr: np.ndarray):
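Context note: `fromarray` is used as a class-level factory (the pre-existing coverage loop calls `AnnotationRect.fromarray(anchor_grid[box_idx])`), which is what the added `@staticmethod` enables. Its body lies outside this hunk; a minimal sketch, assuming the class stores plain `x1, y1, x2, y2` corner coordinates in that order, might be:

    @staticmethod
    def fromarray(arr: np.ndarray):
        # Hypothetical body (not from the diff): build a rect from a
        # length-4 array in [x1, y1, x2, y2] order.
        return AnnotationRect(arr[0], arr[1], arr[2], arr[3])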
BIN  mmp/a4/.output/2243526_transformed.png  (new file, 223 KiB)
BIN  mmp/a4/.output/2243986_transformed.png  (new file, 189 KiB)
BIN  mmp/a4/.output/2245169_transformed.png  (new file, 134 KiB)
BIN  mmp/a4/.output/2245295_transformed.png  (new file, 154 KiB)
BIN  mmp/a4/.output/2245476_transformed.png  (new file, 161 KiB)
BIN  mmp/a4/.output/2246871_transformed.png  (new file, 202 KiB)
BIN  mmp/a4/.output/2248186_transformed.png  (new file, 196 KiB)
BIN  mmp/a4/.output/2248930_transformed.png  (new file, 153 KiB)
BIN  mmp/a4/.output/2252317_transformed.png  (new file, 118 KiB)
BIN  mmp/a4/.output/2254918_transformed.png  (new file, 133 KiB)
BIN  mmp/a4/.output/2261017_transformed.png  (new file, 194 KiB)
BIN  mmp/a4/.output/2265233_transformed.png  (new file, 191 KiB)
@@ -29,7 +29,7 @@ class MMP_Dataset(torch.utils.data.Dataset):
         @param is_test: Whether this is the test set (True) or the validation/training set (False)
         """
         self.image_size = image_size
-        self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = []
+        self.images: Sequence[Tuple[str, str | None]] = []
         self.anchor_grid = anchor_grid
         self.min_iou = min_iou
         self.is_test = is_test
@@ -43,11 +43,10 @@ class MMP_Dataset(torch.utils.data.Dataset):
             img_file = os.path.join(path_to_data, fname)
             if is_test:
                 self.images.append((img_file, None))
             else:
-                annotations = read_groundtruth_file(
-                    os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
-                )
-                self.images.append((img_file, annotations))
+                annotation_file = os.path.join(
+                    path_to_data, f"{match.group(1)}.gt_data.txt"
+                )
+                self.images.append((img_file, annotation_file))
 
         self.images.sort(
             key=lambda x: int(re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2))
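Design note on the two hunks above: `self.images` now maps each image to its annotation file path (`str | None`; PEP 604 syntax, so Python 3.10+ unless `from __future__ import annotations` is active) instead of a pre-parsed `Sequence[AnnotationRect]`. The constructor no longer opens any ground truth file; `read_groundtruth_file` is deferred to `__getitem__`, so parsing runs in the DataLoader worker processes and only for samples that are actually fetched.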
@@ -74,13 +73,13 @@ class MMP_Dataset(torch.utils.data.Dataset):
         if self.is_test:
             return (img_tensor, torch.Tensor(), int(img_id))
 
-        scaled_annotations = []
-        for annotation in self.images[idx][1]:
-            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
-            scaled_annotations.append(annotation)
+        annotations = [
+            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
+            for annotation in read_groundtruth_file(self.images[idx][1])
+        ]
 
         label_grid = get_label_grid(
-            anchor_grid=self.anchor_grid, gts=scaled_annotations, min_iou=self.min_iou
+            anchor_grid=self.anchor_grid, gts=annotations, min_iou=self.min_iou
         )
         return (img_tensor, label_grid, int(img_id))
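The comprehension works because `AnnotationRect.scale` mutates the rect in place and returns `self` (see the first hunk), so the scaled rect is collected directly. A worked example with made-up numbers, assuming `image_size = 224` and an original 640x480 image:

    factor = 224 / max(640, 480)   # 0.35: the longer side is mapped to image_size
    # a ground truth box (100, 100, 200, 200) scales to (35.0, 35.0, 70.0, 70.0)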
@@ -121,34 +120,84 @@ def get_dataloader(
     return dataloader
 
 
-def calculate_max_coverage(loader: DataLoader, min_iou: float) -> float:
+def calculate_max_coverage(loader, min_iou):
     """
-    @param loader: A DataLoader object, generated with the get_dataloader function.
-    @param min_iou: Minimum IoU overlap that is required to count a ground truth box as covered.
-    @return: Ratio of how many ground truth boxes are covered by a label grid box. Must be a value between 0 and 1.
+    @param loader: DataLoader object.
+    @param min_iou: Minimum IoU overlap to count a ground truth box as covered.
+    @return: Ratio of how many ground truth boxes are covered by a label grid box. Value between 0 and 1.
     """
     total_boxes = 0
     covered_boxes = 0
-    dataset: MMP_Dataset = loader.dataset
-    anchor_grid = dataset.anchor_grid
+    dataset = loader.dataset
+    anchor_grid = dataset.anchor_grid  # Shape: (H, W, 4)
+
+    # Reshape anchor grid to (N, 4)
+    anchors = anchor_grid.reshape(-1, 4)
 
-    for img, _, img_id in islice(loader, 4):
+    for img, _, img_id in loader:
         for batch_index in range(len(img)):
             gts_file = os.path.join(
                 dataset.path_to_data,
                 f"{str(img_id[batch_index].item()).zfill(8)}.gt_data.txt",
             )
-            gts = read_groundtruth_file(gts_file)
-            total_boxes += len(gts)
-            for annotation in gts:
-                for box_idx in np.ndindex(anchor_grid.shape[:-1]):
-                    box_annotation = AnnotationRect.fromarray(anchor_grid[box_idx])
-                    calculated_iou = iou(annotation, box_annotation)
-                    if calculated_iou >= min_iou:
-                        covered_boxes += 1
-                        break
-    return covered_boxes / total_boxes
+            # Load and scale ground truth boxes if necessary
+            with Image.open(
+                os.path.join(
+                    dataset.path_to_data,
+                    f"{str(img_id[batch_index].item()).zfill(8)}.jpg",
+                )
+            ) as original_image:
+                original_w, original_h = original_image.size
+            # Assume square resize for model, get transform size from img tensor
+            transformed_size = img[batch_index].shape[-1]
+            scale = transformed_size / max(original_w, original_h)
+
+            annotations = [
+                annotation.scale(scale)
+                for annotation in read_groundtruth_file(gts_file)
+            ]
+
+            gt_boxes = np.stack(
+                [np.array(a) for a in annotations], axis=0
+            )  # shape (M, 4)
+            total_boxes += len(gt_boxes)
+
+            # Vectorized IoU calculation: (M, N)
+            ious = compute_ious_vectorized(gt_boxes, anchors)  # shape (M, N)
+
+            # Count ground truths for which any anchor box matches min_iou
+            covered = (ious >= min_iou).any(axis=1).sum()
+            covered_boxes += covered
+
+    return covered_boxes / total_boxes if total_boxes > 0 else 0.0
+
+
+def compute_ious_vectorized(boxes1, boxes2):
+    """
+    Compute the IoU matrix between each box in boxes1 and each box in boxes2.
+    boxes1: (M, 4), boxes2: (N, 4) -- format [x1, y1, x2, y2]
+    Returns: (M, N) IoU matrix.
+    """
+    M, N = boxes1.shape[0], boxes2.shape[0]
+
+    # Expand to broadcastable shapes, so all pairwise ops yield (M, N)
+    boxes1 = boxes1[:, None, :]  # (M, 1, 4)
+    boxes2 = boxes2[None, :, :]  # (1, N, 4)
+
+    # Intersection box
+    inter_x1 = np.maximum(boxes1[..., 0], boxes2[..., 0])
+    inter_y1 = np.maximum(boxes1[..., 1], boxes2[..., 1])
+    inter_x2 = np.minimum(boxes1[..., 2], boxes2[..., 2])
+    inter_y2 = np.minimum(boxes1[..., 3], boxes2[..., 3])
+    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
+    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
+    inter_area = inter_w * inter_h
+
+    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+    union_area = area1 + area2 - inter_area
+
+    return inter_area / (union_area + 1e-6)
 
 
 def draw_image_tensor_with_annotations(
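A quick hand-checkable sanity test for `compute_ious_vectorized` (not part of the change; box values are made up, and the function above is assumed to be in scope):

    import numpy as np

    gt = np.array([[0.0, 0.0, 10.0, 10.0]])         # one ground truth box, shape (1, 4)
    anchors = np.array(
        [
            [0.0, 0.0, 10.0, 10.0],    # identical box: IoU ~ 1.0
            [5.0, 5.0, 15.0, 15.0],    # 5x5 overlap: 25 / (100 + 100 - 25) ~ 0.143
            [20.0, 20.0, 30.0, 30.0],  # disjoint: IoU = 0.0
        ]
    )
    ious = compute_ious_vectorized(gt, anchors)      # shape (1, 3)
    print(np.round(ious, 3))                         # ~ [[1. 0.143 0.]]
    print((ious >= 0.5).any(axis=1))                 # coverage test as in the loop: [ True]

Note also that the rewrite walks the entire loader (the old version stopped after `islice(loader, 4)`), so the returned ratio is exact rather than a four-batch estimate; the `1e-6` in the denominator only guards against degenerate zero-area unions.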
@@ -205,14 +254,14 @@ def draw_positive_boxes(
 
 
 def main():
     anchor_grid = get_anchor_grid(
-        anchor_widths=[16, 32, 64, 96, 128, 144, 150, 160, 192, 224, 256],
+        anchor_widths=[8, 16, 35, 32, 64, 96, 128, 144, 150, 160, 192, 224],
         aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
-        num_rows=32,
-        num_cols=32,
-        scale_factor=20,
+        num_rows=28,
+        num_cols=28,
+        scale_factor=8,
     )
     dataloader = get_dataloader(
-        num_workers=6,
+        num_workers=9,
         is_train=True,
         is_test=False,
         batch_size=8,
@@ -221,6 +270,8 @@ def main():
         anchor_grid=anchor_grid,
     )
 
+    # print(calculate_max_coverage(dataloader, 0.7))
+
     for img, label, img_id in islice(dataloader, 12):
         draw_positive_boxes(
             img_tensor=denormalize_image_tensor(img=img[5]),
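For scale, the retuned `main()` roughly trades grid resolution for more anchor shapes. Assuming `get_anchor_grid` emits one anchor per (width, ratio, row, col) combination, consistent with `anchor_grid.reshape(-1, 4)` above, the anchor counts work out to:

    old = 11 * 11 * 32 * 32   # 123,904 anchors (11 widths, 11 ratios, 32x32 grid)
    new = 12 * 11 * 28 * 28   # 103,488 anchors (12 widths, 11 ratios, 28x28 grid)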