diff --git a/mmp/a3/annotation.py b/mmp/a3/annotation.py index 9329dfb..0f02359 100644 --- a/mmp/a3/annotation.py +++ b/mmp/a3/annotation.py @@ -25,6 +25,7 @@ class AnnotationRect: self.x2 *= factor self.y1 *= factor self.y2 *= factor + return self @staticmethod def fromarray(arr: np.ndarray): diff --git a/mmp/a4/.output/2243526_transformed.png b/mmp/a4/.output/2243526_transformed.png new file mode 100644 index 0000000..afe422f Binary files /dev/null and b/mmp/a4/.output/2243526_transformed.png differ diff --git a/mmp/a4/.output/2243986_transformed.png b/mmp/a4/.output/2243986_transformed.png new file mode 100644 index 0000000..0628e0c Binary files /dev/null and b/mmp/a4/.output/2243986_transformed.png differ diff --git a/mmp/a4/.output/2244905_transformed.png b/mmp/a4/.output/2244905_transformed.png deleted file mode 100644 index 366bfd3..0000000 Binary files a/mmp/a4/.output/2244905_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2245169_transformed.png b/mmp/a4/.output/2245169_transformed.png new file mode 100644 index 0000000..2e3eb0a Binary files /dev/null and b/mmp/a4/.output/2245169_transformed.png differ diff --git a/mmp/a4/.output/2245295_transformed.png b/mmp/a4/.output/2245295_transformed.png new file mode 100644 index 0000000..17420b3 Binary files /dev/null and b/mmp/a4/.output/2245295_transformed.png differ diff --git a/mmp/a4/.output/2245476_transformed.png b/mmp/a4/.output/2245476_transformed.png new file mode 100644 index 0000000..7d834d8 Binary files /dev/null and b/mmp/a4/.output/2245476_transformed.png differ diff --git a/mmp/a4/.output/2246530_transformed.png b/mmp/a4/.output/2246530_transformed.png deleted file mode 100644 index 3474f06..0000000 Binary files a/mmp/a4/.output/2246530_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2246871_transformed.png b/mmp/a4/.output/2246871_transformed.png new file mode 100644 index 0000000..a177374 Binary files /dev/null and b/mmp/a4/.output/2246871_transformed.png differ 
diff --git a/mmp/a4/.output/2248186_transformed.png b/mmp/a4/.output/2248186_transformed.png new file mode 100644 index 0000000..4158002 Binary files /dev/null and b/mmp/a4/.output/2248186_transformed.png differ diff --git a/mmp/a4/.output/2248597_transformed.png b/mmp/a4/.output/2248597_transformed.png deleted file mode 100644 index 43f96b0..0000000 Binary files a/mmp/a4/.output/2248597_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2248930_transformed.png b/mmp/a4/.output/2248930_transformed.png new file mode 100644 index 0000000..4663da5 Binary files /dev/null and b/mmp/a4/.output/2248930_transformed.png differ diff --git a/mmp/a4/.output/2249347_transformed.png b/mmp/a4/.output/2249347_transformed.png deleted file mode 100644 index ef2d494..0000000 Binary files a/mmp/a4/.output/2249347_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2252317_transformed.png b/mmp/a4/.output/2252317_transformed.png new file mode 100644 index 0000000..c80827f Binary files /dev/null and b/mmp/a4/.output/2252317_transformed.png differ diff --git a/mmp/a4/.output/2253259_transformed.png b/mmp/a4/.output/2253259_transformed.png deleted file mode 100644 index 59fd157..0000000 Binary files a/mmp/a4/.output/2253259_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2254918_transformed.png b/mmp/a4/.output/2254918_transformed.png new file mode 100644 index 0000000..dd89bb9 Binary files /dev/null and b/mmp/a4/.output/2254918_transformed.png differ diff --git a/mmp/a4/.output/2257498_transformed.png b/mmp/a4/.output/2257498_transformed.png deleted file mode 100644 index b6d17a5..0000000 Binary files a/mmp/a4/.output/2257498_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2257580_transformed.png b/mmp/a4/.output/2257580_transformed.png deleted file mode 100644 index 964a722..0000000 Binary files a/mmp/a4/.output/2257580_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2260011_transformed.png 
b/mmp/a4/.output/2260011_transformed.png deleted file mode 100644 index 42eb465..0000000 Binary files a/mmp/a4/.output/2260011_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2260743_transformed.png b/mmp/a4/.output/2260743_transformed.png deleted file mode 100644 index deae5d2..0000000 Binary files a/mmp/a4/.output/2260743_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2261017_transformed.png b/mmp/a4/.output/2261017_transformed.png new file mode 100644 index 0000000..d9c2a12 Binary files /dev/null and b/mmp/a4/.output/2261017_transformed.png differ diff --git a/mmp/a4/.output/2262101_transformed.png b/mmp/a4/.output/2262101_transformed.png deleted file mode 100644 index ed1e11b..0000000 Binary files a/mmp/a4/.output/2262101_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2263691_transformed.png b/mmp/a4/.output/2263691_transformed.png deleted file mode 100644 index 6af3fe3..0000000 Binary files a/mmp/a4/.output/2263691_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2264479_transformed.png b/mmp/a4/.output/2264479_transformed.png deleted file mode 100644 index 1e9827c..0000000 Binary files a/mmp/a4/.output/2264479_transformed.png and /dev/null differ diff --git a/mmp/a4/.output/2265233_transformed.png b/mmp/a4/.output/2265233_transformed.png new file mode 100644 index 0000000..ebd7668 Binary files /dev/null and b/mmp/a4/.output/2265233_transformed.png differ diff --git a/mmp/a4/dataset.py b/mmp/a4/dataset.py index fd3ad9c..d1a1d8c 100644 --- a/mmp/a4/dataset.py +++ b/mmp/a4/dataset.py @@ -29,7 +29,7 @@ class MMP_Dataset(torch.utils.data.Dataset): @param is_test: Whether this is the test set (True) or the validation/training set (False) """ self.image_size = image_size - self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = [] + self.images: Sequence[Tuple[str, str | None]] = [] self.anchor_grid = anchor_grid self.min_iou = min_iou self.is_test = is_test @@ -43,11 +43,10 @@ class 
def calculate_max_coverage(loader: "DataLoader", min_iou: float) -> float:
    """
    Measure how many ground truth boxes are matched by at least one anchor box.

    @param loader: A DataLoader object, generated with the get_dataloader function.
        Its dataset must expose `anchor_grid` and `path_to_data`, and every batch
        must be an (images, labels, image_ids) triple.
    @param min_iou: Minimum IoU overlap that is required to count a ground truth box as covered.
    @return: Ratio of how many ground truth boxes are covered by an anchor box.
        A value between 0 and 1; 0.0 when the loader yields no ground truth boxes.
    """
    total_boxes = 0
    covered_boxes = 0
    dataset = loader.dataset

    # One row per anchor box, whatever the leading dimensions of the grid are.
    anchors = np.asarray(dataset.anchor_grid).reshape(-1, 4)

    for img, _, img_id in loader:
        for img_tensor, single_id in zip(img, img_id):
            # Image and ground truth files share the same zero-padded stem.
            stem = str(single_id.item()).zfill(8)
            gts_file = os.path.join(dataset.path_to_data, f"{stem}.gt_data.txt")

            # The original image is opened only to read its size: ground truth
            # boxes are stored in original pixel coordinates and have to be
            # brought to the resolution of the transformed image tensor.
            with Image.open(
                os.path.join(dataset.path_to_data, f"{stem}.jpg")
            ) as original_image:
                original_w, original_h = original_image.size

            # Assumes the transform resizes the longer side to the tensor's
            # last dimension (square model input).
            transformed_size = img_tensor.shape[-1]
            scale = transformed_size / max(original_w, original_h)

            annotations = [
                annotation.scale(scale)
                for annotation in read_groundtruth_file(gts_file)
            ]
            if not annotations:
                # np.stack raises ValueError on an empty list; an image
                # without boxes contributes nothing to either counter.
                continue

            gt_boxes = np.stack([np.array(a) for a in annotations], axis=0)  # (M, 4)
            total_boxes += len(gt_boxes)

            ious = compute_ious_vectorized(gt_boxes, anchors)  # (M, N)

            # A ground truth box is covered if any anchor reaches min_iou.
            covered_boxes += int((ious >= min_iou).any(axis=1).sum())

    return covered_boxes / total_boxes if total_boxes > 0 else 0.0


def compute_ious_vectorized(boxes1, boxes2):
    """
    Compute the IoU matrix between each box in boxes1 and each box in boxes2.

    @param boxes1: Array-like of shape (M, 4), boxes in [x1, y1, x2, y2] format.
    @param boxes2: Array-like of shape (N, 4), boxes in [x1, y1, x2, y2] format.
    @return: Float array of shape (M, N); entry (i, j) is the IoU of boxes1[i]
        and boxes2[j]. Pairs whose union has zero area get an IoU of 0.
    """
    # Accept lists/tuples and empty input; float avoids integer overflow.
    boxes1 = np.asarray(boxes1, dtype=float).reshape(-1, 4)
    boxes2 = np.asarray(boxes2, dtype=float).reshape(-1, 4)

    # Broadcast to (M, N, 4) without materialising the full product.
    first = boxes1[:, None, :]  # (M, 1, 4)
    second = boxes2[None, :, :]  # (1, N, 4)

    # Intersection rectangle; negative extents mean "no overlap" and clip to 0.
    inter_x1 = np.maximum(first[..., 0], second[..., 0])
    inter_y1 = np.maximum(first[..., 1], second[..., 1])
    inter_x2 = np.minimum(first[..., 2], second[..., 2])
    inter_y2 = np.minimum(first[..., 3], second[..., 3])
    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
    inter_area = inter_w * inter_h

    area1 = (first[..., 2] - first[..., 0]) * (first[..., 3] - first[..., 1])
    area2 = (second[..., 2] - second[..., 0]) * (second[..., 3] - second[..., 1])
    union_area = area1 + area2 - inter_area

    # Exact division (no epsilon) so identical boxes yield exactly 1.0 and
    # threshold comparisons are not biased; zero-area unions stay at 0.
    return np.divide(
        inter_area,
        union_area,
        out=np.zeros_like(inter_area),
        where=union_area > 0,
    )