performance improvements

franksim
2025-11-11 10:52:27 +01:00
parent a00ddedb23
commit 7245042b54
26 changed files with 84 additions and 32 deletions

View File

@@ -25,6 +25,7 @@ class AnnotationRect:
         self.x2 *= factor
         self.y1 *= factor
         self.y2 *= factor
+        return self

     @staticmethod
     def fromarray(arr: np.ndarray):
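The added return self makes scale usable in expression position; the dataset diff below relies on it to scale boxes inside a list comprehension. A minimal sketch of the pattern, using a hypothetical stand-in class rather than the project's full AnnotationRect:

class Rect:
    # Hypothetical stand-in for AnnotationRect, just to show the chaining pattern.
    def __init__(self, x1, y1, x2, y2):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2

    def scale(self, factor):
        self.x1 *= factor
        self.x2 *= factor
        self.y1 *= factor
        self.y2 *= factor
        return self  # mutate, then hand the instance back

# Without "return self" the comprehension would collect a list of None.
scaled = [r.scale(0.5) for r in [Rect(0, 0, 100, 50), Rect(10, 10, 20, 20)]]
print(scaled[0].x2, scaled[0].y2)  # 50.0 25.0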

24 binary image files changed but not shown (12 added, 12 deleted; 84 KiB to 238 KiB each).

View File

@@ -29,7 +29,7 @@ class MMP_Dataset(torch.utils.data.Dataset):
         @param is_test: Whether this is the test set (True) or the validation/training set (False)
         """
         self.image_size = image_size
-        self.images: Sequence[Tuple[str, Sequence[AnnotationRect]]] = []
+        self.images: Sequence[Tuple[str, str | None]] = []
         self.anchor_grid = anchor_grid
         self.min_iou = min_iou
         self.is_test = is_test
@@ -43,11 +43,10 @@ class MMP_Dataset(torch.utils.data.Dataset):
             img_file = os.path.join(path_to_data, fname)
             if is_test:
                 self.images.append((img_file, None))
-            else:
-                annotations = read_groundtruth_file(
-                    os.path.join(path_to_data, f"{match.group(1)}.gt_data.txt")
-                )
-                self.images.append((img_file, annotations))
+            annotation_file = os.path.join(
+                path_to_data, f"{match.group(1)}.gt_data.txt"
+            )
+            self.images.append((img_file, annotation_file))

         self.images.sort(
             key=lambda x: int(re.match(r"(.*/)(\d+)(\.jpg)", x[0]).group(2))
@@ -74,13 +73,13 @@ class MMP_Dataset(torch.utils.data.Dataset):
         if self.is_test:
             return (img_tensor, torch.Tensor(), int(img_id))

-        scaled_annotations = []
-        for annotation in self.images[idx][1]:
-            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
-            scaled_annotations.append(annotation)
+        annotations = [
+            annotation.scale(self.image_size / max(img.size[0], img.size[1]))
+            for annotation in read_groundtruth_file(self.images[idx][1])
+        ]

         label_grid = get_label_grid(
-            anchor_grid=self.anchor_grid, gts=scaled_annotations, min_iou=self.min_iou
+            anchor_grid=self.anchor_grid, gts=annotations, min_iou=self.min_iou
         )

         return (img_tensor, label_grid, int(img_id))
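Together, the hunks above change the dataset to store only (image path, annotation file path) pairs in __init__ and to parse ground truth lazily, per item, in __getitem__, so construction no longer reads every .gt_data.txt up front. (The new str | None annotation requires Python 3.10; Optional[str] is the equivalent on older interpreters.) A self-contained sketch of the lazy-loading pattern, with a placeholder parser standing in for the project's read_groundtruth_file:

from typing import List, Optional, Tuple

def parse_annotations(path: str) -> List[List[str]]:
    # Placeholder for read_groundtruth_file: one whitespace-separated box per line.
    with open(path) as f:
        return [line.split() for line in f if line.strip()]

class LazyDataset:
    def __init__(self, samples: List[Tuple[str, Optional[str]]]):
        # Cheap construction: only path strings are stored, no file I/O yet,
        # so DataLoader worker processes inherit very little state.
        self.samples = samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int):
        img_path, ann_path = self.samples[idx]
        # Ground truth is parsed lazily, once per access, inside the worker.
        annotations = parse_annotations(ann_path) if ann_path is not None else []
        return img_path, annotations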
@@ -121,34 +120,84 @@ def get_dataloader(
     return dataloader


-def calculate_max_coverage(loader: DataLoader, min_iou: float) -> float:
+def calculate_max_coverage(loader, min_iou):
     """
-    @param loader: A DataLoader object, generated with the get_dataloader function.
-    @param min_iou: Minimum IoU overlap that is required to count a ground truth box as covered.
-    @return: Ratio of how mamy ground truth boxes are covered by a label grid box. Must be a value between 0 and 1.
+    @param loader: DataLoader object.
+    @param min_iou: Minimum IoU overlap to count a ground truth box as covered.
+    @return: Ratio of how many ground truth boxes are covered by a label grid box. Value between 0 and 1.
     """
     total_boxes = 0
     covered_boxes = 0
-    dataset: MMP_Dataset = loader.dataset
-    anchor_grid = dataset.anchor_grid
-    for img, _, img_id in islice(loader, 4):
+    dataset = loader.dataset
+    anchor_grid = dataset.anchor_grid  # Shape: (H, W, 4)
+    # Reshape anchor grid to (N, 4)
+    anchors = anchor_grid.reshape(-1, 4)
+    for img, _, img_id in loader:
         for batch_index in range(len(img)):
             gts_file = os.path.join(
                 dataset.path_to_data,
                 f"{str(img_id[batch_index].item()).zfill(8)}.gt_data.txt",
             )
-            gts = read_groundtruth_file(gts_file)
-            total_boxes += len(gts)
-            for annotation in gts:
-                for box_idx in np.ndindex(anchor_grid.shape[:-1]):
-                    box_annotation = AnnotationRect.fromarray(anchor_grid[box_idx])
-                    calculated_iou = iou(annotation, box_annotation)
-                    if calculated_iou >= min_iou:
-                        covered_boxes += 1
-                        break
-    return covered_boxes / total_boxes
+            # Load and scale ground truth boxes if necessary
+            with Image.open(
+                os.path.join(
+                    dataset.path_to_data,
+                    f"{str(img_id[batch_index].item()).zfill(8)}.jpg",
+                )
+            ) as original_image:
+                original_w, original_h = original_image.size
+            # Assume square resize for model, get transform size from img tensor
+            transformed_size = img[batch_index].shape[-1]
+            scale = transformed_size / max(original_w, original_h)
+            annotations = [
+                annotation.scale(scale)
+                for annotation in read_groundtruth_file(gts_file)
+            ]
+            gt_boxes = np.stack(
+                [np.array(a) for a in annotations], axis=0
+            )  # shape (M, 4)
+            total_boxes += len(gt_boxes)
+            # Vectorized IoU calculation: (M, N)
+            ious = compute_ious_vectorized(gt_boxes, anchors)  # shape (M, N)
+            # Count ground truths for which any anchor box matches min_iou
+            covered = (ious >= min_iou).any(axis=1).sum()
+            covered_boxes += covered
+    return covered_boxes / total_boxes if total_boxes > 0 else 0.0
+
+
+def compute_ious_vectorized(boxes1, boxes2):
+    """
+    Compute the IoU matrix between each box in boxes1 and each box in boxes2.
+    boxes1: (M, 4), boxes2: (N, 4) -- format [x1, y1, x2, y2]
+    Returns: (M, N) IoU matrix
+    """
+    # Expand to (M, N, 4) via broadcasting
+    boxes1 = boxes1[:, None, :]  # (M, 1, 4)
+    boxes2 = boxes2[None, :, :]  # (1, N, 4)
+    # Intersection box
+    inter_x1 = np.maximum(boxes1[..., 0], boxes2[..., 0])
+    inter_y1 = np.maximum(boxes1[..., 1], boxes2[..., 1])
+    inter_x2 = np.minimum(boxes1[..., 2], boxes2[..., 2])
+    inter_y2 = np.minimum(boxes1[..., 3], boxes2[..., 3])
+    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
+    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
+    inter_area = inter_w * inter_h
+    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+    union_area = area1 + area2 - inter_area
+    return inter_area / (union_area + 1e-6)


 def draw_image_tensor_with_annotations(
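As a sanity check on the new helper, here is a hypothetical usage with two hand-verified boxes (the numbers follow from the IoU definition; they are not taken from the repository's tests):

import numpy as np

gt_boxes = np.array([[0.0, 0.0, 10.0, 10.0]])      # (M, 4) = (1, 4)
anchors = np.array([
    [0.0, 0.0, 10.0, 10.0],
    [5.0, 5.0, 15.0, 15.0],
])                                                 # (N, 4) = (2, 4)

ious = compute_ious_vectorized(gt_boxes, anchors)  # shape (1, 2)
# Identical boxes: IoU close to 1.0.
# Offset box: intersection 5 * 5 = 25, union 100 + 100 - 25 = 175, IoU = 25 / 175.
print(ious.round(3))                               # [[1.    0.143]]

# The (ious >= min_iou).any(axis=1) reduction replaces the old Python loop over
# np.ndindex with one vectorized pass, which is where the speedup comes from.
print((ious >= 0.5).any(axis=1).sum())             # 1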
@@ -205,14 +254,14 @@ def draw_positive_boxes(

 def main():
     anchor_grid = get_anchor_grid(
-        anchor_widths=[16, 32, 64, 96, 128, 144, 150, 160, 192, 224, 256],
+        anchor_widths=[8, 16, 35, 32, 64, 96, 128, 144, 150, 160, 192, 224],
         aspect_ratios=[1 / 3, 1 / 2, 3 / 5, 2 / 3, 3 / 4, 1, 4 / 3, 5 / 3, 2, 2.5, 3],
-        num_rows=32,
-        num_cols=32,
-        scale_factor=20,
+        num_rows=28,
+        num_cols=28,
+        scale_factor=8,
     )
     dataloader = get_dataloader(
-        num_workers=6,
+        num_workers=9,
         is_train=True,
         is_test=False,
         batch_size=8,
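For a sense of scale, a back-of-the-envelope anchor count for the new configuration, assuming get_anchor_grid emits one box per width x aspect ratio x grid cell (which the reshape(-1, 4) in calculate_max_coverage suggests):

new = 12 * 11 * 28 * 28   # widths * aspect_ratios * num_rows * num_cols
old = 11 * 11 * 32 * 32
print(new, old)           # 103488 123904

Checking every ground-truth box against roughly a hundred thousand anchors per image is exactly the workload that the vectorized IoU above turns from nested Python loops into a single NumPy broadcast.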
@@ -221,6 +270,8 @@ def main():
         anchor_grid=anchor_grid,
     )

+    # print(calculate_max_coverage(dataloader, 0.7))
+
     for img, label, img_id in islice(dataloader, 12):
         draw_positive_boxes(
             img_tensor=denormalize_image_tensor(img=img[5]),