API details.

bbox_iou[source]

bbox_iou(boxA, boxB)
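bbox_iou computes the intersection-over-union of the two boxes. As an illustrative sketch only (assuming (x_min, y_min, x_max, y_max) coordinates, as the examples below suggest; this is not necessarily the package's exact implementation):

def iou_sketch(boxA, boxB):
    # Hypothetical reference implementation, for illustration only.
    xA, yA = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    xB, yB = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    # Intersection area is zero when the boxes do not overlap.
    inter = max(0, xB - xA) * max(0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter / float(areaA + areaB - inter)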

import numpy as np
from detection_nbdev.utils import random_bbox, visualize

bboxA = random_bbox()
bboxB = random_bbox()
iou = bbox_iou(bboxA, bboxB)

print('bboxA:', bboxA)
print('bboxB:', bboxB)
print('IoU:', iou)

visualize(
    np.zeros((224,224,3)),
    [bboxA, bboxB],
    [0, 1],
    {0:'bboxA', 1:'bboxB'},
)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
bboxA: tensor([116, 133, 197, 189])
bboxB: tensor([36, 41, 79, 87])
IoU: 0.0
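The two random boxes above do not overlap, hence the IoU of 0.0. A quick hand-computed check with hypothetical coordinates covers the overlapping case:

boxC = [0, 0, 10, 10]    # hypothetical box, area 100
boxD = [5, 5, 15, 15]    # hypothetical box, area 100, overlapping boxC in a 5x5 patch
# intersection = 25, union = 100 + 100 - 25 = 175,
# so the expected IoU is 25/175 ≈ 0.143 (the exact value depends on the edge convention used)
print('IoU:', bbox_iou(boxC, boxD))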

class SingleBoxDetector[source]

SingleBoxDetector(model=None, pretrained=False, freeze_extractor=False, log_level=10, num_classes=None, weight_path=None) :: LightningModule

LightningModule base class for detectors that predict a single bounding box per image.
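As a rough sketch of the interface (an assumption based on the usage below, not the package's actual code), such a module wraps an underlying detector and logs an IoU metric at test time, matching the 'iou' key reported by trainer.test further down:

import pytorch_lightning as pl

class SingleBoxDetectorSketch(pl.LightningModule):
    # Hypothetical skeleton for illustration; the real class ships with detection_nbdev.
    def __init__(self, model=None):
        super().__init__()
        self.model = model            # underlying detector producing one box per image

    def forward(self, x):
        return self.model(x)

    def test_step(self, batch, batch_idx):
        imgs, labels = batch          # assumed (images, ground-truth boxes) batch layout
        preds = self(imgs)
        # Assumed: preds[i][0] is the top predicted box for image i, as in the example below.
        ious = [bbox_iou(label, pred[0].detach().cpu().numpy())
                for label, pred in zip(labels, preds)]
        self.log('iou', sum(ious) / len(ious))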

class TorchVisionDetector[source]

TorchVisionDetector(model=None, pretrained=False, freeze_extractor=False, log_level=10, num_classes=None, weight_path=None) :: SingleBoxDetector

A SingleBoxDetector that wraps a torchvision detection model selected by name (e.g. 'fasterrcnn_resnet50_fpn').
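Under the hood, a wrapper like this typically resolves the constructor by name from torchvision.models.detection. The helper below is a hypothetical sketch of that pattern (build_torchvision_detector is not part of the package), not the class's actual implementation:

import torchvision

def build_torchvision_detector(name='fasterrcnn_resnet50_fpn',
                               pretrained=False, freeze_extractor=False):
    # Hypothetical helper: look up the constructor by name and instantiate it.
    ctor = getattr(torchvision.models.detection, name)
    model = ctor(pretrained=pretrained)
    if freeze_extractor:
        # Freeze the feature-extracting backbone so only the detection heads train.
        for p in model.backbone.parameters():
            p.requires_grad = False
    return model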

import pytorch_lightning as pl
from torchvision.transforms import ToTensor, Compose
from detection_nbdev.dataset import XMLDetectionDataModule, XMLDetectionDataset

# Setup a model
modelname = 'fasterrcnn_resnet50_fpn'
model = TorchVisionDetector(modelname, pretrained=True)

# Prepare data
data_dir = '../detection/data/'
image_transform = Compose([
    ToTensor(),
    lambda x: x.unsqueeze(0),  # add a batch dimension for the detector
])

dataset = XMLDetectionDataset(
    root=data_dir + 'train',
    image_transform=image_transform,
    target_transform=None,
    transform=None)
img, label = dataset[0]
model.eval()
pred = model(img)
bbox = pred[0][0].detach().numpy()
iou = bbox_iou(label, bbox)

print('label:', label)
print('bbox:', bbox)
print('IoU:', iou)
label: [323, 174, 478, 324]
bbox: [320.31696 176.1747  484.72095 325.00894]
IoU: 0.9241439868482342
visualize(
    img.detach().numpy()[0].transpose(1,2,0),
    [bbox.astype(int), label],
    [0, 1],
    {0:'prediction', 1:'label'},
)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
dm = XMLDetectionDataModule(
    data_dir, 
    image_transform=ToTensor())

dm.setup(mode='use_dir')
trainer = pl.Trainer(gpus=1, checkpoint_callback=False)
trainer.test(model, dm.test_dataloader())
# test_eq(trainer.fit(model, dm), 1)
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'iou': 0.8561266660690308}
--------------------------------------------------------------------------------
[{'iou': 0.8561266660690308}]
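The commented-out trainer.fit call above is the entry point for training; a minimal sketch (same model and DataModule as above, with a hypothetical epoch count) would be:

trainer = pl.Trainer(gpus=1, max_epochs=10, checkpoint_callback=False)
trainer.fit(model, dm)    # fine-tune on the training split provided by the DataModule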