Skip to content

Instantly share code, notes, and snippets.

@shreejalt
Created June 22, 2020 15:37
Show Gist options
  • Save shreejalt/ad3b8c8fd8e5fba41fed5ed3f33522e4 to your computer and use it in GitHub Desktop.
Save shreejalt/ad3b8c8fd8e5fba41fed5ed3f33522e4 to your computer and use it in GitHub Desktop.
Numpy version of Bounding Box Generator from the paper: Generating Positive Bounding Boxes for Balanced Training of Object Detectors
'''
Description: Generate bounding boxes of different aspect ratios and scales, given a ground-truth box and an IoU threshold (NumPy version).
Reference Paper: Generating Positive Bounding Boxes for Balanced Training of Object Detectors - September 2019.
Link: https://arxiv.org/pdf/1909.09777.pdf
Official Github Repository Code: https://github.com/kemaloksuz/BoundingBoxGenerator
NOTE: THIS CODE IS TAKEN FROM OFFICIAL REPOSITORY MENTIONED BY THE AUTHORS IN PAPER.
CHANGES MADE: TORCH -> NUMPY CONVERSION OF THE SNIPPET
FILE NAME: kemaloksuz/BoundingBoxGenerator/mmdet/core/bbox/samplers/bounding_box_generator.py
'''
#from __future__ import division # For Python 2.X users
import numpy as np
from matplotlib import path
import sys
import copy
import os
#Configuration Lists
# Normalised box [TLx, TLy, BRx, BRy] that every ground-truth box is mapped onto
# by normalizeBox before the contours are traced.
referenceBox = [0.3, 0.3, 0.6, 0.6]
# Step of the np.arange sweeps used to trace the contour segments.
IoULimitPrecision = 1e-5
outpath = 'contours' # Path in which contours will be written
# exist_ok=True replaces the exists()/mkdir() pair: no check-then-create race,
# and intermediate directories are created if outpath is ever made nested.
os.makedirs(outpath, exist_ok=True)
VISUALIZE = True
if VISUALIZE:
    # cv2 is only needed for the debug drawings, so it is imported lazily.
    import cv2
    # Shared canvas that the drawing helpers mutate through `global img`.
    img = np.zeros([800, 800, 3], dtype=np.uint8)
def visualizeContours(box, X, Y, scales, shifts, flag, name='Q1', color=(0, 255, 0), thickness=2):
    '''
    Input:: box: [TLx, TLy, BRx, BRy]: 1X4, X: [x1, x2, x3, ..., xn]: 1XN, Y: [y1, y2, y3, ..., yn]: 1XN
    scales: [[s1, s2]]: 1X2, shifts: [[sh1, sh2]]: 1X2 (only row 0 of each is used),
    flag: truthy when the points live in the mirrored (1 - coordinate) space, name: string
    Output:: None
    Description: Visualizing polygons generated in each quadrant. Every (x, y) pair is mapped to
    pixel coordinates with unnormalizeBox and drawn as a filled circle on the global canvas `img`,
    which is then written to <outpath>/<name>.jpg.
    '''
    global img
    # Flatten first: callers may pass plain sequences, 1-D arrays or lists of
    # one-element arrays. Mixing the latter with scalars in a single list builds
    # a ragged array, which recent NumPy versions refuse to create.
    xs = np.asarray(X, dtype=float).reshape(-1)
    ys = np.asarray(Y, dtype=float).reshape(-1)
    # zip() in the per-point version stopped at the shorter input; keep that.
    count = min(xs.shape[0], ys.shape[0])
    xs, ys = xs[:count], ys[:count]
    if flag:
        # Undo the mirroring so the points are drawn in the un-flipped frame.
        xs, ys = 1 - xs, 1 - ys
    # One vectorised un-normalisation instead of one deep copy per point.
    # Columns 2 and 3 are only there to satisfy the helper's N x 4 layout.
    points = np.column_stack((xs, ys, np.full(count, box[2]), np.full(count, box[3])))
    pixels = unnormalizeBox(points, scales[0], shifts[0])
    for px, py in zip(pixels[:, 0], pixels[:, 1]):
        cv2.circle(img, (int(px), int(py)), thickness, color, -1)
    cv2.imwrite(os.path.join(outpath, name + '.jpg'), img)
def drawBox(box, color=(255, 0, 0)):
    '''
    Input:: box: [TLx, TLy, BRx, BRy] in pixel coordinates, color: BGR tuple
    Output:: None
    Description: Draws the outline of `box` (thickness 2) on the global canvas `img`.
    '''
    global img
    # Cast to built-in int, as visualizeContours already does for its circles,
    # so the call does not depend on how the installed OpenCV build treats
    # NumPy scalar types (and float coordinates are truncated instead of rejected).
    x1, y1, x2, y2 = (int(v) for v in box[:4])
    cv2.rectangle(img, (x1, y1), (x2, y2), color=color, thickness=2)
def findTopLeftPointBorders(box, IoU, boxArea, flag, scales, shifts):
    '''
    Input:: box: normalised box [TLx, TLy, BRx, BRy], IoU: target IoU, boxArea: area of `box`,
    flag / scales / shifts: only forwarded to the visualisation helpers
    Output:: P: NX2 array, column 0 = x, column 1 = 1 - y
    Description: Traces four contour segments (suffixes BR, BL, TL, TR) with a step of
    IoULimitPrecision and chains them in the order TR, BR, BL, TL into one closed outline.
    Presumably this outline bounds the admissible top-left corners for the requested IoU
    (see the reference paper); the formulas are kept exactly as in the original snippet.
    '''
    step = IoULimitPrecision
    left, top, right, bottom = box[0], box[1], box[2], box[3]

    # BR segment: sweep x from the left edge, solve for y.
    x1BR = np.arange(left, right - IoU * (right - left), step=step)
    I = (right - x1BR) * (bottom - top)
    y1BR = bottom - (I / IoU - boxArea + I) / (right - x1BR)

    # BL and TL segments share the same y sweep.
    ySweep = np.arange(top, bottom - (boxArea * IoU) / (right - left), step=step)

    y1BL = ySweep
    x1BL = right - ((boxArea * IoU) / (bottom - y1BL))

    I = (right - left) * (bottom - ySweep)
    xSweepTL = right - (I / IoU - boxArea + I) / (bottom - ySweep)
    # Reversed so the concatenated outline is traversed in a consistent direction.
    y1TL = ySweep[::-1]
    x1TL = xSweepTL[::-1]

    # TR segment: y sweep that starts above the box and ends at its top edge.
    ySweepTR = np.arange((((bottom * (IoU - 1)) + top) / IoU), top, step=step)
    xSweepTR = right - (boxArea / (IoU * (bottom - ySweepTR)))
    y1TR = ySweepTR[::-1]
    x1TR = xSweepTR[::-1]

    if VISUALIZE:
        # When the box is in mirrored space, flip it back before drawing.
        shown = [1 - box[k] for k in range(4)] if flag else box
        unnormBox = unnormalizeBox(np.expand_dims(shown, axis=0), scales[0], shifts[0])[0].astype('int')
        drawBox(unnormBox)
        segments = (
            (x1BR, y1BR, 'TLQ1', {'color': (0, 0, 255)}),
            (x1BL, y1BL, 'TLQ2', {'color': (255, 0, 255)}),
            (x1TL, y1TL, 'TLQ3', {'color': (255, 255, 0)}),
            (x1TR, y1TR, 'TLQ4', {}),  # default colour
        )
        for segX, segY, tag, extra in segments:
            visualizeContours(box, segX, segY, scales, shifts, flag, name=tag, **extra)

    x1 = np.concatenate((x1TR, x1BR, x1BL, x1TL))
    y1 = np.concatenate((y1TR, y1BR, y1BL, y1TL))
    # The y axis is stored flipped (1 - y); samplePolygon undoes this when sampling.
    return np.column_stack((x1, 1 - y1))
def findBottomRightMaxBorders(box, IoU, boxArea, proposedx1, proposedy1):
    '''
    Input:: box: normalised box [TLx, TLy, BRx, BRy], IoU: target IoU, boxArea: area of `box`,
    proposedx1, proposedy1: already sampled top-left corner
    Output:: (limitLeftX, limitRightX, limitTopY, limitBottomY)
    Description: Computes four limits for the bottom-right corner given the fixed top-left
    corner. For a corner inside the box, a generated box spanning the full box height has
    IoU equal to `IoU` at x = limitLeftX and x = limitRightX (likewise for the Y limits
    with the full box width).
    '''
    gtRight, gtBottom = box[2], box[3]
    # Top-left corner of the intersection with the box.
    xA = np.maximum(proposedx1, box[0])
    yA = np.maximum(proposedy1, box[1])
    # Largest possible intersection: the bottom-right corner at or beyond the box's.
    overlapW = np.clip(gtRight - xA, a_min=0, a_max=sys.maxsize)
    overlapH = np.clip(gtBottom - yA, a_min=0, a_max=sys.maxsize)
    I = overlapW * overlapH

    # Heights/widths measured up to the box's bottom/right edge.
    hInter = gtBottom - yA
    hFull = gtBottom - proposedy1
    wInter = gtRight - xA
    wFull = gtRight - proposedx1

    # Numerator shared by the two "far" limits (right and bottom).
    farTerm = I / IoU - boxArea + I

    leftNum = IoU * boxArea + xA * IoU * hInter + xA * hInter - IoU * proposedx1 * hFull
    limitLeftX = leftNum / ((IoU + 1) * hInter - IoU * hFull)

    limitRightX = farTerm / hFull + proposedx1

    topNum = IoU * boxArea + IoU * wInter * yA + yA * wInter - IoU * proposedy1 * wFull
    limitTopY = topNum / ((IoU + 1) * wInter - IoU * wFull)

    limitBottomY = farTerm / wFull + proposedy1

    return limitLeftX, limitRightX, limitTopY, limitBottomY
def findBottomRightBorders(box, IoU, boxArea, proposedx1, proposedy1, limitLeftX, limitRightX, limitTopY, limitBottomY, scales, shifts, flag):
    '''
    Input:: box: normalised box [TLx, TLy, BRx, BRy], IoU: target IoU, boxArea: area of `box`,
    proposedx1, proposedy1: already sampled top-left corner,
    limitLeftX, limitRightX, limitTopY, limitBottomY: output of findBottomRightMaxBorders,
    scales / shifts / flag: only forwarded to visualizeContours
    Output:: NX2 array, column 0 = x, column 1 = 1 - y
    Description: Sweeps each limit towards the box's bottom-right corner with a step of
    IoULimitPrecision, producing four contour segments (suffixes TR, BR, BL, TL) that are
    chained in that order. Presumably the result bounds the admissible bottom-right corners
    for the requested IoU; the formulas are kept exactly as in the original snippet.
    '''
    step = IoULimitPrecision
    gtRight, gtBottom = box[2], box[3]
    # Top-left corner of the intersection with the box.
    xA = np.maximum(proposedx1, box[0])
    yA = np.maximum(proposedy1, box[1])
    overlapW = np.clip(gtRight - xA, a_min=0, a_max=sys.maxsize)
    I = overlapW * np.clip(gtBottom - yA, a_min=0, a_max=sys.maxsize)

    # TR segment: y sweeps upward in value from limitTopY to just past the bottom edge.
    y2TR = np.arange(limitTopY, gtBottom + step, step=step)
    clippedH = np.clip(np.minimum(y2TR, gtBottom) - yA, a_min=0, a_max=sys.maxsize)
    Inew = overlapW * clippedH
    x2TR = proposedx1 + (Inew / IoU - boxArea + Inew) / (y2TR - proposedy1)

    # BR segment: x sweeps down from limitRightX towards the right edge.
    x2BR = np.arange(limitRightX, gtRight - step, step=-step)
    y2BR = proposedy1 + (I / IoU - boxArea + I) / (x2BR - proposedx1)

    # BL segment: y sweeps down from limitBottomY towards the bottom edge.
    y2BL = np.arange(limitBottomY, gtBottom - step, step=-step)
    hInter = np.minimum(y2BL, gtBottom) - yA
    hFull = y2BL - proposedy1
    x2BL = (IoU * boxArea + xA * IoU * hInter + xA * hInter - IoU * proposedx1 * hFull) / ((IoU + 1) * hInter - IoU * hFull)

    # TL segment: x sweeps from limitLeftX to just past the right edge.
    x2TL = np.arange(limitLeftX, gtRight + step, step=step)
    wInter = np.minimum(x2TL, gtRight) - xA
    wFull = x2TL - proposedx1
    y2TL = (IoU * boxArea + IoU * wInter * yA + yA * wInter - IoU * proposedy1 * wFull) / ((IoU + 1) * wInter - IoU * wFull)

    if VISUALIZE:
        segments = (
            (x2TR, y2TR, 'BRQ1', {}),  # default colour
            (x2BR, y2BR, 'BRQ2', {'color': (0, 0, 255)}),
            (x2BL, y2BL, 'BRQ3', {'color': (255, 0, 255)}),
            (x2TL, y2TL, 'BRQ4', {'color': (255, 255, 0)}),
        )
        for segX, segY, tag, extra in segments:
            visualizeContours(box, segX, segY, scales, shifts, flag, name=tag, **extra)

    x2 = np.concatenate((x2TR, x2BR, x2BL, x2TL))
    y2 = np.concatenate((y2TR, y2BR, y2BL, y2TL))
    # The y axis is stored flipped (1 - y); samplePolygon undoes this when sampling.
    return np.column_stack((x2, 1 - y2))
def samplePolygon(P, box, scales, shifts, flag, name):
    '''
    Input:: P: NX2 polygon vertices, column 0 = x, column 1 = 1 - y,
    box / scales / shifts / flag / name: only forwarded to visualizeContours
    Output:: (x, y): tuple of two one-element arrays, a point inside the polygon
    Description: Rejection sampling. Points are drawn uniformly from the polygon's bounding
    rectangle until one falls inside the polygon. There is no iteration cap, so a polygon
    with zero area would loop forever.
    '''
    # Column 1 is stored flipped, so flip it back to get the bounds in y.
    ys = 1 - P[:, 1]
    bounds = [np.min(P[:, 0]), np.min(ys), np.max(P[:, 0]), np.max(ys)]
    # The polygon never changes, so build the Path once instead of on every retry.
    polygon = path.Path(P)
    while True:
        proposedx1, proposedy1 = sampleRectangle(bounds)
        # contains_point expects one (x, y) pair of scalars; sampleRectangle
        # returns one-element arrays, so unwrap them explicitly.
        candidate = (float(proposedx1[0]), 1 - float(proposedy1[0]))
        if polygon.contains_point(candidate):
            if VISUALIZE:
                # Pass the 1-D arrays themselves: wrapping them in another list
                # makes visualizeContours build a ragged array, which recent
                # NumPy versions reject.
                visualizeContours(box, proposedx1, proposedy1, scales, shifts, flag, name=name, color=(255, 255, 255), thickness=6)
            return (proposedx1, proposedy1)
def sampleRectangle(B, numSamples=1):
    '''
    Input:: B: rectangle [minX, minY, maxX, maxY], numSamples: number of points to draw
    Output:: (x, y): two arrays of length numSamples
    Description: Draws points uniformly at random inside the rectangle using the global
    NumPy random state.
    '''
    xSpan = B[2] - B[0]
    ySpan = B[3] - B[1]
    # x is drawn before y so the global random stream is consumed in the same order.
    x = B[0] + xSpan * np.random.rand(numSamples)
    y = B[1] + ySpan * np.random.rand(numSamples)
    return (x, y)
def unnormalizeBox(bbox, scales, shifts):
    '''
    Input:: bbox: NX4 normalised boxes [TLx, TLy, BRx, BRy], scales: [sx, sy], shifts: [shx, shy]
    Output:: NX4 boxes mapped back to the original coordinates (the input is not modified)
    Description: Inverse of normalizeBox: subtract the reference box's top-left corner, multiply
    by the scale and add the shift, separately for the x columns (0, 2) and y columns (1, 3).
    '''
    # np.array(copy=True) copies like deepcopy did but also accepts plain lists.
    bb = np.array(bbox, copy=True)
    if not np.issubdtype(bb.dtype, np.floating):
        # Integer input cannot hold the fractional offsets below.
        bb = bb.astype(float)
    # x columns use the reference box's x origin and y columns its y origin,
    # mirroring normalizeBox. Using referenceBox[0] for both only worked
    # because the default reference box has equal x and y origins.
    bb[:, [0, 2]] = (bb[:, [0, 2]] - referenceBox[0]) * scales[0] + shifts[0]
    bb[:, [1, 3]] = (bb[:, [1, 3]] - referenceBox[1]) * scales[1] + shifts[1]
    return bb
def normalizeBox(bbox):
    '''
    Input:: bbox: NX4 boxes [TLx, TLy, BRx, BRy]
    Output:: (bb, scales, shifts): bb: NX4 normalised boxes, scales: NX2 [sx, sy],
    shifts: NX2 [TLx, TLy] of the input boxes
    Description: Maps every box onto referenceBox: translate by the box's top-left corner,
    divide by the per-axis scale (box size / reference size) and add the reference origin.
    A zero-width or zero-height box yields a zero scale and therefore a division by zero.
    '''
    # Work on a float copy: writing the fractional results back into an integer
    # array would silently truncate them.
    bb = np.array(bbox, copy=True)
    if not np.issubdtype(bb.dtype, np.floating):
        bb = bb.astype(float)
    refW = referenceBox[2] - referenceBox[0]
    refH = referenceBox[3] - referenceBox[1]
    # Fancy indexing copies, so shifts is unaffected by the in-place updates below.
    shifts = bb[:, [0, 1]]
    # The y scale is taken against the reference height. The reference width was
    # used for both axes before, which is only equivalent for a square reference box.
    scales = np.stack(((bb[:, 2] - bb[:, 0]) / refW, (bb[:, 3] - bb[:, 1]) / refH), axis=1)
    bb[:, [0, 2]] = (bb[:, [0, 2]] - shifts[:, [0]]) / scales[:, [0]] + referenceBox[0]
    bb[:, [1, 3]] = (bb[:, [1, 3]] - shifts[:, [1]]) / scales[:, [1]] + referenceBox[1]
    return bb, scales, shifts
def runVecIoU(bboxes1, bboxes2):
    '''
    Input: bboxes1[Ground Truths/Generated Boxes] [NX4] and bboxes2[Ground Truth/Generated Boxes] [MX4]
    Input format of boxes should be [TLx, TLy, BRx, BRy]; arrays, nested lists or a single
    1-D box are accepted
    Output: IoU [N X M] matrix containing N --> M IoUs
    Purpose: Used for DEBUG
    Note: widths and heights are computed as (max - min + 1), i.e. coordinates are treated
    as inclusive pixel indices.
    '''
    # atleast_2d/asarray let lists and a lone box through; np.split required an ndarray.
    first = np.atleast_2d(np.asarray(bboxes1))
    second = np.atleast_2d(np.asarray(bboxes2))
    # Columns of the first set are (N, 1); of the second set (1, M), so every
    # binary operation below broadcasts to the full N x M grid.
    x11, y11, x12, y12 = (first[:, k:k + 1] for k in range(4))
    x21, y21, x22, y22 = (second[:, k:k + 1].T for k in range(4))

    interW = np.maximum(np.minimum(x12, x22) - np.maximum(x11, x21) + 1, 0)
    interH = np.maximum(np.minimum(y12, y22) - np.maximum(y11, y21) + 1, 0)
    interArea = interW * interH

    areaFirst = (x12 - x11 + 1) * (y12 - y11 + 1)
    areaSecond = (x22 - x21 + 1) * (y22 - y21 + 1)
    return interArea.astype(float) / (areaFirst + areaSecond - interArea)
def generateBoundingBox(bbox, IoUs, imageSize):
    '''
    MAIN FUNCTION CALL
    Input: bbox: NX4 - [[TLx, TLy, BRx, BRy], [TLx, TLy, BRx, BRy], ...] | IoUs: List/array of IoU(Length L) | imageSize: Dimensions: List[width, height]
    Output: (N*L)X4 float32 array of generated boxes [TLx, TLy, BRx, BRy]; rows i*L ... (i+1)*L - 1 belong to ground truth i,
    one row per entry of IoUs in order. Empty input gives a 0X4 array.
    Description: This snippet will generate artificial bounding boxes for the given ground truths. For every ground truth
    and every requested IoU it samples a top-left corner, then a bottom-right corner, maps the box back to image
    coordinates and clips it to the image. Each sample is taken with equal probability either directly or in a mirrored
    copy of the normalised box, in which case the result is mirrored back.
    '''
    # Accept lists and integer arrays: the normalisation below needs floats.
    bbox = np.asarray(bbox, dtype=float)
    if bbox.size == 0:
        return np.empty((0, 4), dtype=np.float32)
    bbox = bbox.reshape(-1, 4)
    IoUs = np.asarray(IoUs, dtype=float).reshape(-1)
    numBoxes = IoUs.shape[0]

    generated = []
    for bb in bbox:
        normalizedBox, scales, shifts = normalizeBox(np.expand_dims(bb, axis=0))
        bbTemp = np.squeeze(normalizedBox)
        # Mirroring does not change the area, so it is computed once.
        boxArea = (bbTemp[2] - bbTemp[0]) * (bbTemp[3] - bbTemp[1])
        sampledBox = np.full((numBoxes, 4), -1, dtype=np.float32)
        for i, IoU in enumerate(IoUs):
            # Randomly take TL or BR coordinates for making box coordinate spaces.
            if np.random.uniform() < 0.5:
                flag = 1
                normalizedBox = np.array([1 - bbTemp[2], 1 - bbTemp[3], 1 - bbTemp[0], 1 - bbTemp[1]])
            else:
                flag = 0
                normalizedBox = bbTemp
            # Finding the space of Top Left Coordinates
            topLeftBorders = findTopLeftPointBorders(normalizedBox, IoU, boxArea, flag, scales, shifts)
            # samplePolygon returns one-element arrays; ravel them into a length-2
            # vector rather than assigning a size-1 array to a scalar slot, which
            # NumPy has deprecated.
            sampledBox[i, :2] = np.ravel(samplePolygon(topLeftBorders, normalizedBox, scales, shifts, flag, name='TLSP'))
            # Given Top Left Coordinates find the space for Bottom Right Coordinates
            limitLeftX, limitRightX, limitTopY, limitBottomY = findBottomRightMaxBorders(normalizedBox, IoU, boxArea, sampledBox[i, 0], sampledBox[i, 1])
            bottomRightBorders = findBottomRightBorders(normalizedBox, IoU, boxArea, sampledBox[i, 0], sampledBox[i, 1], limitLeftX, limitRightX, limitTopY, limitBottomY, scales, shifts, flag)
            sampledBox[i, 2:] = np.ravel(samplePolygon(bottomRightBorders, normalizedBox, scales, shifts, flag, name='BRSP'))
            if flag == 1:
                # Sampled in mirrored space: mirror back, which also swaps the two corners.
                sampledBox[i] = 1 - sampledBox[i, [2, 3, 0, 1]]
            sampledBox[i] = unnormalizeBox(np.expand_dims(sampledBox[i], axis=0), scales[0], shifts[0])[0]
            # Keep the box inside the image: x against width, y against height.
            sampledBox[i, [0, 2]] = np.clip(sampledBox[i, [0, 2]], 0, imageSize[0])
            sampledBox[i, [1, 3]] = np.clip(sampledBox[i, [1, 3]], 0, imageSize[1])
        generated.append(sampledBox)
    return np.concatenate(generated, axis=0)
#Test Bench: SUCCESS
if __name__ == '__main__':
    # One 100 x 200 ground-truth box inside an 800 x 800 image, one target IoU.
    canvasSize = [800, 800]
    gtBoxes = np.array([[100., 200., 200., 400.]])
    targetIoUs = np.array([0.6])

    generated = generateBoundingBox(gtBoxes, targetIoUs, canvasSize)
    # IoU actually achieved by each generated box against the ground truth.
    achieved = runVecIoU(gtBoxes, generated)

    print('Final Boxes:-', generated, sep='\n')
    print('IoU Matrix:-', achieved, sep='\n')

    if VISUALIZE:
        # Overlay the generated boxes in red (BGR) and save the final canvas.
        for row in generated:
            drawBox(row.astype('int'), color=(0, 0, 255))
        cv2.imwrite(os.path.join(outpath, 'out.jpg'), img)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment