ez-frcnn.inferencing
def inferencing.load_model(model_name, MODEL_DIR, NUM_CLASSES):
Loads a trained model from disk and prepares it for evaluation.
Inputs
model_name (str): Filename of the saved model weights.
MODEL_DIR (str): Directory path where the model files are stored.
NUM_CLASSES (int): Number of output classes for the model.
Output
nn.Module: The loaded PyTorch model set to evaluation mode on the appropriate device.
Source code in library/inferencing.py
def load_model(model_name, MODEL_DIR, NUM_CLASSES):
    """
    Loads a trained model from disk and prepares it for evaluation.
    Inputs:
        model_name (str): Filename of the saved model weights.
        MODEL_DIR (str): Directory path where the model files are stored.
        NUM_CLASSES (int): Number of output classes for the model.
    Output:
        nn.Module: The loaded PyTorch model set to evaluation mode on the appropriate device.
    """
    # build the weights path from MODEL_DIR so the parameter is honored
    modelPath = os.path.join(MODEL_DIR, model_name)
    # set the computation device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # load the model architecture and the trained weights
    model = create_model(num_classes=NUM_CLASSES).to(device)
    model.load_state_dict(torch.load(modelPath, map_location=device))
    model.eval()
    return model
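Example, a minimal call sketch (the weights filename and class count below are illustrative placeholders, not values shipped with the library):

# hypothetical example: the weights file and class count are placeholders
model = load_model('model_final.pth', './models', NUM_CLASSES=3)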
def inferencing.saveResultsToCSV(csvFileName, results, OUT_DIR):
Saves detection results to a CSV file with specified columns.
Inputs
csvFileName (str): Name of the CSV file (without extension) to save results.
results (list of dict): List of detection result dictionaries containing keys
'image_name', 'boxes', 'classes', and 'scores'.
OUT_DIR (str): Directory path where the CSV file will be saved.
Output
None: Writes the results to a CSV file at the specified location.
Source code in library/inferencing.py
def saveResultsToCSV(csvFileName, results, OUT_DIR):
    """
    Saves detection results to a CSV file with specified columns.
    Inputs:
        csvFileName (str): Name of the CSV file (without extension) to save results.
        results (list of dict): List of detection result dictionaries containing keys
            'image_name', 'boxes', 'classes', and 'scores'.
        OUT_DIR (str): Directory path where the CSV file will be saved.
    Output:
        None: Writes the results to a CSV file at the specified location.
    """
    csv_path = os.path.join(OUT_DIR, f"{csvFileName}.csv")
    # open the CSV file and write the data
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(['Image Name', 'Bounding Boxes', 'Classes', 'Scores'])  # CSV header
        for result in results:
            writer.writerow([result['image_name'], result['boxes'], result['classes'], result['scores']])
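For reference, a sketch of the input shape saveResultsToCSV expects; all values are illustrative:

results = [{
    'image_name': 'img_0001.png',    # illustrative filename
    'boxes': [[10, 20, 110, 220]],   # one [x_min, y_min, x_max, y_max] list per detection
    'classes': ['cell'],             # one label per box
    'scores': [0.97],                # one confidence per box
}]
saveResultsToCSV('inference_results', results, './outputs')  # writes ./outputs/inference_results.csv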
def inferencing.inference_video(DIR_TEST, OUT_DIR, vidName, model, detection_threshold, CLASSES, save_detections=False):
Runs object detection on a video, annotates detected objects frame-by-frame,
optionally saves detected regions, and writes the annotated video to disk.
Inputs
DIR_TEST (str): Path to the input video file for inference.
OUT_DIR (str): Directory where output video and detected regions (optional) will be saved.
vidName (str): Filename for the output annotated video.
model (torch.nn.Module): Trained object detection model.
detection_threshold (float): Confidence threshold for filtering detections.
CLASSES (list): List of class names corresponding to model outputs.
save_detections (bool, optional): If True, saves detected bounding box regions as separate images. Default is False.
Outputs
list: A list of three parallel per-frame lists:
- bboxes (list): Detected bounding boxes per frame.
- classes (list): Detected class labels per frame.
- sscores (list): Detection scores per frame.
Source code in library/inferencing.py
def inference_video(DIR_TEST, OUT_DIR, vidName, model, detection_threshold, CLASSES, save_detections=False):
    """
    Runs object detection on a video, annotates detected objects frame-by-frame,
    optionally saves detected regions, and writes the annotated video to disk.
    Inputs:
        DIR_TEST (str): Path to the input video file for inference.
        OUT_DIR (str): Directory where the output video and detected regions (optional) will be saved.
        vidName (str): Filename for the output annotated video.
        model (torch.nn.Module): Trained object detection model.
        detection_threshold (float): Confidence threshold for filtering detections.
        CLASSES (list): List of class names corresponding to model outputs.
        save_detections (bool, optional): If True, saves detected bounding box regions as separate images. Default is False.
    Outputs:
        list: A list of three parallel per-frame lists:
            - bboxes (list): Detected bounding boxes per frame.
            - classes (list): Detected class labels per frame.
            - sscores (list): Detection scores per frame.
    """
    vid = cv2.VideoCapture(DIR_TEST)
    NUM_FRAMES = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # define the codec and create the VideoWriter; the annotated video is written to OUT_DIR/vidName
    out = cv2.VideoWriter((OUT_DIR + '/' + vidName), cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (frame_width, frame_height))
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    classes = [None] * NUM_FRAMES
    bboxes = [None] * NUM_FRAMES
    sscores = [None] * NUM_FRAMES
    idx = 0
    while vid.isOpened():
        ret, image = vid.read()
        if not ret:  # stop when no frame could be read
            break
        orig_image = image.copy()
        # BGR to RGB
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        # scale the pixel values to [0, 1]
        image /= 255.0
        # bring color channels to front
        image = np.transpose(image, (2, 0, 1)).astype(float)
        # convert to tensor on the available device and add a batch dimension
        image = torch.tensor(image, dtype=torch.float).to(device)
        image = torch.unsqueeze(image, 0)
        with torch.no_grad():
            outputs = model(image)
        # move all detections to the CPU for further operations
        outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
        # carry on only if there are detected boxes
        if len(outputs[0]['boxes']) != 0:
            boxes = outputs[0]['boxes'].data.numpy()
            scores = outputs[0]['scores'].data.numpy()
            sscores[idx] = scores
            # filter out boxes according to `detection_threshold`
            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            bboxes[idx] = boxes
            draw_boxes = bboxes[idx].copy()
            # get all the predicted class names
            pred_classes = [CLASSES[i] for i in outputs[0]['labels'].cpu().numpy()]
            pred_classes = np.array(pred_classes)
            pred_classes = pred_classes[scores >= detection_threshold]
            classes[idx] = pred_classes
            if save_detections:
                for j, box in enumerate(draw_boxes):
                    # extract and save each detected region
                    detected_region = orig_image[box[1]:box[3], box[0]:box[2]]
                    region_save_path = f"{OUT_DIR}/frame_{idx:04d}_box_{j:02d}.png"
                    cv2.imwrite(region_save_path, detected_region)
            # draw the bounding boxes and write the class name on top of each
            for j, box in enumerate(draw_boxes):
                cv2.rectangle(orig_image,
                              (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])),
                              (0, 0, 255), 2)
                cv2.putText(orig_image, str(pred_classes[j]),
                            (int(box[0]), int(box[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0),
                            2, lineType=cv2.LINE_AA)
        out.write(orig_image)
        idx += 1
        print(f"Frame {idx} done...")
        print('-' * 50)
        if idx == NUM_FRAMES:
            break
    vid.release()
    out.release()
    print('TEST PREDICTIONS COMPLETE')
    return [bboxes, classes, sscores]
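A call sketch for video inference; the paths, threshold, and class list are placeholders. The function returns three parallel per-frame lists, so they can be unpacked directly:

bboxes, classes, sscores = inference_video(
    'videos/sample.mp4',             # hypothetical input video
    './outputs', 'annotated.avi',
    model, detection_threshold=0.8,
    CLASSES=['background', 'cell'],  # placeholder class list
    save_detections=True)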
def inferencing.inference_images(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES, tqdmBar, inf_fig):
Performs object detection on all images in a specified directory, annotates and saves the results,
and records detection details for further analysis.
Inputs
DIR_TEST (str): Path to the directory containing input images.
model (torch.nn.Module): Trained object detection model.
OUT_DIR (str): Directory where annotated images and results CSV will be saved.
detection_threshold (float): Confidence threshold for filtering detections.
CLASSES (list): List of class names corresponding to model output labels.
tqdmBar (callable): Progress bar function for iterating over images.
inf_fig (object): Visualization object used to display annotated images.
Outputs
list: A list of dictionaries, each containing:
- 'image_name' (str): Filename of the image.
- 'boxes' (list): Detected bounding boxes as lists of coordinates.
- 'classes' (list): Predicted class labels.
- 'scores' (list): Confidence scores for detections.
Source code in library/inferencing.py
def inference_images(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES, tqdmBar, inf_fig):
    """
    Performs object detection on all images in a specified directory, annotates and saves the results,
    and records detection details for further analysis.
    Inputs:
        DIR_TEST (str): Path to the directory containing input images.
        model (torch.nn.Module): Trained object detection model.
        OUT_DIR (str): Directory where annotated images and the results CSV will be saved.
        detection_threshold (float): Confidence threshold for filtering detections.
        CLASSES (list): List of class names corresponding to model output labels.
        tqdmBar (callable): Progress bar function for iterating over images.
        inf_fig (object): Visualization object used to display annotated images.
    Outputs:
        list: A list of dictionaries, each containing:
            - 'image_name' (str): Filename of the image.
            - 'boxes' (list): Detected bounding boxes as lists of coordinates.
            - 'classes' (list): Predicted class labels.
            - 'scores' (list): Confidence scores for detections.
    """
    imagePath = glob.glob(f"{DIR_TEST}/*.png")
    image_extensions = ['jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp', 'tif']
    all_extensions = image_extensions + [ext.upper() for ext in image_extensions]  # add uppercase versions
    for extension in all_extensions:
        imagePath.extend(glob.glob(f"{DIR_TEST}/*.{extension}"))
    all_images = [os.path.basename(image_path) for image_path in imagePath]
    all_images = sorted(all_images)
    num_images = len(all_images)
    classes = [None] * num_images
    bboxes = [None] * num_images
    sscores = [None] * num_images
    # list to store results for the CSV
    results = []
    for idx in tqdmBar(range(0, num_images)):
        el = all_images[idx]
        orig_image = cv2.imread(DIR_TEST + '/' + el)
        # BGR to RGB
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        # scale the pixel values to [0, 1]
        image /= 255.0
        # bring color channels to front
        image = np.transpose(image, (2, 0, 1)).astype(float)
        # convert to tensor on the available device
        if torch.cuda.is_available():
            image = torch.tensor(image, dtype=torch.float).cuda()
        else:
            image = torch.tensor(image, dtype=torch.float)
        # add batch dimension
        image = torch.unsqueeze(image, 0)
        with torch.no_grad():
            outputs = model(image)
        # move all detections to the CPU for further operations
        outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
        # carry on only if there are detected boxes
        if len(outputs[0]['boxes']) != 0:
            boxes = outputs[0]['boxes'].data.numpy()
            scores = outputs[0]['scores'].data.numpy()
            sscores[idx] = scores[scores >= detection_threshold]
            # filter out boxes according to `detection_threshold`
            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            bboxes[idx] = boxes
            draw_boxes = bboxes[idx].copy()
            # get all the predicted class names
            pred_classes = [CLASSES[i] for i in outputs[0]['labels'].cpu().numpy()]
            pred_classes = np.array(pred_classes)
            pred_classes = pred_classes[scores >= detection_threshold]
            classes[idx] = pred_classes
            # store the results for this image
            results.append({
                'image_name': el,
                'boxes': boxes.tolist(),
                'classes': pred_classes.tolist(),
                'scores': sscores[idx].tolist()
            })
            # show the un-annotated image, then draw the bounding boxes and class names
            fig, ax = plt.subplots(1, figsize=(4, 4))
            ax.axis('off')
            orig_image_rgb = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
            plt.tight_layout()
            ax.imshow(orig_image_rgb)
            inf_fig.object = fig
            for j, box in enumerate(draw_boxes):
                cv2.rectangle(orig_image_rgb,
                              (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])),
                              (255, 0, 0), 5)
                cv2.putText(orig_image_rgb, str(pred_classes[j]),
                            (int(box[0]), int(box[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0),
                            2, lineType=cv2.LINE_AA)
            writeOut = cv2.cvtColor(orig_image_rgb, cv2.COLOR_RGB2BGR)
            cv2.imwrite(OUT_DIR + '/' + el, writeOut)  # The 'el' filepath is broken right now (TODO: FIX)
            ax.axis('off')  # remove the axis for cleaner visualization
            plt.tight_layout()
            ax.imshow(orig_image_rgb)
            # update the inf_fig pane with the annotated figure
            inf_fig.object = fig
            plt.close()
    saveResultsToCSV('inference_results', results, OUT_DIR)
    print('TEST PREDICTIONS COMPLETE')
    return results
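A call sketch with placeholder paths and classes. Two things are assumed about the last two arguments, based only on how they are used above: tqdmBar must be callable like tqdm (it wraps a range), and inf_fig needs a writable .object attribute (e.g. a panel.pane.Matplotlib pane, though that library choice is an assumption):

from tqdm.notebook import tqdm

results = inference_images('./test_images', model, './outputs',
                           detection_threshold=0.8,
                           CLASSES=['background', 'cell'],
                           tqdmBar=tqdm, inf_fig=inf_fig)  # inf_fig: any object with a settable .object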
def inferencing.load_and_preprocess_image(file_path, target_size=(800, 800)):
Loads an image from disk, resizes it to a target size, converts it to RGB, normalizes pixel values,
and transforms it into a PyTorch tensor suitable for model input.
Inputs
file_path (str): Path to the input image file.
target_size (tuple): Desired output image size as (width, height). Default is (800, 800).
Outputs
tuple: A tuple containing:
- image_tensor (torch.Tensor): Preprocessed image tensor of shape (3, target_height, target_width).
- filename (str): The basename of the input image file.
- original_size (tuple): Original image dimensions as (width, height).
Source code in library/inferencing.py
def load_and_preprocess_image(file_path, target_size=(800, 800)):
    """
    Loads an image from disk, resizes it to a target size, converts it to RGB, normalizes pixel values,
    and transforms it into a PyTorch tensor suitable for model input.
    Inputs:
        file_path (str): Path to the input image file.
        target_size (tuple): Desired output image size as (width, height). Default is (800, 800).
    Outputs:
        tuple: A tuple containing:
            - image_tensor (torch.Tensor): Preprocessed image tensor of shape (3, target_height, target_width).
            - filename (str): The basename of the input image file.
            - original_size (tuple): Original image dimensions as (width, height).
    """
    orig_image = cv2.imread(file_path)
    orig_height, orig_width = orig_image.shape[:2]
    resized_image = cv2.resize(orig_image, target_size)  # resize to the fixed target size
    image_rgb = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    image_tensor = torch.tensor(np.transpose(image_rgb, (2, 0, 1)), dtype=torch.float)
    return image_tensor, os.path.basename(file_path), (orig_width, orig_height)
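A quick sketch of the return values (the path is a placeholder); with the default target_size, the tensor is always (3, 800, 800) regardless of the input resolution:

image_tensor, name, (orig_w, orig_h) = load_and_preprocess_image('imgs/frame.png')
print(image_tensor.shape)  # torch.Size([3, 800, 800])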
def inferencing.scale_boxes_to_original(boxes, original_size, resized_size=(800, 800)):
Scales bounding box coordinates from a resized image back to the original image dimensions.
Inputs
boxes (array-like): Array of bounding boxes with coordinates [x_min, y_min, x_max, y_max]
relative to the resized image.
original_size (tuple): Original image size as (width, height).
resized_size (tuple): Resized image size as (width, height). Default is (800, 800).
Outputs
numpy.ndarray: Array of bounding boxes scaled to the original image size.
Source code in library/inferencing.py
def scale_boxes_to_original(boxes, original_size, resized_size=(800, 800)):
    """
    Scales bounding box coordinates from a resized image back to the original image dimensions.
    Inputs:
        boxes (array-like): Array of bounding boxes with coordinates [x_min, y_min, x_max, y_max]
            relative to the resized image.
        original_size (tuple): Original image size as (width, height).
        resized_size (tuple): Resized image size as (width, height). Default is (800, 800).
    Outputs:
        numpy.ndarray: Array of bounding boxes scaled to the original image size.
    """
    orig_width, orig_height = original_size
    resized_width, resized_height = resized_size
    x_scale = orig_width / resized_width
    y_scale = orig_height / resized_height
    scaled_boxes = []
    for box in boxes:
        x_min, y_min, x_max, y_max = box
        scaled_boxes.append([
            x_min * x_scale, y_min * y_scale,
            x_max * x_scale, y_max * y_scale
        ])
    return np.array(scaled_boxes)
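A worked example of the scaling: for a 1600x1200 original resized to 800x800, x coordinates scale by 1600/800 = 2.0 and y coordinates by 1200/800 = 1.5:

scaled = scale_boxes_to_original([[100, 100, 400, 400]], (1600, 1200))
# scaled -> [[200.0, 150.0, 800.0, 600.0]]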
def inferencing.inference_images_fast(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES, tqdmBar, batch_size=4):
Performs batch inference on images in a directory using the provided model, with optional GPU acceleration.
Inputs
DIR_TEST (str): Directory path containing images for inference.
model (torch.nn.Module): Trained object detection model.
OUT_DIR (str): Directory path to save inference results.
detection_threshold (float): Minimum confidence score to consider a detection valid.
CLASSES (list): List of class names corresponding to model labels.
tqdmBar (callable): Progress bar function used to wrap the batch iteration.
batch_size (int, optional): Number of images to process per batch. Default is 4.
Outputs
list of dict: Each dict contains image filename, bounding boxes (scaled to original image size),
predicted classes, and detection scores for that image.
Source code in library/inferencing.py
def inference_images_fast(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES, tqdmBar, batch_size=4):
    """
    Performs batch inference on images in a directory using the provided model, with optional GPU acceleration.
    Inputs:
        DIR_TEST (str): Directory path containing images for inference.
        model (torch.nn.Module): Trained object detection model.
        OUT_DIR (str): Directory path to save inference results.
        detection_threshold (float): Minimum confidence score to consider a detection valid.
        CLASSES (list): List of class names corresponding to model labels.
        tqdmBar (callable): Progress bar function used to wrap the batch iteration.
        batch_size (int, optional): Number of images to process per batch. Default is 4.
    Outputs:
        list of dict: Each dict contains the image filename, bounding boxes (scaled to the original image size),
            predicted classes, and detection scores for that image.
    """
    # collect all image paths
    image_extensions = ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp']
    all_image_paths = []
    for ext in image_extensions + [ext.upper() for ext in image_extensions]:
        all_image_paths.extend(glob.glob(f"{DIR_TEST}/*.{ext}"))
    all_image_paths = sorted(all_image_paths)
    # prepare the results list for annotations
    results = []
    # device setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    # process images in batches, loading each batch in parallel
    with ThreadPoolExecutor() as executor:
        for start_idx in tqdmBar(range(0, len(all_image_paths), batch_size), desc="Inference Progress"):
            batch_paths = all_image_paths[start_idx:start_idx + batch_size]
            batch_data = list(executor.map(load_and_preprocess_image, batch_paths))
            # separate image tensors, filenames, and original sizes
            images, filenames, original_sizes = zip(*batch_data)
            images = torch.stack(images).to(device)
            # run inference
            with torch.no_grad():
                outputs = model(images)
            # process each image output
            for i, output in enumerate(outputs):
                scores = output['scores'].cpu().numpy()
                boxes = output['boxes'][scores >= detection_threshold].cpu().numpy()
                labels = output['labels'][scores >= detection_threshold].cpu().numpy()
                # scale boxes back to the original image size
                orig_size = original_sizes[i]
                scaled_boxes = scale_boxes_to_original(boxes, orig_size)
                # store annotation results
                pred_classes = [CLASSES[label] for label in labels]
                result = {
                    'image_name': filenames[i],
                    'boxes': scaled_boxes.tolist(),
                    'classes': pred_classes,
                    'scores': scores[scores >= detection_threshold].tolist()
                }
                results.append(result)
    # save the results to CSV
    saveResultsToCSV('inference_results', results, OUT_DIR)
    print('TEST PREDICTIONS COMPLETE')
    return results
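Batching works because load_and_preprocess_image resizes every image to the same target size, so torch.stack can assemble a single (N, 3, 800, 800) tensor per batch. A call sketch with placeholder arguments:

from tqdm.notebook import tqdm

results = inference_images_fast('./test_images', model, './outputs',
                                detection_threshold=0.8,
                                CLASSES=['background', 'cell'],
                                tqdmBar=tqdm, batch_size=8)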
def inferencing.inference_images_figs(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES):
Performs inference on images in a directory using the given model, annotates detected objects with bounding boxes
and class labels, and overlays enlarged views of detected regions on the original images. Saves annotated images with
bounding boxes and enlarged detected regions overlaid to OUT_DIR.
Inputs
DIR_TEST (str): Directory path containing input images.
model (torch.nn.Module): Trained object detection model.
OUT_DIR (str): Directory path to save annotated output images.
detection_threshold (float): Minimum confidence score to consider a detection valid.
CLASSES (list): List of class names corresponding to model output labels.
Outputs
list: A list containing three elements:
- bboxes (list): Detected bounding boxes per image.
- classes (list): Predicted class labels per image.
- sscores (list): Detection scores per image.
Source code in library/inferencing.py
def inference_images_figs(DIR_TEST, model, OUT_DIR, detection_threshold, CLASSES):
    """
    Performs inference on images in a directory using the given model, annotates detected objects with bounding boxes
    and class labels, and overlays enlarged views of detected regions on the original images. Saves annotated images with
    bounding boxes and enlarged detected regions overlaid to OUT_DIR.
    Inputs:
        DIR_TEST (str): Directory path containing input images.
        model (torch.nn.Module): Trained object detection model.
        OUT_DIR (str): Directory path to save annotated output images.
        detection_threshold (float): Minimum confidence score to consider a detection valid.
        CLASSES (list): List of class names corresponding to model output labels.
    Outputs:
        list: A list containing three elements:
            - bboxes (list): Detected bounding boxes per image.
            - classes (list): Predicted class labels per image.
            - sscores (list): Detection scores per image.
    """
    imagePath = glob.glob(f"{DIR_TEST}/*.png")
    image_extensions = ['jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp', 'tif']
    all_extensions = image_extensions + [ext.upper() for ext in image_extensions]  # add uppercase versions
    for extension in all_extensions:
        imagePath.extend(glob.glob(f"{DIR_TEST}/*.{extension}"))
    all_images = sorted(os.path.basename(image_path) for image_path in imagePath)
    num_images = len(all_images)
    classes = [None] * num_images
    bboxes = [None] * num_images
    sscores = [None] * num_images
    for idx, el in enumerate(all_images):
        orig_image = cv2.imread(DIR_TEST + '/' + el)
        # BGR to RGB
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        # normalize the pixel values to [0, 1]
        image /= 255.0
        # rearrange color channels
        image = np.transpose(image, (2, 0, 1)).astype(float)
        # convert to tensor on the available device
        image_tensor = torch.tensor(image, dtype=torch.float).cuda() if torch.cuda.is_available() else torch.tensor(image, dtype=torch.float)
        # add batch dimension
        image_tensor = torch.unsqueeze(image_tensor, 0)
        with torch.no_grad():
            outputs = model(image_tensor)
        outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
        if len(outputs[0]['boxes']) != 0:
            boxes = outputs[0]['boxes'].data.numpy()
            scores = outputs[0]['scores'].data.numpy()
            sscores[idx] = scores[scores >= detection_threshold]
            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            bboxes[idx] = boxes
            draw_boxes = boxes.copy()
            pred_classes = [CLASSES[i] for i in outputs[0]['labels'].cpu().numpy()]
            pred_classes = np.array(pred_classes)
            pred_classes = pred_classes[scores >= detection_threshold]
            classes[idx] = pred_classes
            for j, box in enumerate(draw_boxes):
                x1, y1, x2, y2 = box
                cv2.rectangle(orig_image, (x1, y1), (x2, y2), (0, 0, 255), 2)
                cv2.putText(orig_image, str(pred_classes[j]), (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                # extract and enlarge the detected region (cropped after drawing, so the box outline is included)
                detected_img = orig_image[y1:y2, x1:x2]
                factor = 2  # change factor to the desired zoom
                enlarged_img = cv2.resize(detected_img, None, fx=factor, fy=factor, interpolation=cv2.INTER_LINEAR)
                # calculate where to place the enlarged image on the original
                eh, ew, _ = enlarged_img.shape
                ex, ey = 10, 10  # starting coordinates for the enlarged image (top left)
                # keep the enlarged image inside the bounds of the original image
                if ey + eh > orig_image.shape[0]:
                    ey = orig_image.shape[0] - eh
                if ex + ew > orig_image.shape[1]:
                    ex = orig_image.shape[1] - ew
                # overlay the enlarged image on the original image
                orig_image[ey:ey+eh, ex:ex+ew] = enlarged_img
                # draw lines connecting the small and enlarged boxes
                cv2.line(orig_image, (x1, y1), (ex, ey), (255, 0, 0), 2)
                cv2.line(orig_image, (x2, y2), (ex + ew, ey + eh), (255, 0, 0), 2)
        cv2.imwrite(OUT_DIR + '/' + el, orig_image)  # save the modified image
        print(f"Image {idx+1} done...")
        print('-'*50)
    print('TEST PREDICTIONS COMPLETE')
    return [bboxes, classes, sscores]
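A call sketch with placeholder arguments; note that the zoom level of the overlaid regions is controlled by the hardcoded factor = 2 inside the function:

bboxes, classes, sscores = inference_images_figs('./test_images', model, './outputs',
                                                 detection_threshold=0.8,
                                                 CLASSES=['background', 'cell'])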