|
|
|
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import numpy.typing as npt |
|
|
import matplotlib.pyplot as plt |
|
|
from matplotlib.patches import Rectangle |
|
|
from typing import Dict, List, Tuple, Optional, Union |
|
|
|
|
|
|
|
|
COLORS = [ |
|
|
"#003EFF", |
|
|
"#FF8F00", |
|
|
"#079700", |
|
|
"#A123FF", |
|
|
"#87CEEB", |
|
|
"#FF5733", |
|
|
"#C70039", |
|
|
"#900C3F", |
|
|
"#581845", |
|
|
"#11998E", |
|
|
] |
|
|
|
|
|
|
|
|
def reformat_for_plotting( |
|
|
boxes: npt.NDArray[np.float64], |
|
|
labels: npt.NDArray[np.int_], |
|
|
scores: npt.NDArray[np.float64], |
|
|
shape: Tuple[int, int, int], |
|
|
num_classes: int, |
|
|
) -> Tuple[List[npt.NDArray[np.int_]], List[npt.NDArray[np.float64]]]: |
|
|
""" |
|
|
Reformat YOLOX predictions for plotting. |
|
|
|
|
|
Args: |
|
|
boxes (np.ndarray): Array of bounding boxes. |
|
|
labels (np.ndarray): Array of labels. |
|
|
scores (np.ndarray): Array of confidence scores. |
|
|
shape (tuple): Shape of the image. |
|
|
num_classes (int): Number of classes. |
|
|
|
|
|
Returns: |
|
|
list[np.ndarray]: List of box bounding boxes per class. |
|
|
list[np.ndarray]: List of confidence scores per class. |
|
|
""" |
|
|
boxes_plot = boxes.copy() |
|
|
boxes_plot[:, [0, 2]] *= shape[1] |
|
|
boxes_plot[:, [1, 3]] *= shape[0] |
|
|
boxes_plot = boxes_plot.astype(int) |
|
|
boxes_plot[:, 2] -= boxes_plot[:, 0] |
|
|
boxes_plot[:, 3] -= boxes_plot[:, 1] |
|
|
boxes_plot = [boxes_plot[labels == c] for c in range(num_classes)] |
|
|
confs = [scores[labels == c] for c in range(num_classes)] |
|
|
return boxes_plot, confs |
|
|
|
|
|
|
|
|
def plot_sample( |
|
|
img: npt.NDArray[np.uint8], |
|
|
boxes_list: List[npt.NDArray[np.int_]], |
|
|
confs_list: List[npt.NDArray[np.float64]], |
|
|
labels: List[str], |
|
|
) -> None: |
|
|
""" |
|
|
Plots an image with bounding boxes. |
|
|
Coordinates are expected in format [x_min, y_min, width, height]. |
|
|
|
|
|
Args: |
|
|
img (numpy.ndarray): The input image to be plotted. |
|
|
boxes_list (list[np.ndarray]): List of box bounding boxes per class. |
|
|
confs_list (list[np.ndarray]): List of confidence scores per class. |
|
|
labels (list): List of class labels. |
|
|
""" |
|
|
plt.imshow(img, cmap="gray") |
|
|
plt.axis(False) |
|
|
|
|
|
for boxes, confs, col, l in zip(boxes_list, confs_list, COLORS, labels): |
|
|
for box_idx, box in enumerate(boxes): |
|
|
|
|
|
h, w, _ = img.shape |
|
|
box = np.copy(box) |
|
|
box[:2] = np.clip(box[:2], 2, max(h, w)) |
|
|
box[2] = min(box[2], w - 2 - box[0]) |
|
|
box[3] = min(box[3], h - 2 - box[1]) |
|
|
|
|
|
rect = Rectangle( |
|
|
(box[0], box[1]), |
|
|
box[2], |
|
|
box[3], |
|
|
linewidth=2, |
|
|
facecolor="none", |
|
|
edgecolor=col, |
|
|
) |
|
|
plt.gca().add_patch(rect) |
|
|
|
|
|
|
|
|
plt.text( |
|
|
box[0], box[1], |
|
|
f"{l}_{box_idx} conf={confs[box_idx]:.3f}", |
|
|
color='white', |
|
|
fontsize=8, |
|
|
bbox=dict(facecolor=col, alpha=1, edgecolor=col, pad=0, linewidth=2), |
|
|
verticalalignment='bottom', |
|
|
horizontalalignment='left' |
|
|
) |
|
|
|
|
|
|
|
|
def reorder_boxes( |
|
|
boxes: npt.NDArray[np.float64], |
|
|
labels: npt.NDArray[np.int_], |
|
|
classes: Optional[List[str]] = None, |
|
|
scores: Optional[npt.NDArray[np.float64]] = None, |
|
|
) -> Union[ |
|
|
Tuple[npt.NDArray[np.float64], npt.NDArray[np.int_]], |
|
|
Tuple[npt.NDArray[np.float64], npt.NDArray[np.int_], npt.NDArray[np.float64]], |
|
|
]: |
|
|
""" |
|
|
Reorder boxes, labels and scores by box coordinates. |
|
|
Ordering depends on the class. |
|
|
|
|
|
Args: |
|
|
boxes (np.ndarray): Array of bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]. |
|
|
labels (np.ndarray): Array of labels of shape (N,). |
|
|
classes (list, optional): List of class labels. Defaults to None. |
|
|
scores (np.ndarray, optional): Array of confidences of shape (N,). Defaults to None. |
|
|
|
|
|
Returns: |
|
|
np.ndarray [N, 4]: Ordered boxes. |
|
|
np.ndarray [N]: Ordered labels. |
|
|
np.ndarray [N]: Ordered scores if scores is not None. |
|
|
""" |
|
|
n_classes = labels.max() if classes is None else len(classes) |
|
|
classes = labels.unique() if classes is None else classes |
|
|
|
|
|
ordered_boxes, ordered_labels, ordered_scores = [], [], [] |
|
|
for c in range(n_classes): |
|
|
boxes_class = boxes[labels == c] |
|
|
if len(boxes_class): |
|
|
|
|
|
sort = ["y0", "x0"] |
|
|
ascending = [True, True] |
|
|
if classes[c] == "ylabel": |
|
|
ascending = [False, True] |
|
|
elif classes[c] == "y_title": |
|
|
sort = ["x0", "y0"] |
|
|
ascending = [True, False] |
|
|
|
|
|
df_coords = pd.DataFrame({ |
|
|
"y0": np.round(boxes_class[:, 1] - boxes_class[:, 1].min(), 2), |
|
|
"x0": np.round(boxes_class[:, 0] - boxes_class[:, 0].min(), 2), |
|
|
}) |
|
|
|
|
|
idxs = df_coords.sort_values(sort, ascending=ascending).index |
|
|
|
|
|
ordered_boxes.append(boxes_class[idxs]) |
|
|
ordered_labels.append(labels[labels == c][idxs]) |
|
|
|
|
|
if scores is not None: |
|
|
ordered_scores.append(scores[labels == c][idxs]) |
|
|
|
|
|
ordered_boxes = np.concatenate(ordered_boxes) |
|
|
ordered_labels = np.concatenate(ordered_labels) |
|
|
if scores is not None: |
|
|
ordered_scores = np.concatenate(ordered_scores) |
|
|
return ordered_boxes, ordered_labels, ordered_scores |
|
|
return ordered_boxes, ordered_labels |
|
|
|
|
|
|
|
|
def postprocess_preds_graphic_element( |
|
|
preds: Dict[str, npt.NDArray], |
|
|
threshold: float = 0.1, |
|
|
class_labels: Optional[List[str]] = None, |
|
|
reorder: bool = True, |
|
|
) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.int_], npt.NDArray[np.float64]]: |
|
|
""" |
|
|
Post process predictions for the page element task. |
|
|
- Applies thresholding |
|
|
- Reorders boxes using the reading order |
|
|
|
|
|
Args: |
|
|
preds (dict): Predictions. Keys are "scores", "boxes", "labels". |
|
|
threshold (float, optional): Threshold for the confidence scores. Defaults to 0.1. |
|
|
class_labels (list, optional): List of class labels. Defaults to None. |
|
|
reorder (bool, optional): Whether to apply reordering. Defaults to True. |
|
|
|
|
|
Returns: |
|
|
numpy.ndarray [N x 4]: Array of bounding boxes. |
|
|
numpy.ndarray [N]: Array of labels. |
|
|
numpy.ndarray [N]: Array of scores. |
|
|
""" |
|
|
boxes = preds["boxes"].cpu().numpy() |
|
|
labels = preds["labels"].cpu().numpy() |
|
|
scores = preds["scores"].cpu().numpy() |
|
|
|
|
|
|
|
|
boxes = boxes[scores > threshold] |
|
|
labels = labels[scores > threshold] |
|
|
scores = scores[scores > threshold] |
|
|
|
|
|
if len(boxes) > 0 and reorder: |
|
|
boxes, labels, scores = reorder_boxes(boxes, labels, class_labels, scores) |
|
|
|
|
|
return boxes, labels, scores |
|
|
|