""" Bounding box clustering module for grouping nearby text elements. Copied from the original with minimal modifications for HuggingFace Spaces. """ from math import sqrt, dist import extcolors from py_files.pycolor import find class Node: def __init__(self, data=None, x1=None, y1=None, x2=None, y2=None, font_size=None, children=None): if children is None: children = [] self.data = data self.top_left_x = min(int(x1), int(x2)) if x1 is not None and x2 is not None else None self.top_left_y = min(int(y1), int(y2)) if y1 is not None and y2 is not None else None self.bottom_right_x = max(int(x1), int(x2)) if x1 is not None and x2 is not None else None self.bottom_right_y = max(int(y1), int(y2)) if y1 is not None and y2 is not None else None self.font_size = abs(self.bottom_right_y - self.top_left_y) if not font_size else font_size self.children = children def add_child(self, child): child_top_left_x = child.top_left_x child_top_left_y = child.top_left_y child_bottom_right_x = child.bottom_right_x child_bottom_right_y = child.bottom_right_y self.top_left_x = min(self.top_left_x, child_top_left_x) self.top_left_y = min(self.top_left_y, child_top_left_y) self.bottom_right_x = max(self.bottom_right_x, child_bottom_right_x) self.bottom_right_y = max(self.bottom_right_y, child_bottom_right_y) self.data += " " + child.data self.children.append(child) self.font_size = (max(self.font_size, 0) + max(child.font_size, 0)) // 2 def get_data(self): return self.data def __repr__(self): return f'Node({self.data}, size: {self.font_size}, {self.top_left_x}, {self.top_left_y}, {self.bottom_right_x}, {self.bottom_right_y}, {self.children})' class ImageNode(Node): def __init__(self, category=None, top_left_x=None, top_left_y=None, bottom_right_x=None, bottom_right_y=None, text=""): super().__init__(data=None, top_left_x=top_left_x, top_left_y=top_left_y, bottom_right_x=bottom_right_x, bottom_right_y=bottom_right_y) self.category = category self.text = text def get_boundaries(self): return self.top_left_x, self.top_left_y, self.bottom_right_x, self.bottom_right_y def get_text(self): return self.text def set_text(self, text): self.text = text def get_category(self): return self.category def __repr__(self): return f'ImageNode({self.text}, {self.top_left_x}, {self.top_left_y}, {self.bottom_right_x}, {self.bottom_right_y})' def distance_between_parallel_lines(pt1, pt2, pt3, pt4): x1, y1 = pt1 x2, y2 = pt2 x3, y3 = pt3 numerator = abs((y2 - y1) * x3 - (x2 - x1) * y3 + x2 * y1 - y2 * x1) denominator = sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2) if denominator == 0: return None return numerator / denominator def create_node_lines(node): top_left = (node.top_left_x, node.top_left_y) top_right = (node.bottom_right_x, node.top_left_y) bottom_left = (node.top_left_x, node.bottom_right_y) bottom_right = (node.bottom_right_x, node.bottom_right_y) return { "left_line": [top_left, bottom_left], "right_line": [top_right, bottom_right], "top_line": [top_left, top_right], "bottom_line": [bottom_left, bottom_right], } def rect_distance(x1, y1, x1b, y1b, x2, y2, x2b, y2b): left = x2b < x1 right = x1b < x2 bottom = y2b < y1 top = y1b < y2 if top and left: return dist((x1, y1b), (x2b, y2)) elif left and bottom: return dist((x1, y1), (x2b, y2b)) elif bottom and right: return dist((x1b, y1), (x2, y2b)) elif right and top: return dist((x1b, y1b), (x2, y2)) elif left: return x1 - x2b elif right: return x2 - x1b elif bottom: return y1 - y2b elif top: return y2 - y1b else: # rectangles intersect return 0 def distance_between_nodes(node1, node2): return rect_distance(node1.top_left_x, node1.top_left_y, node1.bottom_right_x, node1.bottom_right_y, node2.top_left_x, node2.top_left_y, node2.bottom_right_x, node2.bottom_right_y) class QuadTree: def __init__(self, root=None, max_dist=5): self.root = root if isinstance(root, Node) else Node(None, 0, 0, 0, 0) self.max_dist = max_dist def insert(self, node: Node, parent=None): if parent is None: parent = self.root if len(parent.children) == 0: parent.children.append(node) return min_dist = float('inf') min_child = None for child in parent.children: _dist = distance_between_nodes(node, child) if _dist < min_dist: min_dist = _dist min_child = child if min_dist <= self.max_dist: min_child.add_child(node) return True else: parent.children.append(node) return False def get_root(self): return self.root def get_children(self, data=False): boxes = [] datas = [] for child in self.root.children: boxes.append([child.top_left_x, child.top_left_y, child.bottom_right_x, child.bottom_right_y]) datas.append(child.data) if data: return datas, boxes return boxes def get_children_nodes(self): return self.root.children def get_dataframe(self, image): import pandas as pd def custom_sort(row): x, y = row return (y, x) data = [] for child in self.root.children: x1, y1, x2, y2 = child.top_left_x, child.top_left_y, child.bottom_right_x + 3, child.bottom_right_y + 3 try: img_crop = image.crop((x1, y1, x2, y2)) colors, pixel_count = extcolors.extract_from_image(img_crop) # Get background and font colors bg_color = colors[0][0] if colors else (255, 255, 255) font_color = colors[-1][0] if len(colors) > 1 else (0, 0, 0) data.append([ child.data, (child.top_left_x, child.top_left_y), (child.bottom_right_x, child.bottom_right_y), child.font_size, f"{find(bg_color)}, (RGB: {bg_color[0]}, {bg_color[1]}, {bg_color[2]})", f"{find(font_color)}, (RGB: {font_color[0]}, {font_color[1]}, {font_color[2]})" ]) except Exception as e: # Fallback for color extraction errors data.append([ child.data, (child.top_left_x, child.top_left_y), (child.bottom_right_x, child.bottom_right_y), child.font_size, "white, (RGB: 255, 255, 255)", "black, (RGB: 0, 0, 0)" ]) df = pd.DataFrame(data, columns=["Text", "Top Co-ordinates", "Bottom Co-ordinates", "Font Size", "Background Color", "Font Color"]) df = df.sort_values(by="Top Co-ordinates", key=lambda x: x.apply(custom_sort)) return df def __repr__(self): repr = "" for c in self.root.children: repr += f'Node({c.data}, size: {c.font_size}, {c.top_left_x}, {c.top_left_y}, {c.bottom_right_x}, {c.bottom_right_y})\n' return repr