import pdfplumber

# Function to group shapes by color
def group_by_color(objects, shape_type):
    """Group the shapes of one kind by their ``non_stroking_color``.

    Args:
        objects: Mapping from object kind to a list of shape dicts.
        shape_type: Key in ``objects`` whose shapes should be grouped.

    Returns:
        A dict mapping each colour, normalised to a tuple, to the list of
        shapes carrying that colour (in their original order). Shapes whose
        colour is missing or ``None`` are grouped under the empty tuple, and
        a bare scalar colour ``c`` is grouped under ``(c,)``.
    """
    color_groups = {}

    # `or []` also covers a key that is present but mapped to None.
    for shape in objects.get(shape_type) or []:
        raw_color = shape.get('non_stroking_color')

        if raw_color is None:
            # Key absent or explicitly None: tuple(None) would raise.
            color = ()
        else:
            try:
                color = tuple(raw_color)
            except TypeError:
                # Not iterable (e.g. a single number): wrap it instead of crashing.
                color = (raw_color,)

        color_groups.setdefault(color, []).append(shape)

    return color_groups

# Function to group smaller curves into a single bounding box
def group_curves_into_single(curves):
    """Return one bounding box that encloses every curve.

    Args:
        curves: Collection of dicts, each with 'x0', 'y0', 'x1' and 'y1' keys.

    Returns:
        A dict with the smallest 'x0'/'y0' and the largest 'x1'/'y1' found
        across all curves, or ``None`` when ``curves`` is empty or falsy.
    """
    if not curves:
        return None

    # Pull the four coordinates of every curve in a single pass.
    corners = [(c['x0'], c['y0'], c['x1'], c['y1']) for c in curves]

    # Each fold is seeded with an infinite sentinel so it starts from the
    # same extreme value as a running min/max would.
    pos_inf = float('inf')
    neg_inf = float('-inf')

    left = min([pos_inf] + [corner[0] for corner in corners])
    low = min([pos_inf] + [corner[1] for corner in corners])
    right = max([neg_inf] + [corner[2] for corner in corners])
    high = max([neg_inf] + [corner[3] for corner in corners])

    return {'x0': left, 'y0': low, 'x1': right, 'y1': high}

# Function to check if a color is approximately blue or pink
def is_blue_or_pink(color, tolerance=0.1):
    """Tell whether an RGB colour is close to pure blue or to pink.

    Args:
        color: Iterable of three numeric components, or ``None``.
        tolerance: Maximum allowed per-component difference (exclusive).

    Returns:
        True when every component is within ``tolerance`` of (0, 0, 1) or of
        (1, 0.75, 0.8). False for ``None``, for non-iterable values, and for
        colours that do not have exactly three components.
    """
    if color is None:
        return False

    try:
        components = tuple(color)
    except TypeError:
        # A bare number is not an RGB triple.
        return False

    # zip() stops at the shorter sequence, so without this check an empty,
    # 1-component or 4-component colour would be compared on a prefix only
    # and could match by accident.
    if len(components) != 3:
        return False

    # Approximate RGB values for blue and pink
    blue_rgb = (0, 0, 1)
    pink_rgb = (1, 0.75, 0.8)

    def is_close(reference):
        return all(abs(c - r) < tolerance for c, r in zip(components, reference))

    return is_close(blue_rgb) or is_close(pink_rgb)

# Check whether a shape overlaps any of the collected text boxes
def is_shape_near_text(shape, text_positions):
    """Return True when the shape's box intersects at least one text box.

    Both the shape and every entry of ``text_positions`` are dicts exposing
    'x0', 'x1', 'top' and 'bottom'. Boxes that merely touch count as
    overlapping because the comparisons are inclusive.
    """
    return any(
        text['x0'] <= shape['x1'] and text['x1'] >= shape['x0']
        and text['top'] <= shape['bottom'] and text['bottom'] >= shape['top']
        for text in text_positions
    )


# Open the PDF file
with pdfplumber.open("./FINAL.pdf") as pdf:
    # Loop through pages (numbered from 1 for display)
    for page_num, page in enumerate(pdf.pages, start=1):
        print(f"Page {page_num}")

        # Vector elements of the page, keyed by object kind
        objects = page.objects

        # Collect the characters drawn in blue or pink
        blue_pink_text_positions = []
        for char in page.chars:
            color = char.get('non_stroking_color')
            if color is None:
                continue
            try:
                color = tuple(color)  # Ensure the color is a tuple
            except TypeError:
                # A bare number is a single gray level: it can be neither
                # blue nor pink, so skip it instead of crashing.
                continue
            if is_blue_or_pink(color):
                blue_pink_text_positions.append({
                    'text': char['text'],
                    'x0': char['x0'],
                    'top': char['top'],
                    'x1': char['x1'],
                    'bottom': char['bottom'],
                    'color': color
                })

        # page.objects uses singular kind names ('rect', 'curve', ...);
        # looking up 'rects' would always come back empty.
        rect_groups = group_by_color(objects, 'rect')
        curve_groups = group_by_color(objects, 'curve')

        # Print grouped rectangles near blue/pink text
        print("\nGrouped Rectangles near Blue/Pink Text:")
        for color, shapes in rect_groups.items():
            print(f"Color: {color} | Number of Rectangles: {len(shapes)}")
            for rect in shapes:
                if is_shape_near_text(rect, blue_pink_text_positions):
                    print(f"Position: ({rect['x0']}, {rect['top']}) to ({rect['x1']}, {rect['bottom']})")

        # Print grouped curves near blue/pink text
        print("\nGrouped Curves near Blue/Pink Text:")
        for color, curves in curve_groups.items():
            print(f"Color: {color} | Number of Curves: {len(curves)}")

            # Merge the curves that overlap blue/pink text into one bounding box
            filtered_curves = [
                curve for curve in curves
                if is_shape_near_text(curve, blue_pink_text_positions)
            ]
            grouped_curve = group_curves_into_single(filtered_curves)
            if grouped_curve:
                print(f"Grouped Curve bounding box: ({grouped_curve['x0']}, {grouped_curve['y0']}) to ({grouped_curve['x1']}, {grouped_curve['y1']})")
