import fitz  # PyMuPDF

# Function to check if a color is approximately blue or pink
def is_blue_or_pink(color):
    """Classify a color as approximately blue and/or approximately pink.

    Args:
        color: Sequence of RGB components expressed in the [0, 1] range,
            or None when no color is available.

    Returns:
        A ``(is_blue, is_pink)`` tuple of booleans. ``(False, False)`` is
        returned for a None color so callers can always unpack two values.
    """
    if color is None:
        # Keep the return shape consistent with the normal path; a bare
        # ``False`` would break ``is_blue, is_pink = is_blue_or_pink(...)``.
        return False, False

    # Approximate RGB values for blue and pink
    blue_rgb = (0, 0, 1)
    pink_rgb = (1, 0.75, 0.8)
    tolerance = 0.1

    def is_close(color1, color2):
        # Every channel must be strictly within ``tolerance`` of the target.
        return all(abs(c1 - c2) < tolerance for c1, c2 in zip(color1, color2))

    return is_close(color, blue_rgb), is_close(color, pink_rgb)

# Function to convert an integer color to an RGB tuple
def int_to_rgb(color_int):
    """Unpack a 0xRRGGBB integer into an ``(r, g, b)`` tuple of floats in [0, 1].

    Only the low 24 bits are used; each 8-bit channel is divided by 255.
    """
    # Shift amounts select the red, green and blue bytes, in that order.
    channels = ((color_int >> shift) & 0xFF for shift in (16, 8, 0))
    return tuple(channel / 255.0 for channel in channels)

# Function to merge all rectangles into a single bounding box
def merge_all_rects(rects):
    """Combine all rectangles into a single value using the ``|`` operator.

    Args:
        rects: Sequence of objects supporting ``|`` (the caller passes
            ``fitz.Rect`` instances, for which ``|`` yields the union box).

    Returns:
        The combination of every element, or None when ``rects`` is empty.
        The input elements are never modified.
    """
    if not rects:
        return None
    merged_rect = None
    for index, rect in enumerate(rects):
        if index == 0:
            merged_rect = rect  # Start with the first rectangle
        else:
            # Use ``|`` rather than ``|=``: an in-place union would mutate the
            # caller's first element for types that implement ``__ior__``.
            merged_rect = merged_rect | rect
    return merged_rect

# Path to the input and output PDF files
input_pdf_path = "./kanchi-big-town.pdf"
output_pdf_path = "./kanchi-big-town_filled.pdf"

# Open the PDF; the context manager closes the document even if a page fails
with fitz.open(input_pdf_path) as pdf_document:
    # Loop through each page of the PDF
    for page in pdf_document:
        # Get all blocks on the page
        text_instances = page.get_text("dict")["blocks"]

        # Collect rectangles by color
        blue_rects = []
        pink_rects = []

        # Loop through text instances and detect color
        for block in text_instances:
            # Only blocks that have a "lines" entry contain text spans
            if "lines" not in block:
                continue
            for line in block["lines"]:
                for span in line["spans"]:
                    color = span.get("color")

                    # A span without color information cannot be classified
                    if color is None:
                        continue

                    # If color is an integer, convert it to RGB manually
                    if isinstance(color, int):
                        rgb_color = int_to_rgb(color)
                    else:
                        # If color is already a tuple, normalize it to [0, 1] range
                        rgb_color = (
                            color[0] / 255.0,
                            color[1] / 255.0,
                            color[2] / 255.0,
                        )

                    is_blue, is_pink = is_blue_or_pink(rgb_color)

                    # Get bounding box for the text
                    bbox = fitz.Rect(*span["bbox"])

                    # Group rectangles by color (blue takes precedence)
                    if is_blue:
                        blue_rects.append(bbox)
                    elif is_pink:
                        pink_rects.append(bbox)

        # Merge the rectangles of each color group and fill the merged area:
        # orange fill for blue text, light pink fill for pink text.
        fill_jobs = (
            ("Blue", blue_rects, (1, 0.6, 0)),
            ("Pink", pink_rects, (1, 0.8, 0.8)),
        )
        for label, rects, fill_color in fill_jobs:
            if not rects:
                continue
            merged = merge_all_rects(rects)
            if not merged:
                continue
            print(f"Final {label} Polygon Coordinates: {merged}")
            annot = page.add_rect_annot(merged)
            annot.set_colors(fill=fill_color)
            # Color changes only reach the annotation's appearance after
            # update(); without it the fill is not rendered.
            annot.update()

    # Save the modified PDF while the document is still open
    pdf_document.save(output_pdf_path)

print(f"PDF with filled areas saved as {output_pdf_path}")
