import fitz  # PyMuPDF
import cv2
import numpy as np

# Script purpose: render every page of a PDF to an image, outline the
# light-green and pale-yellow (#fefcde) regions, and save one PNG per page.

# Step 1: Describe the input PDF and the colours to look for
pdf_path = "./vaj.pdf"

# Convert hex #fefcde to HSV
yellow_rgb = np.array([254, 252, 222], dtype=np.uint8)  # RGB for #fefcde
yellow_hsv = cv2.cvtColor(np.uint8([[yellow_rgb]]), cv2.COLOR_RGB2HSV)[0][0]

# Print out the HSV value for #fefcde for verification
print(f"HSV for #fefcde: {yellow_hsv}")

# Define the HSV range for the specific yellow color #fefcde.
# yellow_hsv holds uint8 values, so the hue is converted to a plain int before
# applying the +/-10 tolerance (uint8 arithmetic could wrap around), and the
# result is clamped to OpenCV's 8-bit hue range of 0..179.
yellow_hue = int(yellow_hsv[0])
lower_yellow = np.array([max(yellow_hue - 10, 0), 30, 150])  # Adjust tolerance for hue, saturation, and value
upper_yellow = np.array([min(yellow_hue + 10, 179), 100, 255])

# Define the HSV color ranges for light green
lower_green = np.array([35, 50, 50])  # Light green HSV range (adjust as needed)
upper_green = np.array([85, 255, 255])

# Open the PDF as a context manager so the document is closed even if
# processing one of the pages raises.
with fitz.open(pdf_path) as doc:
    # Iterate through all pages in the PDF
    for page_num in range(doc.page_count):
        page = doc.load_page(page_num)

        # Convert the PDF page to a pixmap (image-like structure) and view its
        # sample buffer as a (height, width, channels) uint8 array.
        pix = page.get_pixmap()
        img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)

        # Step 2: Convert the image to BGR format (required by OpenCV).
        # cvtColor returns a new array, so img_bgr can be drawn on even though
        # img_data is a view onto the pixmap's buffer.
        if pix.n == 4:  # If the image has an alpha channel (RGBA), convert to BGR
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR)
        else:  # If the image is RGB
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)

        # Step 3: Convert the image to HSV for color-based segmentation
        hsv_image = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)

        # Step 4: Create masks for light green and specific yellow areas.
        # Both masks come from hsv_image, which is computed before any
        # contour is drawn, so the drawn outlines never affect detection.
        mask_green = cv2.inRange(hsv_image, lower_green, upper_green)
        mask_yellow = cv2.inRange(hsv_image, lower_yellow, upper_yellow)

        # Step 5: Find contours for light green areas
        contours_green, _ = cv2.findContours(mask_green, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Step 6: Find contours for yellow areas
        contours_yellow, _ = cv2.findContours(mask_yellow, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Step 7: Draw exact contours around grouped light green areas
        if contours_green:
            cv2.drawContours(img_bgr, contours_green, -1, (0, 255, 0), 3)  # Draw green contours

        # Step 8: Draw exact contours around grouped yellow areas
        if contours_yellow:
            cv2.drawContours(img_bgr, contours_yellow, -1, (0, 255, 255), 3)  # Draw yellow contours

        # Step 9: Save the output image as PNG.
        # imwrite reports failure through its return value, so check it
        # instead of silently losing the page.
        output_image_path = f"page_{page_num}_exact_grouped_contours.png"
        if not cv2.imwrite(output_image_path, img_bgr):
            raise OSError(f"Failed to write {output_image_path}")