# import sys
# import json
# import fitz  # PyMuPDF
# import re

# def expand_bbox(bbox, padding=2):
#     x0, y0, x1, y1 = bbox
#     return [x0 - padding, y0 - padding, x1 + padding, y1 + padding]

# def extract_plot_numbers_from_pdf(pdf_document):
#     document = fitz.open(pdf_document)
#     number_pattern = re.compile(r'\b\d+\b')
#     extracted_data = {}

#     for page_num in range(len(document)):
#         page = document.load_page(page_num)
#         blocks = page.get_text("dict")["blocks"]

#         for block in blocks:
#             for line in block["lines"]:
#                 for span in line["spans"]:
#                     text = span["text"]
#                     numbers = number_pattern.findall(text)
#                     if numbers:
#                         bbox = span["bbox"]
#                         expanded_bbox = expand_bbox(bbox)
#                         for number in numbers:
#                             if number not in extracted_data:
#                                 extracted_data[number] = []
#                             extracted_data[number].append({"page": page_num + 1, "bbox": expanded_bbox})

#     return extracted_data

# def draw_contours_on_pdf(pdf_document, output_pdf, extracted_data, margin=15):
#     document = fitz.open(pdf_document)
#     for number, data_list in extracted_data.items():
#         for data in data_list:
#             page_num = data["page"] - 1
#             bbox = data["bbox"]
#             page = document.load_page(page_num)
#             x0, y0, x1, y1 = bbox

#             # Increase the size of the rectangle by the margin
#             expanded_bbox = [x0 - margin, y0 - margin, x1 + margin, y1 + margin]
#             rect = fitz.Rect(expanded_bbox)
#             page.draw_rect(rect, color=(0.68, 0.85, 0.90), width=2)

#     document.save(output_pdf)
#     print(f"Output PDF saved to {output_pdf}")

# if __name__ == "__main__":
#     input_pdf = './MELERIPAKKAM.pdf'
#     output_pdf = './Vajralayam-Layout_with_contours.pdf'
#     extracted_data = extract_plot_numbers_from_pdf(input_pdf)
#     json_output = json.dumps(extracted_data, indent=4)
#     print(f"Extracted Data:\n{json_output}")
#     draw_contours_on_pdf(input_pdf, output_pdf, extracted_data)

import sys
import json
import fitz  # PyMuPDF
import re

def expand_bbox(bbox, padding=2):
    """Return a copy of ``bbox`` grown by ``padding`` on all four sides.

    Args:
        bbox: Iterable of exactly four numbers ``(x0, y0, x1, y1)``.
        padding: Amount subtracted from ``x0``/``y0`` and added to
            ``x1``/``y1``. Defaults to 2.

    Returns:
        A new four-element list ``[x0 - padding, y0 - padding,
        x1 + padding, y1 + padding]``; the input is not modified.
    """
    x_start, y_start, x_end, y_end = bbox
    grown = [x_start - padding, y_start - padding]
    grown.append(x_end + padding)
    grown.append(y_end + padding)
    return grown

def extract_plot_numbers_from_pdf(pdf_document):
    """Find every standalone run of digits in a PDF's text and record where it is.

    Each page's text is read span by span. For every span whose text
    contains one or more matches of ``\\b\\d+\\b``, the span's bounding box
    is padded with ``expand_bbox`` and recorded once per matched number.

    Args:
        pdf_document: Value passed straight to ``fitz.open`` (typically the
            path of the PDF file).

    Returns:
        A dict mapping each matched digit string to a list of
        ``{"page": <1-based page number>, "bbox": [x0, y0, x1, y1]}``
        dicts, in document order. The bbox is that of the whole span
        containing the number, not of the digits alone.
    """
    number_pattern = re.compile(r'\b\d+\b')
    extracted_data = {}

    # The context manager closes the document even if extraction raises.
    with fitz.open(pdf_document) as document:
        for page_number, page in enumerate(document, start=1):
            for block in page.get_text("dict")["blocks"]:
                # Image blocks have no "lines" key; skip them instead of
                # failing with KeyError.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        numbers = number_pattern.findall(span["text"])
                        if not numbers:
                            continue
                        expanded_bbox = expand_bbox(span["bbox"])
                        for number in numbers:
                            # Copy the bbox so entries never alias one list.
                            extracted_data.setdefault(number, []).append(
                                {"page": page_number, "bbox": list(expanded_bbox)}
                            )

    return extracted_data

if __name__ == "__main__":
    # Script entry point: takes the PDF path as the first command-line
    # argument and prints the extracted numbers as indented JSON on stdout.
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, non-zero status)
        # rather than an IndexError traceback when the path is missing.
        sys.exit(f"Usage: {sys.argv[0]} <input.pdf>")

    input_pdf = sys.argv[1]
    extracted_data = extract_plot_numbers_from_pdf(input_pdf)
    json_output = json.dumps(extracted_data, indent=4)
    print(json_output)
