# import sys
# import cv2
# import json
# import numpy as np

# def hex_to_hsv(hex_code):
#     # Convert hex code to RGB format
#     rgb_color = np.array([int(hex_code[i:i+2], 16) for i in (1, 3, 5)])
#     # Convert RGB to BGR format
#     bgr_color = np.flip(rgb_color)
#     # Convert BGR to HSV format
#     hsv_color = cv2.cvtColor(np.uint8([[bgr_color]]), cv2.COLOR_BGR2HSV)[0][0]
#     return hsv_color

# def draw_contours(image_path, colors_dict, output_path, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for cnt, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h
#             bounding_rectangles.append({"contour_number": cnt, "coordinates": (x1, y1, x2, y2)})  # Append each rectangle individually

#             # Draw rectangle around each contour
#             cv2.rectangle(source_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

#     # Save the resulting image
#     cv2.imwrite(output_path, source_image)

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles)

#     # Return the JSON string
#     return json_string

# if __name__ == "__main__":
#     # Define the colors and their corresponding lower and upper bounds in HSV format
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Check if the script is run with the correct number of arguments
#     if len(sys.argv) != 2:
#         print("Usage: python script.py <image_file>")
#         sys.exit(1)

#     # Get the image file path from command-line arguments
#     image_file = sys.argv[1]

#     # Output path for the resulting image
#     output_path = 'floor_plan_with_bbox.png'

#     # Draw contours on areas filled with specified colors
#     json_output = draw_contours(image_file, colors, output_path, min_contour_size=150, max_contour_size=10000)
#     print(json_output)


# import sys
# import cv2
# import json
# import numpy as np
# import pytesseract

# def hex_to_hsv(hex_code):
#     # Convert hex code to RGB format
#     rgb_color = np.array([int(hex_code[i:i+2], 16) for i in (1, 3, 5)])
#     # Convert RGB to BGR format
#     bgr_color = np.flip(rgb_color)
#     # Convert BGR to HSV format
#     hsv_color = cv2.cvtColor(np.uint8([[bgr_color]]), cv2.COLOR_BGR2HSV)[0][0]
#     return hsv_color

# def draw_contours(image_path, colors_dict, output_path, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for cnt, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h

#             # Extract the region of interest (ROI) for OCR
#             roi = source_image[y1:y2, x1:x2]

#             # Use Tesseract to recognize text in the ROI
#             plot_number = pytesseract.image_to_string(roi, config='--psm 6').strip()

#             bounding_rectangles.append({"contour_number": cnt, "plot_number": plot_number, "coordinates": (x1, y1, x2, y2)})

#             # Draw rectangle around each contour
#             cv2.rectangle(source_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
#             # Annotate the plot number on the image
#             cv2.putText(source_image, plot_number, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

#     # Save the resulting image
#     cv2.imwrite(output_path, source_image)

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles)

#     # Return the JSON string
#     return json_string

# if __name__ == "__main__":
#     # Define the colors and their corresponding lower and upper bounds in HSV format
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Check if the script is run with the correct number of arguments
#     if len(sys.argv) != 2:
#         print("Usage: python script.py <image_file>")
#         sys.exit(1)

#     # Get the image file path from command-line arguments
#     image_file = sys.argv[1]

#     # Output path for the resulting image
#     output_path = 'floor_plan_with_bbox.png'

#     # Draw contours on areas filled with specified colors
#     json_output = draw_contours(image_file, colors, output_path, min_contour_size=150, max_contour_size=10000)
#     print(json_output)


# import sys
# import cv2
# import json
# import numpy as np
# import pytesseract

# # Set the Tesseract executable path explicitly
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def hex_to_hsv(hex_code):
#     # Convert hex code to RGB format
#     rgb_color = np.array([int(hex_code[i:i+2], 16) for i in (1, 3, 5)])
#     # Convert RGB to BGR format
#     bgr_color = np.flip(rgb_color)
#     # Convert BGR to HSV format
#     hsv_color = cv2.cvtColor(np.uint8([[bgr_color]]), cv2.COLOR_BGR2HSV)[0][0]
#     return hsv_color

# def draw_contours(image_path, colors_dict, output_path, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for cnt, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h

#             # Extract the region of interest (ROI) for OCR
#             roi = source_image[y1:y2, x1:x2]

#             # Use Tesseract to recognize text in the ROI
#             plot_number = pytesseract.image_to_string(roi, config='--psm 6').strip()

#             bounding_rectangles.append({"contour_number": cnt, "plot_number": plot_number, "coordinates": (x1, y1, x2, y2)})

#             # Draw rectangle around each contour
#             cv2.rectangle(source_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
#             # Annotate the plot number on the image
#             cv2.putText(source_image, plot_number, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

#     # Save the resulting image
#     cv2.imwrite(output_path, source_image)

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles)

#     # Return the JSON string
#     return json_string

# if __name__ == "__main__":
#     # Define the colors and their corresponding lower and upper bounds in HSV format
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Check if the script is run with the correct number of arguments
#     if len(sys.argv) != 2:
#         print("Usage: python script.py <image_file>")
#         sys.exit(1)

#     # Get the image file path from command-line arguments
#     image_file = sys.argv[1]

#     # Output path for the resulting image
#     output_path = 'floor_plan_with_bbox.png'

#     # Draw contours on areas filled with specified colors
#     json_output = draw_contours(image_file, colors, output_path, min_contour_size=150, max_contour_size=10000)
#     print(json_output)

# import sys
# import cv2
# import json
# import numpy as np
# import pytesseract

# # Set the Tesseract executable path explicitly
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def hex_to_hsv(hex_code):
#     # Convert hex code to RGB format
#     rgb_color = np.array([int(hex_code[i:i+2], 16) for i in (1, 3, 5)])
#     # Convert RGB to BGR format
#     bgr_color = np.flip(rgb_color)
#     # Convert BGR to HSV format
#     hsv_color = cv2.cvtColor(np.uint8([[bgr_color]]), cv2.COLOR_BGR2HSV)[0][0]
#     return hsv_color

# def preprocess_roi(roi):
#     # Convert the ROI to grayscale
#     gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
#     # Apply thresholding to get a binary image
#     _, thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
#     # Optionally, resize the image to improve OCR accuracy
#     resized = cv2.resize(thresh, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
#     return resized

# def draw_contours(image_path, colors_dict, output_path, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for cnt, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h

#             # Extract the region of interest (ROI) for OCR
#             roi = source_image[y1:y2, x1:x2]

#             # Preprocess the ROI
#             processed_roi = preprocess_roi(roi)

#             # Use Tesseract to recognize text in the ROI
#             plot_number = pytesseract.image_to_string(processed_roi, config='--psm 6').strip()

#             bounding_rectangles.append({"contour_number": cnt, "plot_number": plot_number, "coordinates": (x1, y1, x2, y2)})

#             # Draw rectangle around each contour
#             cv2.rectangle(source_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
#             # Annotate the plot number on the image
#             cv2.putText(source_image, plot_number, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

#     # Save the resulting image
#     cv2.imwrite(output_path, source_image)

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles, ensure_ascii=False)

#     # Return the JSON string
#     return json_string

# if __name__ == "__main__":
#     # Define the colors and their corresponding lower and upper bounds in HSV format
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Check if the script is run with the correct number of arguments
#     if len(sys.argv) != 2:
#         print("Usage: python script.py <image_file>")
#         sys.exit(1)

#     # Get the image file path from command-line arguments
#     image_file = sys.argv[1]

#     # Output path for the resulting image
#     output_path = 'floor_plan_with_bbox.png'

#     # Draw contours on areas filled with specified colors
#     json_output = draw_contours(image_file, colors, output_path, min_contour_size=150, max_contour_size=10000)
#     print(json_output)


# import sys
# import cv2
# import json
# import numpy as np
# import pytesseract
# import re

# # Set the Tesseract executable path explicitly
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def preprocess_roi(roi):
#     # Convert the ROI to grayscale
#     gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
#     # Apply adaptive thresholding to get a binary image
#     thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
#     # Optionally, resize the image to improve OCR accuracy
#     resized = cv2.resize(thresh, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
#     return resized

# def clean_plot_number(plot_number):
#     # Remove unwanted characters and whitespace
#     plot_number = re.sub(r'[^0-9]', '', plot_number)
#     return plot_number.strip()

# def draw_contours(image_path, colors_dict, output_path, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for cnt, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h

#             # Extract the region of interest (ROI) for OCR
#             roi = source_image[y1:y2, x1:x2]

#             # Preprocess the ROI
#             processed_roi = preprocess_roi(roi)

#             # Use Tesseract to recognize text in the ROI
#             plot_number = pytesseract.image_to_string(processed_roi, config='--psm 6')

#             # Clean the plot number
#             plot_number = clean_plot_number(plot_number)

#             bounding_rectangles.append({"plot_number": plot_number, "coordinates": [x1, y1, x2, y2]})

#             # Draw rectangle around each contour
#             cv2.rectangle(source_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

#     # Save the resulting image with green bounding boxes
#     cv2.imwrite(output_path, source_image)

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles, ensure_ascii=False)

#     # Print the JSON string
#     print(f"JSON Output: {json_string}")

#     return json_string

# if __name__ == "__main__":
#     # Define the colors and their corresponding lower and upper bounds in HSV format
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Check if the script is run with the correct number of arguments
#     if len(sys.argv) != 2:
#         print("Usage: python script.py <image_file>")
#         sys.exit(1)

#     # Get the image file path from command-line arguments
#     image_file = sys.argv[1]

#     # Output path for the resulting image (with green bounding boxes)
#     output_path = 'floor_plan_with_bbox.png'

#     # Draw contours on areas filled with specified colors
#     json_output = draw_contours(image_file, colors, output_path, min_contour_size=150, max_contour_size=10000)
#     print(json_output)


# import sys
# import cv2
# import json
# import numpy as np
# import pytesseract
# import re

# # Set the Tesseract executable path explicitly
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def preprocess_roi(roi):
#     # Convert the ROI to grayscale
#     gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
#     # Apply adaptive thresholding to get a binary image
#     thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
#     # Optionally, resize the image to improve OCR accuracy
#     resized = cv2.resize(thresh, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
#     return resized

# def clean_plot_number(plot_number):
#     # Remove unwanted characters and whitespace
#     plot_number = re.sub(r'[^0-9]', '', plot_number)
#     return plot_number.strip()

# def extract_plot_numbers(image_path, colors_dict, min_contour_size=150, max_contour_size=10000):
#     # Load the source image
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     # Convert the image to HSV color space
#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Initialize a blank mask to store combined ROIs for all colors
#     combined_mask = np.zeros_like(hsv_image[:, :, 0])

#     # Loop through each color
#     for color_hex_code, bounds in colors_dict.items():
#         # Create a mask for the specific color
#         mask = cv2.inRange(hsv_image, bounds['lower'], bounds['upper'])

#         # Combine the mask with the previous masks
#         combined_mask = cv2.bitwise_or(combined_mask, mask)

#     # Find contours in the combined mask
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     # Iterate through each contour to get bounding rectangle coordinates
#     for idx, contour in enumerate(contours):
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             x1, y1 = x, y
#             x2, y2 = x + w, y + h

#             # Extract the region of interest (ROI) for OCR
#             roi = source_image[y1:y2, x1:x2]

#             # Preprocess the ROI
#             processed_roi = preprocess_roi(roi)

#             # Use Tesseract to recognize text in the ROI
#             plot_number = pytesseract.image_to_string(processed_roi, config='--psm 6')

#             # Clean the plot number
#             plot_number = clean_plot_number(plot_number)

#             bounding_rectangles.append({"plot_number": plot_number, "coordinates": [x1, y1, x2, y2]})

#     # Convert the array to JSON string
#     json_string = json.dumps(bounding_rectangles, ensure_ascii=False)

#     # Print the JSON string (optional)
#     print(json_string)

# if __name__ == "__main__":
#     # Example colors dictionary (adjust as per your requirements)
#     colors = {
#         '#FFFBDB': {'lower': np.array([20, 10, 200]), 'upper': np.array([30, 100, 255])},  # Light Yellow
#         '#AFCB04': {'lower': np.array([24, 200, 150]), 'upper': np.array([44, 255, 255])}  # Lime Green
#     }

#     # Retrieve image file path from command-line argument
#     image_path = sys.argv[1]

#     # Process image and extract plot numbers
#     extract_plot_numbers(image_path, colors)


# import sys
# import json
# import numpy as np
# import cv2
# import pytesseract
# import re
# import fitz  # PyMuPDF

# # Set the Tesseract executable path explicitly
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def preprocess_roi(roi):
#     gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
#     thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
#     return thresh

# def clean_plot_number(plot_number):
#     plot_number = re.sub(r'[^0-9]', '', plot_number)
#     return plot_number.strip()

# def extract_plot_numbers(image_path, min_contour_size=150, max_contour_size=10000):
#     source_image = cv2.imread(image_path)

#     if source_image is None:
#         print(f"Error: Unable to load image at {image_path}")
#         sys.exit(1)

#     hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

#     # Detect black text
#     lower_black = np.array([0, 0, 0])
#     upper_black = np.array([180, 255, 30])
#     mask_black = cv2.inRange(hsv_image, lower_black, upper_black)

#     # Detect red color
#     lower_red1 = np.array([0, 70, 50])
#     upper_red1 = np.array([10, 255, 255])
#     lower_red2 = np.array([170, 70, 50])
#     upper_red2 = np.array([180, 255, 255])
#     mask_red = cv2.inRange(hsv_image, lower_red1, upper_red1) + cv2.inRange(hsv_image, lower_red2, upper_red2)

#     # Detect green color
#     lower_green = np.array([36, 25, 25])
#     upper_green = np.array([86, 255, 255])
#     mask_green = cv2.inRange(hsv_image, lower_green, upper_green)

#     # Detect blue color
#     lower_blue = np.array([94, 80, 2])
#     upper_blue = np.array([126, 255, 255])
#     mask_blue = cv2.inRange(hsv_image, lower_blue, upper_blue)

#     # Combine masks
#     combined_mask = mask_black | mask_red | mask_green | mask_blue

#     # Find contours
#     contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     bounding_rectangles = []

#     for contour in contours:
#         contour_area = cv2.contourArea(contour)
#         if min_contour_size <= contour_area <= max_contour_size:
#             x, y, w, h = cv2.boundingRect(contour)
#             roi = source_image[y:y+h, x:x+w]
#             processed_roi = preprocess_roi(roi)
#             plot_number = pytesseract.image_to_string(processed_roi, config='--psm 6')
#             plot_number = clean_plot_number(plot_number)

#             # Print plot number and coordinates for debugging
#             print(f"Detected plot number: {plot_number} at coordinates: {x, y, x+w, y+h}")

#             bounding_rectangles.append({"plot_number": plot_number, "coordinates": [x, y, x+w, y+h]})

#     return bounding_rectangles

# def process_pdf(pdf_path):
#     doc = fitz.open(pdf_path)
#     all_data = []

#     for page_num in range(len(doc)):
#         page = doc.load_page(page_num)
#         pix = page.get_pixmap()
#         image_path = f"page_{page_num + 1}.png"
#         pix.save(image_path)
#         data = extract_plot_numbers(image_path)
#         all_data.extend(data)

#     json_string = json.dumps(all_data, ensure_ascii=False, indent=4)
#     print(json_string)

# if __name__ == "__main__":
#     pdf_path = sys.argv[1]  # Retrieve PDF file path from command-line argument
#     process_pdf(pdf_path)


import sys
import json
import numpy as np
import cv2
import pytesseract
import re
import fitz  # PyMuPDF

# Point pytesseract at the default Windows install location of Tesseract.
# The override is applied on Windows only: on any other platform this path
# cannot exist, and replacing pytesseract's default command with it would
# stop OCR from finding a `tesseract` binary available on PATH.
if sys.platform.startswith('win'):
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def preprocess_roi(roi):
    """Binarise a BGR image crop so it can be handed to the OCR engine.

    The crop is reduced to a single grayscale channel and then thresholded
    to a pure 0/255 image. Otsu's method chooses the cut-off, so the
    threshold value of 0 passed below is only a placeholder.

    Args:
        roi: Image region in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY).

    Returns:
        The thresholded single-channel image.
    """
    flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    grayscale = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    return cv2.threshold(grayscale, 0, 255, flags)[1]

def clean_plot_number(plot_number):
    """Reduce raw OCR text to its ASCII digits.

    Every character other than 0-9 (letters, punctuation, whitespace,
    newlines, non-ASCII digits) is discarded, so the result may be an
    empty string.

    Args:
        plot_number: Text as returned by the OCR engine.

    Returns:
        The digits of ``plot_number`` in their original order.
    """
    # Splitting on each non-digit and re-joining the pieces removes exactly
    # the characters matched by the pattern. The result holds digits only,
    # so there is no surrounding whitespace left to trim.
    digit_runs = re.split(r'[^0-9]', plot_number)
    return ''.join(digit_runs)

def extract_plot_numbers(image_path, min_contour_size=150, max_contour_size=10000):
    """Find dark regions in an image and OCR each one for a plot number.

    The image is masked in HSV space for near-black pixels, the mask is
    cleaned with a morphological close followed by an open, and every
    external contour whose area lies within the given bounds is cropped from
    the original image, binarised and passed to Tesseract.

    Diagnostics (the load-failure message and the per-region debug lines)
    are written to stderr so that stdout carries only the JSON result that
    the caller prints.

    Args:
        image_path: Path of the image file to analyse.
        min_contour_size: Smallest contour area (as reported by
            cv2.contourArea) that is kept, inclusive.
        max_contour_size: Largest contour area that is kept, inclusive.

    Returns:
        A list with one dict per accepted contour:
        ``{"plot_number": <digits, possibly "">, "coordinates": [x1, y1, x2, y2]}``
        where (x1, y1) is the top-left corner of the contour's bounding
        rectangle and (x2, y2) is that corner offset by the rectangle's
        width and height.

    Raises:
        SystemExit: With status 1 if the image cannot be loaded.
    """
    source_image = cv2.imread(image_path)

    if source_image is None:
        print(f"Error: Unable to load image at {image_path}", file=sys.stderr)
        sys.exit(1)

    hsv_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2HSV)

    # Near-black pixels: any hue and saturation, value channel capped at 50.
    # Raise the cap if dark-grey text is being missed.
    lower_black = np.array([0, 0, 0])
    upper_black = np.array([180, 255, 50])
    mask_black = cv2.inRange(hsv_image, lower_black, upper_black)

    # Close first to merge neighbouring strokes into one blob, then open to
    # drop specks smaller than the 5x5 kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    mask_black = cv2.morphologyEx(mask_black, cv2.MORPH_CLOSE, kernel)
    mask_black = cv2.morphologyEx(mask_black, cv2.MORPH_OPEN, kernel)

    # Only outermost contours are needed; nested ones are ignored.
    contours, _ = cv2.findContours(mask_black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_rectangles = []

    for contour in contours:
        contour_area = cv2.contourArea(contour)
        if not (min_contour_size <= contour_area <= max_contour_size):
            continue

        x, y, w, h = cv2.boundingRect(contour)
        roi = source_image[y:y+h, x:x+w]
        processed_roi = preprocess_roi(roi)

        # psm 6 treats the crop as a single uniform block of text; the
        # `digits` config restricts recognition to numeric characters.
        plot_number = pytesseract.image_to_string(processed_roi, config='--psm 6 digits')
        plot_number = clean_plot_number(plot_number)

        # Debug trace goes to stderr to keep stdout parseable as JSON.
        print(
            f"Detected plot number: {plot_number} at coordinates: {x, y, x+w, y+h}",
            file=sys.stderr,
        )

        bounding_rectangles.append({"plot_number": plot_number, "coordinates": [x, y, x+w, y+h]})

    return bounding_rectangles

def process_pdf(pdf_path):
    """Rasterise every page of a PDF, OCR it, and print the results as JSON.

    Each page is rendered with PyMuPDF's default pixmap settings and saved
    as ``page_<n>.png`` (1-based) in the current working directory; these
    files are left on disk after processing. The detections from all pages
    are concatenated in page order and printed to stdout as an indented
    JSON array.

    Args:
        pdf_path: Path of the PDF document to process.
    """
    all_data = []

    # The context manager closes the document even if a page fails to
    # render or OCR, so the file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            pix = page.get_pixmap()
            image_path = f"page_{page_num}.png"
            pix.save(image_path)
            all_data.extend(extract_plot_numbers(image_path))

    json_string = json.dumps(all_data, ensure_ascii=False, indent=4)
    print(json_string)

if __name__ == "__main__":
    # The PDF path is the first command-line argument. Without it, exit with
    # a usage hint on stderr instead of an IndexError traceback.
    if len(sys.argv) < 2:
        print("Usage: python script.py <pdf_file>", file=sys.stderr)
        sys.exit(1)

    pdf_path = sys.argv[1]
    process_pdf(pdf_path)
