import os
import re

import cv2
import pytesseract

# Load the image
image_path = "kavanur.jpg"
image = cv2.imread(image_path)

# cv2.imread reports failure by returning None rather than raising, so check
# here; otherwise the problem only surfaces as an opaque cv2.error from
# cvtColor below.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Point pytesseract at the default Windows install location, but only when
# that executable actually exists. On any other machine the pytesseract
# default is left untouched so a `tesseract` binary on PATH is still found.
tesseract_windows_path = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(tesseract_windows_path):
    pytesseract.pytesseract.tesseract_cmd = tesseract_windows_path

# Run OCR on the grayscale image. With Output.DICT the result is a dict of
# parallel per-token lists (the script reads 'text', 'left', 'top', 'width'
# and 'height' from it).
text_data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)

# Pattern for tokens treated as "numbers". Alternatives, tried in order:
#   1. digits optionally followed by letters/hyphens   (e.g. "12", "12A", "12-B")
#   2. a single letter followed by digits              (e.g. "A12")
#   3. a single letter, digits, then a single letter   (e.g. "A12B")
# It is applied with .match(), which anchors only at the start of the token,
# so a token with trailing characters after a word boundary (e.g. "12,") is
# accepted as well.
number_pattern = re.compile(r'\b\d+[a-zA-Z-]*\b|\b[a-zA-Z]\d+\b|\b[a-zA-Z]\d+[a-zA-Z]\b')

# Maps each matching OCR token to its box as (left, top, right, bottom).
# Keys are the token text, so a token that occurs more than once keeps only
# the box of its last occurrence.
number_coordinates = {}

# Walk the parallel OCR result lists in lockstep, one token per step.
ocr_rows = zip(
    text_data['text'],
    text_data['left'],
    text_data['top'],
    text_data['width'],
    text_data['height'],
)
for token, left, top, box_w, box_h in ocr_rows:
    if not number_pattern.match(token):
        continue
    number_coordinates[token] = (left, top, left + box_w, top + box_h)

# Report what was found
print(f"Numbers and Coordinates: {number_coordinates}")

# Detect contours for image objects.
# cv2.findContours treats every non-zero pixel as foreground, so feeding it
# the raw grayscale image turns almost the whole page into one blob. Binarize
# first: Otsu picks the threshold automatically, and THRESH_BINARY_INV makes
# dark content the (white) foreground.
# NOTE(review): this assumes dark objects on a light background; switch to
# cv2.THRESH_BINARY if the input is light-on-dark.
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Bounding boxes must exceed this size (in pixels) in both width and height;
# smaller contours are discarded as unlikely to be images. Adjust as needed.
min_image_side = 50

# Initialize a list to store image coordinates as (left, top, right, bottom)
image_coordinates = []

for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    if w > min_image_side and h > min_image_side:
        image_coordinates.append((x, y, x + w, y + h))

# Print the detected image coordinates
print(f"Image Coordinates: {image_coordinates}")

# Optional visual check: outline every detection on the original colour image.
# Colours are BGR tuples; number boxes are drawn first, then image boxes.
overlays = (
    (number_coordinates.values(), (0, 255, 0)),  # green for numbers
    (image_coordinates, (255, 0, 0)),            # blue for images
)
for boxes, colour in overlays:
    for left, top, right, bottom in boxes:
        cv2.rectangle(image, (left, top), (right, bottom), colour, 2)