import logging

from app.providers.provider_manager import run_cascade_extraction
from app.utils.constants import MAX_OCR_CHARS_FOR_EXTRACTION
from app.utils.prompts import EXTRACTION_SYSTEM, EXTRACTION_USER_TEMPLATE

# Module-level logger; uses the fixed name "lab_analyzer" rather than __name__
# (presumably shared with other modules of the app — TODO confirm).
logger = logging.getLogger("lab_analyzer")


async def extract_tests_from_ocr(cleaned_ocr: str) -> tuple[str, list[dict]]:
    """
    Run the AI extraction cascade over cleaned OCR text.

    The OCR text is sent as a single prompt. Text longer than
    ``MAX_OCR_CHARS_FOR_EXTRACTION`` characters is truncated to that length
    first (a warning is logged), so very long documents are NOT sent in full.

    Args:
        cleaned_ocr: OCR text to extract tests from.

    Returns:
        A ``(source_label, tests)`` tuple. ``source_label`` is whatever
        ``run_cascade_extraction`` reports; ``tests`` is the list stored under
        the ``"tests"`` key of its payload. Returns ``("none", [])`` when the
        input is missing, empty, or whitespace-only, and an empty list when the
        payload carries no usable ``"tests"`` list.
    """
    # `not cleaned_ocr` also covers None, which would otherwise crash on .strip().
    if not cleaned_ocr or not cleaned_ocr.strip():
        logger.warning("[AI_EXTRACTION] Empty OCR text")
        return "none", []

    # Slicing is a no-op for text already within the limit.
    ocr_for_ai = cleaned_ocr[:MAX_OCR_CHARS_FOR_EXTRACTION]
    if len(cleaned_ocr) > MAX_OCR_CHARS_FOR_EXTRACTION:
        logger.warning(
            "[AI_EXTRACTION] OCR truncated %d -> %d chars for speed",
            len(cleaned_ocr),
            MAX_OCR_CHARS_FOR_EXTRACTION,
        )

    user_prompt = EXTRACTION_USER_TEMPLATE.format(ocr_text=ocr_for_ai)
    logger.info("[AI_EXTRACTION] Sending OCR context (%d chars)", len(ocr_for_ai))

    source_label, data = await run_cascade_extraction(
        system_prompt=EXTRACTION_SYSTEM,
        user_prompt=user_prompt,
    )

    # The payload originates from a model response, so do not trust its shape:
    # `data` may not be a dict, and "tests" may be absent, null, or a non-list.
    # `dict.get("tests", [])` alone would let an explicit null through.
    tests = data.get("tests") if isinstance(data, dict) else None
    if not isinstance(tests, list):
        logger.warning(
            "[AI_EXTRACTION] No usable 'tests' list in response from %s",
            source_label,
        )
        tests = []

    logger.info("[AI_EXTRACTION] Extracted %d tests via %s", len(tests), source_label)
    return source_label, tests
