"""Evaluate PDF extraction against example-pdf fixtures."""

import io
import json
import sys
from pathlib import Path

import pdfplumber

# Parent of the directory that contains this file (resolved to an absolute path).
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT first on the import path; presumably this is what lets the
# `app.*` imports below resolve when the script is run directly -- verify
# against the project layout.
sys.path.insert(0, str(ROOT))

from app.parsers.text_normalize import normalize_pdf_text
from app.parsers.registry import PARSER_REGISTRY, detect_company  # noqa: E402

# Field names counted toward the "key_fields_found" score in evaluate_pdf():
# a field scores when it is present in the parser's field map with a truthy value.
KEY_FIELDS = [
    "policy_number",
    "customer_name",
    "mobile_number",
    "policy_end_date",
    "vehicle_registration_number",
]


def extract_text(pdf_path: Path, max_pages: int = 10) -> str:
    """Return the text of the leading pages of a PDF, one page per line break.

    Args:
        pdf_path: Location of the PDF to open with pdfplumber.
        max_pages: Upper bound on how many pages (from the start) are read.

    Returns:
        The per-page texts joined with ``"\\n"``. A page whose extraction
        yields a falsy result contributes an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        leading_pages = pdf.pages[:max_pages]
        # Extraction must happen while the document is still open.
        return "\n".join(page.extract_text() or "" for page in leading_pages)


def evaluate_pdf(pdf_path: Path) -> dict:
    """Run company detection and parsing on one PDF and summarise the outcome.

    The PDF text is extracted, normalised, passed to ``detect_company`` and
    then to the parser registered for the detected company code (falling back
    to the ``"generic"`` parser when the code is empty or unregistered).

    Args:
        pdf_path: Path of the PDF to evaluate.

    Returns:
        A dict with the file name, detected company code/name, detection
        confidence, parser name, number of extracted fields, number of
        KEY_FIELDS present with a truthy value, the per-field value and
        confidence, and a 400-character single-line preview of the text.
    """
    text = normalize_pdf_text(extract_text(pdf_path))
    code, name, confidence = detect_company(text, pdf_path.name)

    fallback_parser = PARSER_REGISTRY["generic"]
    parser = PARSER_REGISTRY.get(code or "generic", fallback_parser)
    field_map = parser(text).get_field_map()

    # Not every callable exposes __name__; fall back to its string form.
    try:
        parser_label = parser.__name__
    except AttributeError:
        parser_label = str(parser)

    total_fields = len(field_map)

    key_hits = 0
    for key in KEY_FIELDS:
        field = field_map.get(key)
        if field and field.value:
            key_hits += 1

    field_details = {}
    for key, field in field_map.items():
        field_details[key] = {"value": field.value, "confidence": field.confidence}

    preview = text[:400]
    preview = preview.replace("\n", " ")

    summary = {
        "file": pdf_path.name,
        "company_code": code,
        "company_name": name,
        "detection_confidence": confidence,
        "parser": parser_label,
        "field_count": total_fields,
        "key_fields_found": key_hits,
        "fields": field_details,
        "text_preview": preview,
    }
    return summary


def main() -> None:
    """Evaluate every PDF in ``ROOT / "example-pdf"`` and write a JSON report.

    Prints a summary table to stdout (one row per PDF: file name, detected
    company, key-field score, total field count), then writes the complete
    per-file results to ``evaluation_report.json`` in the same folder.

    Exits with status 1 when the fixture folder does not exist.
    """
    folder = ROOT / "example-pdf"
    if not folder.exists():
        print(f"Folder not found: {folder}")
        sys.exit(1)

    # is_file() keeps a directory whose name ends in ".pdf" from being
    # handed to the PDF reader.
    pdfs = sorted(
        p for p in folder.iterdir() if p.is_file() and p.suffix.lower() == ".pdf"
    )
    # Derive the score denominator from KEY_FIELDS so the table stays correct
    # if the list of key fields changes.
    key_total = len(KEY_FIELDS)

    print(f"Evaluating {len(pdfs)} PDFs in {folder}\n")
    print(f"{'File':<55} {'Company':<22} {'Key':>5} {'Fields':>6}")
    print("-" * 92)

    results = []
    for pdf in pdfs:
        row = evaluate_pdf(pdf)
        results.append(row)
        # Right-align to the 5-character width used by the 'Key' header.
        key_score = f"{row['key_fields_found']}/{key_total}"
        print(
            f"{row['file'][:54]:<55} "
            f"{(row['company_name'] or 'Unknown')[:21]:<22} "
            f"{key_score:>5} "
            f"{row['field_count']:>6}"
        )

    out = folder / "evaluation_report.json"
    out.write_text(json.dumps(results, indent=2), encoding="utf-8")
    print(f"\nDetailed report written to: {out}")


if __name__ == "__main__":
    main()
