from app.core.security import get_password_hash
from app.models.enums import ExtractionStatus, UserRole
from app.models.extraction_result import ExtractionResult
from app.models.user import User


def test_upload_skips_duplicate_pending_extraction(client, auth_headers, db, seed_data):
    """Uploading identical PDF bytes twice must reuse the first extraction.

    The second upload uses a different filename but the same content, and is
    expected to come back flagged ``duplicate``/``skipped`` with the original
    ``extraction_id``. The pending listing must then contain that id exactly
    once.
    """
    payload = b"%PDF-1.4 duplicate test content for checksum dedup"

    def upload(filename):
        # Same bytes on every call; only the filename varies between uploads.
        return client.post(
            "/api/v1/upload/pdf",
            headers=auth_headers,
            files={"file": (filename, payload, "application/pdf")},
        )

    # First upload: accepted as a new extraction.
    original_response = upload("policy-a.pdf")
    assert original_response.status_code == 200
    original = original_response.json()
    assert original["duplicate"] is False
    extraction_id = original["extraction_id"]

    # Second upload of the same content: reported as a skipped duplicate
    # pointing at the extraction created above.
    repeat_response = upload("policy-a-copy.pdf")
    assert repeat_response.status_code == 200
    repeat = repeat_response.json()
    assert repeat["duplicate"] is True
    assert repeat["skipped"] is True
    assert repeat["extraction_id"] == extraction_id

    # The pending listing must show the extraction once, not once per upload.
    listing = client.get(
        "/api/v1/upload/extractions",
        headers=auth_headers,
        params={"pending": True},
    )
    assert listing.status_code == 200
    listed_ids = [entry["id"] for entry in listing.json()["items"]]
    assert listed_ids.count(extraction_id) == 1


def test_list_extractions_hides_duplicate_checksum_rows(client, auth_headers, db, seed_data):
    """Rows sharing one file checksum must appear only once in the listing.

    Two ``ExtractionResult`` rows with the same ``file_checksum`` are inserted
    directly through the session, then the extractions listing is queried with
    ``pending=True`` and expected to return a single ``dup-*`` entry.
    """
    shared_checksum = "abc123deduptestchecksumvalue0123456789abcdef0123456789abcdef01"
    agency_id = seed_data["agency"].id
    uploader_id = seed_data["admin"].id

    # Two rows that differ only by filename; the checksum is identical.
    duplicate_rows = [
        ExtractionResult(
            agency_id=agency_id,
            uploaded_by=uploader_id,
            original_filename=f"dup-{idx}.pdf",
            file_checksum=shared_checksum,
            status=ExtractionStatus.SUCCESS,
            raw_data={"checksum": shared_checksum, "fields": []},
        )
        for idx in range(2)
    ]
    for row in duplicate_rows:
        db.add(row)
    db.commit()

    listing = client.get(
        "/api/v1/upload/extractions",
        headers=auth_headers,
        params={"pending": True},
    )
    assert listing.status_code == 200

    # Only look at the rows created by this test; other seeded rows are ignored.
    listed_duplicates = [
        entry
        for entry in listing.json()["items"]
        if entry["original_filename"].startswith("dup-")
    ]
    assert len(listed_duplicates) == 1


def test_file_checksums_endpoint(client, auth_headers, db, seed_data):
    """The file-checksums endpoint must report a stored extraction's checksum.

    A single ``PARTIAL`` extraction is inserted directly through the session;
    its checksum is then expected under ``pending_checksums`` in the response.
    """
    known_checksum = "deadbeef" * 8

    extraction = ExtractionResult(
        agency_id=seed_data["agency"].id,
        uploaded_by=seed_data["admin"].id,
        original_filename="listed.pdf",
        file_checksum=known_checksum,
        status=ExtractionStatus.PARTIAL,
        raw_data={"checksum": known_checksum, "fields": []},
    )
    db.add(extraction)
    db.commit()

    result = client.get("/api/v1/upload/file-checksums", headers=auth_headers)
    assert result.status_code == 200
    payload = result.json()
    assert known_checksum in payload["pending_checksums"]
