
from fastapi import HTTPException, UploadFile

from common_logging import get_logger

# Module-level logger, named after this module, used by the validation helpers in this file.
logger = get_logger(__name__)
ALLOWED_MIME_TYPES: dict[str, list[str]] = {
    "application/pdf": [".pdf"],
    "application/msword": [".doc"],
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"],
    "text/plain": [".txt"],
    "text/markdown": [".md"],
    "text/csv": [".csv"],
    "application/json": [".json"],
}
FILE_SIGNATURES: dict[str, bytes] = {
    "pdf": b"%PDF",
    "docx": b"PK\x03\x04",
    "doc": b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
    "zip": b"PK\x03\x04",
    "txt": None,
    "md": None,
    "csv": None,
    "json": None,
}
MAX_FILE_SIZE = 100 * 1024 * 1024
MAX_TOTAL_UPLOAD_SIZE = 1024 * 1024 * 1024
ALLOWED_EXTENSIONS = {".pdf", ".doc", ".docx", ".txt", ".md", ".csv", ".json"}


def validate_file_extension(filename: str | None) -> str:
    """Return the lower-cased extension of ``filename`` if it is allowed.

    Only the text after the last ``.`` is considered, so ``report.tar.gz``
    is judged as ``.gz``.

    Args:
        filename: Client-supplied file name. ``None`` or an empty string is
            rejected like any other name without an extension.

    Returns:
        The extension with its leading dot, lower-cased (for example ``".pdf"``).

    Raises:
        HTTPException: 400 when the name is missing or has no ``.``, or when
            the extension is not in ``ALLOWED_EXTENSIONS``.
    """
    # A missing name must produce a client error, not a TypeError from the
    # ``in`` test below.
    if not filename or "." not in filename:
        # ``!r`` escapes control characters so a crafted name cannot forge log lines.
        logger.error(f"File validation failed: no extension - {filename!r}")
        raise HTTPException(status_code=400, detail="File must have an extension")
    ext = "." + filename.rsplit(".", 1)[-1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        logger.error(f"File validation failed: invalid extension {ext!r}")
        # Sorted so the message is stable: set iteration order is not.
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        raise HTTPException(
            status_code=400,
            detail=f"File type {ext} not allowed. Allowed types: {allowed}",
        )
    return ext


def validate_mime_type(content_type: str | None, extension: str) -> bool:
    """Check that ``content_type`` is allowed and agrees with ``extension``.

    The header value is normalised before the lookup: parameters after ``;``
    (such as ``charset=utf-8``) are dropped and the media type is lower-cased,
    because media type names are case-insensitive.

    Args:
        content_type: Raw Content-Type value sent with the upload, or ``None``.
        extension: File extension including the leading dot (for example ``".pdf"``).

    Returns:
        ``True`` when the MIME type is allowed and lists ``extension``.

    Raises:
        HTTPException: 400 when the header is missing or blank, the MIME type
            is not in ``ALLOWED_MIME_TYPES``, or the extension does not belong
            to that MIME type.
    """
    mime_type = content_type.split(";", 1)[0].strip().lower() if content_type else ""
    if not mime_type:
        logger.error("File validation failed: missing Content-Type")
        raise HTTPException(status_code=400, detail="Content-Type header is required")

    allowed_extensions = ALLOWED_MIME_TYPES.get(mime_type)
    if allowed_extensions is None:
        # ``!r`` keeps client-controlled text from injecting extra log lines.
        logger.error(f"File validation failed: MIME type {mime_type!r} not allowed")
        raise HTTPException(status_code=400, detail=f"MIME type {mime_type} not allowed")

    if extension.lower() not in allowed_extensions:
        logger.error(
            f"File validation failed: extension {extension!r} does not match MIME type {mime_type!r}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"File extension {extension} does not match MIME type {mime_type}",
        )
    return True


def verify_file_signature(content: bytes, extension: str) -> bool:
    """Check that ``content`` starts with the signature registered for ``extension``.

    Extensions mapped to ``None`` in ``FILE_SIGNATURES`` and extensions that
    are not in the table at all have nothing to compare against, so they pass.

    Args:
        content: File bytes, starting at the first byte of the file.
        extension: File extension, with or without the leading dot, in any case.

    Returns:
        ``True`` when the signature matches or no signature is registered.

    Raises:
        HTTPException: 400 when a signature is registered and ``content`` does
            not start with it.
    """
    # Table keys are lower-case; without folding, ".PDF" would miss the table
    # and skip the check entirely.
    ext = extension.lstrip(".").lower()
    signature = FILE_SIGNATURES.get(ext)
    if signature is None:
        return True
    if not content.startswith(signature):
        logger.error(f"File validation failed: signature mismatch for {extension!r}")
        raise HTTPException(
            status_code=400, detail=f"File signature does not match {extension} file type"
        )
    return True


def validate_file_size(file_size: int, max_size: int = MAX_FILE_SIZE) -> bool:
    """Reject files larger than ``max_size`` bytes.

    Args:
        file_size: Size of the file in bytes.
        max_size: Largest size accepted, in bytes. A file of exactly this
            size passes.

    Returns:
        ``True`` when ``file_size`` does not exceed ``max_size``.

    Raises:
        HTTPException: 400 when ``file_size`` is greater than ``max_size``.
    """
    too_large = file_size > max_size
    if not too_large:
        return True

    logger.error(f"File size validation failed: {file_size} > {max_size}")
    message = f"File size {file_size} bytes exceeds maximum {max_size} bytes"
    raise HTTPException(status_code=400, detail=message)


async def validate_upload_file(file: UploadFile) -> bytes:
    """Run every upload check on ``file`` and return its bytes.

    Checks run in this order: extension, MIME type, size, leading signature.
    On success the file is rewound to offset 0 so it can be read again.

    Args:
        file: The uploaded file to validate.

    Returns:
        The file content that was read for validation.

    Raises:
        HTTPException: 400 from whichever check fails first.
    """
    # The file name may be absent; an empty string takes the "no extension"
    # 400 path instead of failing with a TypeError.
    extension = validate_file_extension(file.filename or "")
    validate_mime_type(file.content_type, extension)

    # When the framework already reports a size, reject oversized uploads
    # before reading any bytes (this also reports the real size).
    declared_size = getattr(file, "size", None)
    if isinstance(declared_size, int):
        validate_file_size(declared_size)

    # Read at most one byte past the limit: enough to detect an oversized file
    # without loading all of it into memory. If this check fails, the size
    # reported is only a lower bound.
    content = await file.read(MAX_FILE_SIZE + 1)
    validate_file_size(len(content))
    verify_file_signature(content, extension)

    await file.seek(0)
    logger.info(f"File validation passed: {file.filename} ({len(content)} bytes)")
    return content


def scan_file_for_malware(file_path: str) -> bool:
    """Placeholder malware scan that performs no scanning.

    ``file_path`` is accepted but never read. Each call logs a warning that
    scanning is not implemented and then reports the file as clean.

    Args:
        file_path: Path of the file that would be scanned.

    Returns:
        Always ``True``.
    """
    not_implemented_notice = "Malware scanning not implemented - integrate ClamAV"
    logger.warning(not_implemented_notice)
    return True
