import io
import uuid

from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile
from sqlalchemy.orm import Session

from app.api.deps import get_current_user
from app.api.permissions import require_permission
from app.config import settings
from app.core.exceptions import DocumentUploadError, TextChunkingError, UnsupportedFileTypeError
from app.core.i18n import get_translator
from app.db.session import get_db
from app.models import (
    DocumentVersion,
    KnowledgeBase,
    KnowledgeDocument,
    Model,
    User,
)
from app.schemas.knowledge_document import DocumentCreate
from app.services.storage.minio import get_minio_service
from common_logging import get_logger

# Module-level logger bound to this module's dotted path.
logger = get_logger(__name__)
# Maps a lowercase file extension to the MIME type sent to MinIO on upload.
# Extensions not listed here fall back to "application/octet-stream" at the
# call sites (see the `.get(..., "application/octet-stream")` lookups below).
_MINIO_CONTENT_TYPES = {
    "pdf": "application/pdf",
    "doc": "application/msword",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "txt": "text/plain",
    "md": "text/markdown",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "csv": "text/csv",
    "json": "application/json",
    "xml": "application/xml",
}
# Router for the document upload endpoints; mounted by the parent API package.
router = APIRouter()


@router.post("/documents/upload")
async def upload_document_file(
    request: Request,
    file: UploadFile = File(...),
    category_id: int | None = Form(None),
    knowledge_base_id: int | None = Form(None),
    status: str = Form("draft"),
    enable_chunking: str = Form("1"),
    chunk_size: int = Form(1000, ge=100, le=5000),
    chunk_overlap: int = Form(200, ge=0, le=1000),
    splitter_type: str = Form("recursive"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
    _: None = Depends(require_permission("knowledge_bases", "create")),
):
    """Upload a single file and create a knowledge document from it.

    Pipeline: validate splitter type / file size / extension, parse the bytes
    into text, store the raw file in MinIO (only when ``knowledge_base_id`` is
    given), create the document via ``create_document``, then — when chunking
    is enabled — split the text into chunks and run best-effort auto-tagging
    and tax-document classification.

    Returns:
        The created document (chunking disabled), or a dict of the document's
        attributes plus ``chunks`` / ``chunk_count`` (chunking enabled).

    Raises:
        HTTPException: invalid ``splitter_type``, file over 50MB, or a
            disallowed extension.
        UnsupportedFileTypeError: the parser produced no text for this type.
        TextChunkingError: splitting the text failed.
        DocumentUploadError: any other failure during the upload pipeline.
    """
    from app.services.knowledge.file_parser import get_file_parser_service
    from app.services.knowledge.text_splitter_service import SplitterType, get_text_splitter_service

    from .documents import create_document

    # NOTE(review): the translator is unused here; the call is kept in case
    # get_translator has request-scoped side effects — confirm and drop if not.
    get_translator(request)

    valid_splitter_types = (
        "recursive",
        "character",
        "token",
        "markdown",
        "tax_article",
        "tax_clause",
        "tax_chapter",
        "tax_adaptive",
    )
    if splitter_type not in valid_splitter_types:
        raise HTTPException(
            status_code=400,
            detail="Invalid splitter_type. Must be one of: recursive, character, token, markdown, tax_article, tax_clause, tax_chapter, tax_adaptive",
        )

    MAX_FILE_SIZE = 50 * 1024 * 1024
    # Measure the stream size by seeking, then rewind so the later read() sees
    # the full content.
    file.file.seek(0, 2)
    file_size = file.file.tell()
    file.file.seek(0)
    if file_size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400, detail="File size exceeds maximum allowed size of 50MB"
        )

    ALLOWED_EXTENSIONS = ["pdf", "doc", "docx", "ppt", "pptx", "txt", "md"]
    # UploadFile.filename may be None; normalize so string operations are safe.
    filename = file.filename or ""
    file_ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if file_ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"File type .{file_ext} not allowed. Allowed types: {', '.join(ALLOWED_EXTENSIONS)}",
        )

    enable_chunking_bool = enable_chunking in ["1", "true", "True", "yes", "Yes"]
    logger.info("Upload started", filename=file.filename, size=file_size)

    def _auto_tag(document_id: int) -> None:
        # Best-effort auto-tagging: failures are logged but never abort the upload.
        try:
            from app.services.knowledge.auto_tagging import auto_tag_document_on_upload

            auto_tag_document_on_upload(db, document_id)
        except Exception as e:
            logger.warning(f"Auto-tagging failed for document {document_id}: {e}")

    try:
        content_bytes = await file.read()
        # file_type keeps the original casing because it is persisted on the
        # document record; file_ext (lowercased, validated above) is used for
        # storage naming and content-type lookup.
        file_type = filename.rsplit(".", 1)[-1] if "." in filename else ""
        parser_service = get_file_parser_service()
        text_content = parser_service.parse_file(content_bytes, file_type)
        if not text_content:
            raise UnsupportedFileTypeError(file_type)

        if knowledge_base_id is None:
            logger.warning("knowledge_base_id is None; skipping MinIO upload")
            object_name = None
        else:
            object_name = (
                f"tenant_{current_user.tenant_id}/kb_{knowledge_base_id}/{uuid.uuid4()}.{file_ext}"
            )
            minio_service = get_minio_service()
            upload_content_type = _MINIO_CONTENT_TYPES.get(file_ext, "application/octet-stream")
            upload_success = minio_service.upload_file(
                bucket_name=settings.MINIO_BUCKET,
                object_name=object_name,
                file_data=io.BytesIO(content_bytes),
                file_size=len(content_bytes),
                content_type=upload_content_type,
            )
            if not upload_success:
                # Storage is best-effort: the document is still created, just
                # without a file_path.
                logger.warning(
                    f"Failed to upload file to MinIO: {object_name}, continuing without storage"
                )
                object_name = None

        title = filename.rsplit(".", 1)[0] if "." in filename else filename
        document_data = {
            "title": title,
            "content": text_content,
            "summary": text_content[:200] + "..." if len(text_content) > 200 else text_content,
            "category_id": category_id,
            "file_type": file_type,
            "file_path": object_name,
            "status": status,
            "is_public": True,
            "segmentation_mode": "automatic" if enable_chunking_bool else "none",
            "chunk_size": chunk_size if enable_chunking_bool else None,
            "chunk_overlap": chunk_overlap if enable_chunking_bool else None,
            "splitter_type": splitter_type if enable_chunking_bool else None,
            "character_count": len(text_content),
        }
        doc_create = DocumentCreate(**document_data)
        document = create_document(request, doc_create, db, current_user)

        if enable_chunking_bool and text_content:
            try:
                splitter_service = get_text_splitter_service()
                # Map the validated string onto the enum; unknown values fall
                # back to RECURSIVE (same behavior as before).
                splitter_type_enum = {
                    "character": SplitterType.CHARACTER,
                    "token": SplitterType.TOKEN,
                    "markdown": SplitterType.MARKDOWN,
                    "tax_article": SplitterType.TAX_ARTICLE,
                    "tax_clause": SplitterType.TAX_CLAUSE,
                    "tax_chapter": SplitterType.TAX_CHAPTER,
                    "tax_adaptive": SplitterType.TAX_ADAPTIVE,
                }.get(splitter_type.lower(), SplitterType.RECURSIVE)
                chunks = splitter_service.create_chunks_with_metadata(
                    text=text_content,
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    splitter_type=splitter_type_enum,
                    document_id=document.id,
                    document_title=document.title,
                )
                document_dict = document.__dict__.copy()
                document_dict["chunks"] = chunks
                document_dict["chunk_count"] = len(chunks)
                logger.info("Upload completed", doc_id=document.id, filename=file.filename, chunks=len(chunks))
                _auto_tag(document.id)
                # Best-effort tax classification; failures are logged only.
                try:
                    from app.services.knowledge.document_classifier import TaxDocumentClassifier

                    classifier = TaxDocumentClassifier()
                    classification = classifier.classify(text_content)
                    document.doc_type = classification.doc_type.value
                    document.doc_number = classification.doc_number
                    document.issuing_authority = classification.issuing_authority
                    db.commit()
                    document_dict["doc_type"] = classification.doc_type.value
                    document_dict["doc_number"] = classification.doc_number
                    document_dict["issuing_authority"] = classification.issuing_authority
                    logger.info(
                        f"Document {document.id} classified as: {classification.doc_type.value}"
                    )
                except Exception as e:
                    logger.warning(
                        f"Document classification failed for document {document.id}: {e}"
                    )
                return document_dict
            except TextChunkingError:
                raise
            except Exception as e:
                logger.error(f"Text chunking failed: {e}")
                raise TextChunkingError(f"文本分块失败: {str(e)}") from None

        _auto_tag(document.id)
        return document
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Upload failed", filename=file.filename, error=str(e))
        raise DocumentUploadError(file.filename, str(e)) from None


@router.post("/documents/batch-upload")
async def batch_upload_documents(
    request: Request,
    files: list[UploadFile] = File(...),
    category_id: int | None = Form(None),
    knowledge_base_id: int | None = Form(None),
    status: str = Form("draft"),
    enable_chunking: str = Form("1"),
    chunk_size: int = Form(1000, ge=100, le=5000),
    chunk_overlap: int = Form(200, ge=0, le=1000),
    splitter_type: str = Form("recursive"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
    _: None = Depends(require_permission("knowledge_bases", "create")),
):
    """Upload up to 50 files in one request, creating a document per file.

    Each file is parsed to text, optionally stored in MinIO, persisted as a
    ``KnowledgeDocument`` with an initial ``DocumentVersion``, auto-tagged
    (best-effort), and — when chunking is enabled — queued for asynchronous
    vectorization. Per-file failures are recorded in the result list and do
    not abort the rest of the batch.

    Returns:
        dict with ``total``, ``success``, ``failed`` counts and a per-file
        ``results`` list.

    Raises:
        HTTPException: invalid ``splitter_type``, more than 50 files, or an
            unknown category / knowledge base.
    """
    from app.models.knowledge_base import KnowledgeCategory
    from app.services.knowledge.file_parser import get_file_parser_service

    t = get_translator(request)
    valid_splitter_types = (
        "recursive",
        "character",
        "token",
        "markdown",
        "tax_article",
        "tax_clause",
        "tax_chapter",
        "tax_adaptive",
    )
    if splitter_type not in valid_splitter_types:
        raise HTTPException(
            status_code=400,
            detail="Invalid splitter_type. Must be one of: recursive, character, token, markdown, tax_article, tax_clause, tax_chapter, tax_adaptive",
        )
    if len(files) > 50:
        raise HTTPException(status_code=400, detail="Maximum 50 files allowed per batch upload")

    enable_chunking_bool = enable_chunking in ["1", "true", "True", "yes", "Yes"]
    logger.bind(file_count=len(files), enable_chunking=enable_chunking_bool).info("Batch upload started")

    # Resolve the category/knowledge base once up front; they are constant for
    # the whole batch.
    if category_id:
        category = db.query(KnowledgeCategory).filter(KnowledgeCategory.id == category_id).first()
        if not category:
            raise HTTPException(status_code=404, detail="Category not found")
        kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == category.knowledge_base_id).first()
        if not kb:
            raise HTTPException(status_code=404, detail="Knowledge base not found")

    results = []
    parser_service = get_file_parser_service()
    batch_minio_service = get_minio_service()
    for file in files:
        try:
            content_bytes = await file.read()
            file_type = file.filename.split(".")[-1] if "." in file.filename else ""
            text_content = parser_service.parse_file(content_bytes, file_type)
            if not text_content:
                results.append(
                    {
                        "filename": file.filename,
                        "success": False,
                        "error": t.t("knowledge.cannot_parse_file_type", file_type=file_type),
                    }
                )
                continue

            title = file.filename.rsplit(".", 1)[0] if "." in file.filename else file.filename

            # Prefer the knowledge base's tenant (when a category was given),
            # otherwise fall back to the uploader's tenant.
            tenant_id = None
            if category_id and category:
                tenant_id = kb.tenant_id if kb is not None else None
            elif current_user.tenant_id is not None:
                tenant_id = current_user.tenant_id
            if tenant_id is None:
                logger.bind(filename=file.filename).warning("Cannot determine tenant_id")
                results.append(
                    {
                        "filename": file.filename,
                        "success": False,
                        "error": "Cannot determine tenant_id",
                    }
                )
                continue

            batch_file_ext = (
                file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else "bin"
            )
            if knowledge_base_id is None:
                logger.warning(
                    f"knowledge_base_id is None for {file.filename}; skipping MinIO upload"
                )
                batch_object_name = None
            else:
                batch_object_name = f"tenant_{current_user.tenant_id}/kb_{knowledge_base_id}/{uuid.uuid4()}.{batch_file_ext}"
                batch_upload_content_type = _MINIO_CONTENT_TYPES.get(
                    batch_file_ext, "application/octet-stream"
                )
                batch_upload_success = batch_minio_service.upload_file(
                    bucket_name=settings.MINIO_BUCKET,
                    object_name=batch_object_name,
                    file_data=io.BytesIO(content_bytes),
                    file_size=len(content_bytes),
                    content_type=batch_upload_content_type,
                )
                if not batch_upload_success:
                    # Storage is best-effort: the document is still created
                    # without a file_path.
                    logger.warning(f"Failed to upload batch file to MinIO: {batch_object_name}")
                    batch_object_name = None

            db_document = KnowledgeDocument(
                title=title,
                content=text_content,
                summary=text_content[:200] + "..." if len(text_content) > 200 else text_content,
                category_id=category_id,
                file_type=file_type,
                file_path=batch_object_name,
                status=status,
                is_public=True,
                segmentation_mode="automatic" if enable_chunking_bool else "none",
                chunk_size=chunk_size if enable_chunking_bool else None,
                chunk_overlap=chunk_overlap if enable_chunking_bool else None,
                splitter_type=splitter_type if enable_chunking_bool else None,
                character_count=len(text_content),
                author_id=current_user.id,
                tenant_id=tenant_id,
                is_vectorized=False,
                vectorization_status="pending",
                vectorization_progress=0,
            )
            db.add(db_document)
            db.flush()
            version = DocumentVersion(
                document_id=db_document.id,
                version_number=1,
                title=title,
                content=text_content,
                change_summary="初始版本",
                editor_id=current_user.id,
            )
            db.add(version)
            db.commit()
            db.refresh(db_document)

            # Best-effort auto-tagging; failures never fail the file.
            try:
                from app.services.knowledge.auto_tagging import auto_tag_document_on_upload

                auto_tag_document_on_upload(db, db_document.id)
            except Exception as e:
                logger.warning(f"Auto-tagging failed for document {db_document.id}: {e}")

            if enable_chunking_bool:
                from app.services.rag.async_vectorization_service import (
                    get_async_vectorization_service,
                )

                # Resolve the embedding model from the knowledge base's code,
                # which may store either a numeric Model.id or a Model.code.
                vector_model_id = None
                if category_id:
                    category = (
                        db.query(KnowledgeCategory)
                        .filter(KnowledgeCategory.id == category_id)
                        .first()
                    )
                    if category and category.knowledge_base_id:
                        kb = (
                            db.query(KnowledgeBase)
                            .filter(KnowledgeBase.id == category.knowledge_base_id)
                            .first()
                        )
                        if kb and kb.code:
                            if kb.code.isdigit():
                                vector_model = (
                                    db.query(Model).filter(Model.id == int(kb.code)).first()
                                )
                            else:
                                vector_model = db.query(Model).filter(Model.code == kb.code).first()
                            if vector_model:
                                vector_model_id = vector_model.id
                async_service = get_async_vectorization_service()
                async_service.vectorize_document_async(
                    document_id=db_document.id,
                    model_id=vector_model_id,
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    splitter_type=splitter_type,
                )

            results.append(
                {
                    "filename": file.filename,
                    "success": True,
                    "document_id": db_document.id,
                    "is_vectorized": False,
                    "vectorization_status": "pending" if enable_chunking_bool else "none",
                    "title": db_document.title,
                }
            )
        except Exception as e:
            # Reset the session so one failed file cannot poison the rest of
            # the batch: a failed flush/commit leaves the session in a
            # rollback-only state and later iterations would raise
            # PendingRollbackError without this.
            db.rollback()
            logger.bind(filename=file.filename).error(f"Batch upload file failed: {e}")
            results.append({"filename": file.filename, "success": False, "error": str(e)})

    success_count = sum(1 for r in results if r["success"])
    fail_count = len(results) - success_count
    return {
        "total": len(results),
        "success": success_count,
        "failed": fail_count,
        "results": results,
    }
