import re

from sqlalchemy.orm import Session

from common_logging import get_logger

logger = get_logger(__name__)




class SmartCategorizationService:

    def __init__(self):
        self.categories = {
            "税收政策": {
                "keywords": ["政策", "税收优惠", "减免", "扶持", "支持政策"],
                "patterns": ["关于.*政策", ".*优惠.*通知", ".*减免.*公告"],
            },
            "征管规定": {
                "keywords": ["征管", "管理办法", "征收", "缴纳", "申报"],
                "patterns": [".*征管.*", ".*管理办法", ".*征收.*规定"],
            },
            "公告通知": {
                "keywords": ["公告", "通知", "通告", "公示"],
                "patterns": [".*公告", ".*通知", ".*通告"],
            },
            "税务稽查": {
                "keywords": ["稽查", "检查", "核查", "审计"],
                "patterns": [".*稽查.*", ".*检查.*通知"],
            },
            "发票管理": {
                "keywords": ["发票", "开票", "票据"],
                "patterns": [".*发票.*", ".*开票.*"],
            },
            "纳税服务": {
                "keywords": ["纳税服务", "便民", "服务指南", "办税"],
                "patterns": [".*纳税服务.*", ".*办税.*"],
            },
            "税收协定": {
                "keywords": ["协定", "双边", "税收协定", "议定书"],
                "patterns": [".*协定.*", ".*议定书"],
            },
            "人才培养": {
                "keywords": ["人才", "培训", "学员", "领军人才"],
                "patterns": [".*人才.*", ".*培训.*", ".*学员.*"],
            },
            "其他": {"keywords": [], "patterns": []},
        }

    def categorize_by_content(self, title: str, content: str) -> str:
        text = (title + " " + content[:500]).lower()
        scores = {}
        for category, rules in self.categories.items():
            score = 0
            for keyword in rules["keywords"]:
                if keyword in text:
                    score += 2
            for pattern in rules["patterns"]:
                if re.search(pattern, text):
                    score += 3
            scores[category] = score
        best_category = max(scores.items(), key=lambda x: x[1])
        if best_category[1] == 0:
            return "其他"
        logger.info(f"Document categorization: {best_category[0]} (score: {best_category[1]})")
        return best_category[0]

    def categorize_by_filename(self, filename: str) -> str:
        filename_lower = filename.lower()
        for category, rules in self.categories.items():
            for keyword in rules["keywords"]:
                if keyword in filename_lower:
                    return category
        return "其他"

    def get_or_create_category(
        self,
        db: Session,
        knowledge_base_id: int,
        category_name: str,
        parent_id: int | None = None,
        year: str | None = None,
    ) -> int:
        from app.models.knowledge_base import KnowledgeCategory


        query = db.query(KnowledgeCategory).filter(
            KnowledgeCategory.knowledge_base_id == knowledge_base_id,
            KnowledgeCategory.name == category_name,
        )
        if parent_id is not None:
            query = query.filter(KnowledgeCategory.parent_id == parent_id)
        category = query.first()
        if category:
            return category.id
        icon = "📅" if year else self._get_category_icon(category_name)
        color = self._get_category_color(category_name)
        new_category = KnowledgeCategory(
            knowledge_base_id=knowledge_base_id,
            name=category_name,
            description=f"{year}年税务文档" if year else f"{category_name}相关文档",
            parent_id=parent_id,
            icon=icon,
            color=color,
        )
        db.add(new_category)
        db.commit()
        db.refresh(new_category)
        logger.info(f"Created new category: {category_name} (ID: {new_category.id})")
        return new_category.id

    def _get_category_icon(self, category_name: str) -> str:
        icon_map = {
            "税收政策": "📋",
            "征管规定": "📊",
            "公告通知": "📢",
            "税务稽查": "🔍",
            "发票管理": "🧾",
            "纳税服务": "🤝",
            "税收协定": "🤝",
            "人才培养": "👨\u200d🎓",
            "其他": "📄",
        }
        return icon_map.get(category_name, "📄")

    def _get_category_color(self, category_name: str) -> str:
        color_map = {
            "税收政策": "#3b82f6",
            "征管规定": "#8b5cf6",
            "公告通知": "#f59e0b",
            "税务稽查": "#ef4444",
            "发票管理": "#10b981",
            "纳税服务": "#06b6d4",
            "税收协定": "#ec4899",
            "人才培养": "#6366f1",
            "其他": "#6b7280",
        }
        return color_map.get(category_name, "#6b7280")

    def create_year_category_structure(
        self, db: Session, knowledge_base_id: int, year: str
    ) -> dict[str, int]:
        year_category_id = self.get_or_create_category(
            db, knowledge_base_id, year, parent_id=None, year=year
        )
        category_map = {}
        for category_name in self.categories.keys():
            if category_name != "其他":
                category_id = self.get_or_create_category(
                    db, knowledge_base_id, category_name, parent_id=year_category_id
                )
                category_map[category_name] = category_id
        other_id = self.get_or_create_category(
            db, knowledge_base_id, "其他", parent_id=year_category_id
        )
        category_map["其他"] = other_id
        return category_map


# Process-wide service instance; stays None until the accessor is first called.
_categorization_service = None


def get_smart_categorization_service() -> SmartCategorizationService:
    """Return the shared SmartCategorizationService, creating it on first use."""
    global _categorization_service
    # Fast path: an instance was already built by an earlier call.
    if _categorization_service is not None:
        return _categorization_service
    _categorization_service = SmartCategorizationService()
    return _categorization_service
