import re

from sqlalchemy.orm import Session

from common_logging import get_logger

logger = get_logger(__name__)



class SmartCategorizationService:

    def __init__(self):
        self.categories = {'税收政策': {'keywords': ['政策', '税收优惠', '减免', '扶持', '支持政策'], 'patterns': ['关于.*政策', '.*优惠.*通知', '.*减免.*公告']}, '征管规定': {'keywords': ['征管', '管理办法', '征收', '缴纳', '申报'], 'patterns': ['.*征管.*', '.*管理办法', '.*征收.*规定']}, '公告通知': {'keywords': ['公告', '通知', '通告', '公示'], 'patterns': ['.*公告', '.*通知', '.*通告']}, '税务稽查': {'keywords': ['稽查', '检查', '核查', '审计'], 'patterns': ['.*稽查.*', '.*检查.*通知']}, '发票管理': {'keywords': ['发票', '开票', '票据'], 'patterns': ['.*发票.*', '.*开票.*']}, '纳税服务': {'keywords': ['纳税服务', '便民', '服务指南', '办税'], 'patterns': ['.*纳税服务.*', '.*办税.*']}, '税收协定': {'keywords': ['协定', '双边', '税收协定', '议定书'], 'patterns': ['.*协定.*', '.*议定书']}, '人才培养': {'keywords': ['人才', '培训', '学员', '领军人才'], 'patterns': ['.*人才.*', '.*培训.*', '.*学员.*']}, '其他': {'keywords': [], 'patterns': []}}

    def categorize_by_content(self, title: str, content: str) -> str:
        text = (title + ' ' + content[:500]).lower()
        scores = {}
        for category, rules in self.categories.items():
            score = 0
            for keyword in rules['keywords']:
                if keyword in text:
                    score += 2
            for pattern in rules['patterns']:
                if re.search(pattern, text):
                    score += 3
            scores[category] = score
        best_category = max(scores.items(), key=lambda x: x[1])
        if best_category[1] == 0:
            return '其他'
        logger.info(f'Document categorization: {best_category[0]} (score: {best_category[1]})')
        return best_category[0]

    def categorize_by_filename(self, filename: str) -> str:
        filename_lower = filename.lower()
        for category, rules in self.categories.items():
            for keyword in rules['keywords']:
                if keyword in filename_lower:
                    return category
        return '其他'

    def get_or_create_category(self, db: Session, knowledge_base_id: int, category_name: str, parent_id: int | None=None, year: str | None=None) -> int:
        from app.models.knowledge_base import KnowledgeCategory

        query = db.query(KnowledgeCategory).filter(KnowledgeCategory.knowledge_base_id == knowledge_base_id, KnowledgeCategory.name == category_name)
        if parent_id is not None:
            query = query.filter(KnowledgeCategory.parent_id == parent_id)
        category = query.first()
        if category:
            return category.id
        icon = '📅' if year else self._get_category_icon(category_name)
        color = self._get_category_color(category_name)
        new_category = KnowledgeCategory(knowledge_base_id=knowledge_base_id, name=category_name, description=f'{year}年税务文档' if year else f'{category_name}相关文档', parent_id=parent_id, icon=icon, color=color)
        db.add(new_category)
        db.commit()
        db.refresh(new_category)
        logger.info(f'Created new category: {category_name} (ID: {new_category.id})')
        return new_category.id

    def _get_category_icon(self, category_name: str) -> str:
        icon_map = {'税收政策': '📋', '征管规定': '📊', '公告通知': '📢', '税务稽查': '🔍', '发票管理': '🧾', '纳税服务': '🤝', '税收协定': '🤝', '人才培养': '👨\u200d🎓', '其他': '📄'}
        return icon_map.get(category_name, '📄')

    def _get_category_color(self, category_name: str) -> str:
        color_map = {'税收政策': '#3b82f6', '征管规定': '#8b5cf6', '公告通知': '#f59e0b', '税务稽查': '#ef4444', '发票管理': '#10b981', '纳税服务': '#06b6d4', '税收协定': '#ec4899', '人才培养': '#6366f1', '其他': '#6b7280'}
        return color_map.get(category_name, '#6b7280')

    def create_year_category_structure(self, db: Session, knowledge_base_id: int, year: str) -> dict[str, int]:
        year_category_id = self.get_or_create_category(db, knowledge_base_id, year, parent_id=None, year=year)
        category_map = {}
        for category_name in self.categories.keys():
            if category_name != '其他':
                category_id = self.get_or_create_category(db, knowledge_base_id, category_name, parent_id=year_category_id)
                category_map[category_name] = category_id
        other_id = self.get_or_create_category(db, knowledge_base_id, '其他', parent_id=year_category_id)
        category_map['其他'] = other_id
        return category_map
# Module-wide service instance; stays None until the accessor is first called.
_categorization_service = None


def get_smart_categorization_service() -> SmartCategorizationService:
    """Return the shared SmartCategorizationService, creating it on first use."""
    global _categorization_service
    service = _categorization_service
    if service is not None:
        return service
    # First call: build the instance and remember it for later callers.
    service = SmartCategorizationService()
    _categorization_service = service
    return service
