from datetime import datetime, timedelta, timezone

from fastapi import APIRouter, Depends, Query
from sqlalchemy import case, func
from sqlalchemy.orm import Session

from app.database import get_db
from app.models.tax_data import DataProcessingTask, TaxDocument
from app.services.tax_data_processor.category_processor import CategoryProcessor
from common_logging import get_logger
logger = get_logger(__name__)



router = APIRouter()
_LOW_QUALITY_THRESHOLD = 0.5
_FAILED_ALERT_WINDOW_HOURS = 24
_FAILED_ALERT_COUNT = 50
_FAILED_RATE_ALERT = 0.1

@router.get('/overview')
async def get_overview_stats(db: Session=Depends(get_db)):
    total_docs = db.query(func.count(TaxDocument.id)).scalar() or 0
    imported_docs = db.query(func.count(TaxDocument.id)).filter(TaxDocument.is_imported.is_(True)).scalar() or 0
    failed_docs = db.query(func.count(TaxDocument.id)).filter(TaxDocument.processing_status == 'failed').scalar() or 0
    pending_docs = db.query(func.count(TaxDocument.id)).filter(TaxDocument.processing_status == 'pending').scalar() or 0
    total_tasks = db.query(func.count(DataProcessingTask.id)).scalar() or 0
    running_tasks = db.query(func.count(DataProcessingTask.id)).filter(DataProcessingTask.status == 'running').scalar() or 0
    completed_tasks = db.query(func.count(DataProcessingTask.id)).filter(DataProcessingTask.status == 'completed').scalar() or 0
    failed_tasks = db.query(func.count(DataProcessingTask.id)).filter(DataProcessingTask.status == 'failed').scalar() or 0
    return {'total_documents': total_docs, 'imported_documents': imported_docs, 'pending_documents': pending_docs, 'failed_documents': failed_docs, 'not_imported_documents': total_docs - imported_docs, 'import_rate': round(imported_docs / total_docs, 4) if total_docs else 0.0, 'total_tasks': total_tasks, 'running_tasks': running_tasks, 'completed_tasks': completed_tasks, 'failed_tasks': failed_tasks}

@router.get('/by-category')
async def get_category_stats(db: Session=Depends(get_db)):
    category_processor = CategoryProcessor()
    all_categories = category_processor.get_all_categories()
    rows = db.query(TaxDocument.category_id, func.count(TaxDocument.id).label('total'), func.sum(case((TaxDocument.is_imported.is_(True), 1), else_=0)).label('imported'), func.sum(case((TaxDocument.processing_status == 'failed', 1), else_=0)).label('failed'), func.sum(case((TaxDocument.processing_status == 'completed', 1), else_=0)).label('completed')).group_by(TaxDocument.category_id).all()
    stat_map = {row.category_id: {'total': row.total, 'imported': int(row.imported or 0), 'failed': int(row.failed or 0), 'completed': int(row.completed or 0)} for row in rows}
    categories = []
    for c in all_categories:
        s = stat_map.get(c['id'], {'total': 0, 'imported': 0, 'failed': 0, 'completed': 0})
        categories.append({'id': c['id'], 'name': c['name'], 'type': c['type'], 'total': s['total'], 'imported': s['imported'], 'not_imported': s['total'] - s['imported'], 'failed': s['failed'], 'completed': s['completed'], 'import_rate': round(s['imported'] / s['total'], 4) if s['total'] else 0.0, 'fail_rate': round(s['failed'] / s['total'], 4) if s['total'] else 0.0})
    return {'categories': categories}

@router.get('/processing-status')
async def get_processing_status(db: Session=Depends(get_db)):
    rows = db.query(TaxDocument.processing_status, func.count(TaxDocument.id).label('count')).group_by(TaxDocument.processing_status).all()
    distribution = {row.processing_status: row.count for row in rows}
    total = sum(distribution.values())
    return {'total': total, 'status_distribution': {'pending': distribution.get('pending', 0), 'processing': distribution.get('processing', 0), 'completed': distribution.get('completed', 0), 'failed': distribution.get('failed', 0)}, 'rates': {status: round(count / total, 4) if total else 0.0 for status, count in distribution.items()}}

@router.get('/quality')
async def get_quality_stats(category_id: int | None=None, db: Session=Depends(get_db)):
    query = db.query(TaxDocument)
    if category_id:
        query = query.filter(TaxDocument.category_id == category_id)
    total = query.count()
    no_content = query.filter(TaxDocument.content_hash.is_(None), TaxDocument.processing_status == 'completed').count()
    failed_total = query.filter(TaxDocument.processing_status == 'failed').count()
    cutoff = datetime.utcnow() - timedelta(hours=_FAILED_ALERT_WINDOW_HOURS)
    recent_failed = query.filter(TaxDocument.processing_status == 'failed', TaxDocument.updated_at >= cutoff).count()
    fail_rate = round(failed_total / total, 4) if total else 0.0
    alerts = []
    if recent_failed >= _FAILED_ALERT_COUNT:
        alerts.append({'level': 'critical', 'code': 'HIGH_RECENT_FAILURES', 'message': f'最近 {_FAILED_ALERT_WINDOW_HOURS}h 内失败文档数 {recent_failed} 超过阈值 {_FAILED_ALERT_COUNT}，请检查爬虫/解析服务'})
    if fail_rate >= _FAILED_RATE_ALERT:
        alerts.append({'level': 'warning', 'code': 'HIGH_FAILURE_RATE', 'message': f'处理失败率 {fail_rate:.1%} 超过阈值 {_FAILED_RATE_ALERT:.0%}，建议检查目标网站结构是否变化'})
    no_content_rate = round(no_content / max(total, 1), 4)
    if no_content_rate >= 0.05:
        alerts.append({'level': 'warning', 'code': 'HIGH_EMPTY_CONTENT_RATE', 'message': f'正文缺失率 {no_content_rate:.1%}（成功文档中正文为空），建议检查 HTML 清洗规则'})
    if alerts:
        logger.warning(f"[质量告警] {len(alerts)} 条告警: {[a['code'] for a in alerts]}")
    return {'total_documents': total, 'failed_documents': failed_total, 'fail_rate': fail_rate, 'no_content_documents': no_content, 'no_content_rate': no_content_rate, 'recent_failed_24h': recent_failed, 'alerts': alerts, 'alert_count': len(alerts), 'thresholds': {'low_quality_score': _LOW_QUALITY_THRESHOLD, 'failed_alert_count': _FAILED_ALERT_COUNT, 'failed_alert_window_hours': _FAILED_ALERT_WINDOW_HOURS, 'failed_rate_alert': _FAILED_RATE_ALERT}}

@router.get('/failed-documents')
async def get_failed_documents(category_id: int | None=None, limit: int=Query(50, le=200), skip: int=0, db: Session=Depends(get_db)):
    query = db.query(TaxDocument).filter(TaxDocument.processing_status == 'failed')
    if category_id:
        query = query.filter(TaxDocument.category_id == category_id)
    total = query.count()
    docs = query.order_by(TaxDocument.updated_at.desc()).offset(skip).limit(limit).all()
    return {'total': total, 'skip': skip, 'limit': limit, 'items': [{'id': d.id, 'title': d.title, 'source_url': d.source_url, 'category_id': d.category_id, 'doc_number': d.doc_number, 'processing_status': d.processing_status, 'content_hash': d.content_hash, 'is_imported': d.is_imported, 'updated_at': d.updated_at.isoformat() if d.updated_at else None} for d in docs]}
