import subprocess
from pathlib import Path


from app.celery_app import celery_app
from app.tasks.ocr_tasks import _rebuild_content_text, _switch_mode
from app.tasks.processor_tasks import DatabaseTask
from common_logging import get_logger
logger = get_logger(__name__)


def _asr_video(local_path: str) -> str:
    """Transcribe the audio track of a video file to text.

    The audio is extracted with ffmpeg into a temporary WAV file
    (``-vn -ar 16000 -ac 1``: no video, 16 kHz sample rate, one channel)
    placed next to the source video, then passed to FunASR's
    ``paraformer-zh`` model. The temporary file is always removed.

    Args:
        local_path: Filesystem path of the video to transcribe.

    Returns:
        The non-empty ``text`` fields of the recognition results joined by
        single spaces, or ``''`` when extraction or recognition fails. The
        failure is logged as a warning and never raised to the caller.
    """
    source = Path(local_path)
    # Build the temp name from the final path component only. Splitting the
    # whole string on its last '.' cuts inside a directory name when the file
    # itself has no extension (e.g. '/data/v1.2/clip' -> '/data/v1_audio.wav').
    audio_path = source.parent / f'{source.stem}_audio.wav'
    try:
        subprocess.run(
            ['ffmpeg', '-i', local_path, '-vn', '-ar', '16000', '-ac', '1', str(audio_path), '-y'],
            check=True,
            capture_output=True,
        )
        # Imported inside the function, so funasr is only needed when ASR runs.
        from funasr import AutoModel

        # NOTE(review): a new AutoModel is constructed on every call; consider
        # reusing one instance per batch if model start-up time matters.
        model = AutoModel(model='paraformer-zh')
        result = model.generate(input=str(audio_path))
        return ' '.join(r['text'] for r in result if r.get('text'))
    except Exception as e:
        # Best effort: a failed video yields an empty transcript, not an error.
        logger.warning(f'[asr_tasks] ASR 失败: {local_path} — {e}')
        return ''
    finally:
        # Also covers the case where ffmpeg failed before creating the file.
        audio_path.unlink(missing_ok=True)

@celery_app.task(bind=True, base=DatabaseTask, name='app.tasks.asr_tasks.run_asr_batch')
def run_asr_batch(self):
    """Transcribe inline videos that have a path but no transcript yet.

    Loads every ``TaxDocument`` whose ``inline_videos`` is not NULL and keeps
    those with at least one video entry that has a ``path`` and no
    ``transcript``. Each such video is run through ``_asr_video``. When a
    document gains at least one transcript, its content text is rebuilt, the
    change is committed, and the document's media is re-indexed.

    ``_switch_mode('media_processing')`` is called before processing starts
    and ``_switch_mode('inference')`` is always called afterwards.

    Returns:
        ``{'success': True, 'processed': int, 'total': int}`` where
        ``processed`` counts documents whose commit succeeded and ``total``
        counts documents that had pending videos.
    """
    from sqlalchemy.orm.attributes import flag_modified

    from app.models.tax_data import TaxDocument
    from app.services.tax_data_processor.document_cleaner import DocumentCleaner

    db = self.db
    docs = db.query(TaxDocument).filter(TaxDocument.inline_videos.isnot(None)).all()
    pending = [
        d for d in docs
        if any(not v.get('transcript') and v.get('path') for v in d.inline_videos or [])
    ]
    if not pending:
        logger.info('[asr_tasks] 无待处理文档，跳过')
        # Same keys as the normal return so callers can read 'total' safely.
        return {'success': True, 'processed': 0, 'total': 0}
    logger.info(f'[asr_tasks] 待处理文档: {len(pending)} 条')
    cleaner = DocumentCleaner()
    _switch_mode('media_processing')
    processed = 0
    try:
        for doc in pending:
            # Read the id up front: after a rollback the instance is expired,
            # and loading it inside an error handler could itself fail.
            doc_id = doc.id
            updated = False
            for vid in doc.inline_videos:
                if vid.get('transcript') or not vid.get('path'):
                    continue
                transcript = _asr_video(vid['path'])
                if transcript:
                    vid['transcript'] = transcript
                    updated = True
            if not updated:
                continue

            _rebuild_content_text(doc, cleaner)
            # The video dicts were mutated in place, so the attribute has to
            # be flagged explicitly for the change to be written on commit.
            flag_modified(doc, 'inline_videos')
            try:
                db.commit()
            except Exception as e:
                db.rollback()
                logger.error(f'[asr_tasks] doc_id={doc_id} 写库失败: {e}')
                continue
            processed += 1

            # Indexing runs after the transcript is already persisted, so a
            # failure here is reported separately from a database failure.
            try:
                from app.services.multimodal_indexer import index_document_media

                index_document_media(doc_id, doc.inline_images, doc.inline_videos)
            except Exception as e:
                # Resets the session in case reloading the document's
                # attributes after the commit was what failed.
                db.rollback()
                logger.error(f'[asr_tasks] doc_id={doc_id} 多模态索引失败: {e}')
    finally:
        _switch_mode('inference')
    logger.info(f'[asr_tasks] 完成: processed={processed}/{len(pending)}')
    return {'success': True, 'processed': processed, 'total': len(pending)}
