import sys
import os
from pathlib import Path
# Path bootstrap: locate the sibling `base_platform` directory, found by going
# three directory levels up from this file (parents[2]) and then into
# `base_platform`.
base_platform_dir = Path(__file__).resolve().parents[2] / 'base_platform'
# Make that directory the working directory and put it first on sys.path.
# Presumably this is what lets the `app.*` imports below resolve and lets
# relative config/resource paths work -- TODO confirm. These statements must
# stay ahead of the `app.*` imports.
os.chdir(base_platform_dir)
sys.path.insert(0, str(base_platform_dir))
from app.models import role, role_permission, user, tenant, knowledge_base
from app.db.session import SessionLocal
from sqlalchemy import text
from app.services.knowledge.vectorization_service import DocumentVectorizationService

def vectorize_pending_documents(tenant_id: int, user_id: int=1, model_id: int=69):
    """Vectorize every document of a tenant whose status is ``'pending'``.

    Opens a database session, switches ``search_path`` to the tenant schema
    (``tenant_<tenant_id>``, then ``public``), selects all rows of
    ``knowledge_documents`` with ``vectorization_status = 'pending'`` ordered
    by id, and runs ``DocumentVectorizationService.vectorize_document`` on
    each of them. Progress and a final success/failure summary are printed.

    Args:
        tenant_id: Numeric tenant identifier; used to build the schema name
            and forwarded to the vectorization service.
        user_id: Forwarded to ``vectorize_document`` as ``user_id``.
        model_id: Forwarded to ``vectorize_document`` as ``model_id``.

    Returns:
        None. A failure on a single document is printed and counted, then the
        loop moves on. Any other error is printed with a traceback rather
        than raised. The session is closed in every case.
    """
    db = SessionLocal()
    try:
        # The tenant id is interpolated directly into the SQL text, so coerce
        # it to an integer first: nothing but digits (and a sign) can reach
        # the statement, whatever the caller passed in.
        schema_id = int(tenant_id)
        db.execute(text(f'SET search_path TO tenant_{schema_id}, public'))
        pending_docs = db.execute(text("""
            SELECT id, title
            FROM knowledge_documents
            WHERE vectorization_status = 'pending'
            ORDER BY id
        """)).fetchall()
        if not pending_docs:
            print('✓ No pending documents to vectorize')
            return
        total = len(pending_docs)
        print(f'→ Found {total} pending documents')
        print(f'→ Using model_id: {model_id}')
        vectorization_service = DocumentVectorizationService(db)
        success_count = 0
        failed_count = 0
        for i, (doc_id, title) in enumerate(pending_docs, 1):
            # A NULL title must not make the document fail before the
            # service is even called; it only affects the progress label.
            label = '' if title is None else str(title)[:50]
            try:
                print(f'  [{i}/{total}] Vectorizing: {label}...')
                result = vectorization_service.vectorize_document(document_id=doc_id, tenant_id=tenant_id, user_id=user_id, model_id=model_id)
            except Exception as e:
                failed_count += 1
                print(f'    ✗ Failed: {e}')
                continue
            # Count the success only once the service call has returned, and
            # read the vector count defensively so that an unexpected return
            # value cannot make one document count as success AND failure.
            success_count += 1
            getter = getattr(result, 'get', None)
            vector_count = getter('vector_count') if callable(getter) else None
            print(f"    ✓ Success ({vector_count} vectors)")
        print(f'\n✓ Vectorization completed: {success_count} success, {failed_count} failed')
    except Exception as e:
        # Top-level boundary of a CLI tool: report the error instead of
        # propagating it, with the traceback for diagnosis.
        print(f'✗ Error: {e}')
        import traceback
        traceback.print_exc()
    finally:
        db.close()
if __name__ == '__main__':
    # CLI entry point: <tenant_id> is required; [user_id] and [model_id] are
    # optional and fall back to the defaults of vectorize_pending_documents.
    usage = 'Usage: python vectorize_pending.py <tenant_id> [user_id] [model_id]'
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    try:
        # Only the first three arguments are meaningful; extras are ignored.
        cli_ids = [int(arg) for arg in sys.argv[1:4]]
    except ValueError:
        # Non-numeric input is a usage error, not a crash with a traceback.
        print('✗ Error: tenant_id, user_id and model_id must be integers')
        print(usage)
        sys.exit(1)
    # Positional order matches the signature (tenant_id, user_id, model_id);
    # omitted trailing values use the function's own defaults, so they are
    # not duplicated here.
    vectorize_pending_documents(*cli_ids)