o
    i                     @   s   d Z ddlZddlZddlZddlmZ ddlmZ ej	de
ee jd  ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ g dZdejfddZde
de e
 fddZ!dd Z"e#dkrze"  dS dS )uM  
税法知识库导入入口

用法：
  cd /lsinfo/ai/hellotax_ai/data_center/backend
  source venv/bin/activate

  # 全量导入
  python -m app.services.import_kb.run_import --phase all

  # 分阶段执行
  python -m app.services.import_kb.run_import --phase 1
  python -m app.services.import_kb.run_import --phase 2
  python -m app.services.import_kb.run_import --phase 2b
  python -m app.services.import_kb.run_import --phase 3
  python -m app.services.import_kb.run_import --phase 4
  python -m app.services.import_kb.run_import --phase 5

  # 组合阶段
  python -m app.services.import_kb.run_import --phase 1,2,2b

  # 断点续传（从上次中断处继续）
  python -m app.services.import_kb.run_import --phase 2 --resume

  # 清除 checkpoint 重新开始
  python -m app.services.import_kb.run_import --phase all --reset-state

环境变量（或 .env）：
  BASE_PLATFORM_URL     base_platform 地址，默认 http://localhost:8000
  IMPORT_EMAIL          登录邮箱，默认 admin@hellotax.cn
  IMPORT_PASSWORD       登录密码
  IMPORT_TENANT_ID      租户ID，默认 0
    N)Path)logger   )ImportState)
run_phase1login)
run_phase2)run_phase2b)
run_phase3)
run_phase4)
run_phase5)122b345returnc                  C   s   t jdd} | jdddd | jdtdd	d
 | jdtddd
 | jdtddd
 | jdtttddd | jdddd | jdddd |  S )Nu   税法知识库导入工具)descriptionz--phasealluP   执行阶段：all | 1 | 2 | 2b | 3 | 4 | 5 | 逗号分隔组合（如 1,2,2b）)defaulthelpz
--base-urlBASE_PLATFORM_URLzhttp://localhost:8000)r   z--emailIMPORT_EMAILzadmin@hellotax.cnz
--passwordIMPORT_PASSWORDzHellotax@2026#Adminz--tenant-idIMPORT_TENANT_ID0)typer   z--resume
store_trueu/   从上次断点继续（不清除 checkpoint）)actionr   z--reset-stateu   清除 checkpoint 重新开始)argparseArgumentParseradd_argumentosgetenvint
parse_args)parser r(   O/lsinfo/ai/hellotax_ai/data_center/backend/app/services/import_kb/run_import.pyr&   7   s   r&   	phase_argc                 C   s    | dkrt S dd | dD S )Nr   c                 S   s   g | ]
}|  r|  qS r(   )strip).0pr(   r(   r)   
<listcomp>J   s    z"resolve_phases.<locals>.<listcomp>,)
ALL_PHASESsplit)r*   r(   r(   r)   resolve_phasesG   s   r2   c                  C   s:  t  } t }| jr|  td t| j}td|  td| j d| j	  t
| j| j| j}|D ]^}td| d |dkrPt|| j|| j	 q7|dkr\t|| j| q7|d	krht|| j| q7|d
krtt|| j| q7|dkrt|| j| q7|dkrt|| j| q7td| d q7td d S )Nu   checkpoint 已清除u   执行阶段: z
base_url: z, tenant_id: u   ━━━ Phase u    开始 ━━━r   r   r   r   r   r   u   未知阶段: u	   ，跳过u   ✅ 所有阶段执行完毕)r&   r   reset_stateresetr   infor2   phasebase_url	tenant_idr   emailpasswordr   r   r	   r
   r   r   warning)argsstatephasestokenr6   r(   r(   r)   mainM   s2   

r@   __main__)$__doc__r    r#   syspathlibr   logurur   pathinsertstr__file__resolveparentsapp.services.import_kb.stater   "app.services.import_kb.phase1_initr   r   'app.services.import_kb.phase2_documentsr   (app.services.import_kb.phase2b_relationsr	   'app.services.import_kb.phase3_vectorizer
   %app.services.import_kb.phase4_autotagr   #app.services.import_kb.phase5_graphr   r0   	Namespacer&   listr2   r@   __name__r(   r(   r(   r)   <module>   s*    " #
