o
    "i                     @   st   d Z ddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZ G dd	 d	eZdS )
z'Qwen local training platform using MLX.    N)Path)DictAnyOptional)datetime   )TrainingPlatformTrainingConfig	JobStatusTrainingResultc                	   @   s   e Zd ZdZdZddiZdd Zdee defdd	Z	dee de
fd
dZd"dedee de
fddZdedefddZdedefddZdedefddZdedefddZdefddZdefddZedededeeef defd d!ZdS )#QwenLocalPlatformz2Local training platform using MLX for Qwen models.zQwen3.5-4B-MLXzQwen/Qwen3.5-4Bc                 C   sV   ddl m} t|j| _t|j| _|j| _|j	| _
|j| _| jjddd i | _d S )Nr   settingsTparentsexist_ok)app.core.configr   r   QWEN_LOCAL_TRAINING_BASE_DIRbase_dirQWEN_LOCAL_MODEL_CACHE_DIRmodel_cache_dirQWEN_LOCAL_MAX_CONCURRENT_JOBSmax_concurrentQWEN_LOCAL_MAX_MEMORY_GBmax_memory_gbQWEN_LOCAL_MIN_FREE_MEMORY_GBmin_free_memory_gbmkdir	processes)selfr    r    [/lsinfo/ai/hellotax_ai/training_center/backend/app/services/training_platform/qwen_local.py__init__   s   
zQwenLocalPlatform.__init__
model_namereturnc                 C   s2   |s| j S || jv r| j| S t|}|jp| j S N)DEFAULT_MODEL_NAMEMODEL_ALIASESr   name)r   r#   
model_pathr    r    r!   _normalize_model_dir_name   s   

z+QwenLocalPlatform._normalize_model_dir_namec                 C   s4   |  |}|rt|nd }|r| r|S | j| S r%   )r*   r   existsr   )r   r#   model_dir_name	candidater    r    r!   _resolve_model_path)   s
   

z%QwenLocalPlatform._resolve_model_pathNjob_idr,   c                 C   sH   |r| j | d | S | j d| D ]}|  S | j | j d | S )Njobsz*/jobs/)r   globr&   )r   r/   r,   r-   r    r    r!   _get_job_dir0   s
   zQwenLocalPlatform._get_job_dirconfigc                 C   s   |   std| j d|  std| j ddt d }| |j	}| 
||}|jddd |jp;i d	d
}t| |j	}|t|j|||jpSi d}ddlm} |j}	tj| j|t|||	fd}
|
  |
| j|< |S )z Create and start a training job.zInsufficient memory (limit: zGB)zMax concurrent jobs reached ()job_z%Y%m%d_%H%M%STr   	task_typesft)r6   
dataset_idr#   r,   hyperparametersr   r   )targetargs)_check_memoryRuntimeErrorr   _check_concurrent_limitr   r   nowstrftimer*   r#   r2   r   r9   getstrr.   intr8   r   r   DATABASE_URLmpProcess_run_training_processstartr   )r   r3   r/   r,   job_dirr6   r#   
job_configr   db_urlprocessr    r    r!   create_training_job9   s2   
z%QwenLocalPlatform.create_training_jobc                 C   s   |  |}|d }| s| rtddddS tddddS t| }|d }| r2| nd	}t|d
d|dd|dd|dS )z Get job status from status.json.zstatus.jsonrunningg        zTraining starting)statusprogressmessagependingzJob not foundztraining.logNrO   rP   errorzTraining in progress)rO   rP   rQ   logs)r2   r+   r
   jsonloads	read_textrA   )r   r/   rI   status_filedatalog_filerT   r    r    r!   get_job_status^   s   



z QwenLocalPlatform.get_job_statusc                 C   s8   |  |}|d }| stdt|i dt|idS )zGet training result.final_modelzTraining not completedr)   )model_idmetrics	artifacts)r2   r+   
ValueErrorr   rB   )r   r/   rI   	model_dirr    r    r!   get_job_resultv   s   

z QwenLocalPlatform.get_job_resultc                 C   sP   || j v r&| j | }| r |  |jdd | r |  | j |= dS dS )zCancel running job.   )timeoutTF)r   is_alive	terminatejoinkill)r   r/   rL   r    r    r!   
cancel_job   s   

zQwenLocalPlatform.cancel_jobc                 C   s4   t  }|jd }|jd }|| jd k o|| jkS )z&Check if memory usage is within limit.i   @g?)psutilvirtual_memoryused	availabler   r   )r   memoryused_gbavailable_gbr    r    r!   r<      s   

zQwenLocalPlatform._check_memoryc                 C   s"   t dd | j D }|| jk S )zCheck concurrent job limit.c                 s   s    | ]	}|  rd V  qdS )r   N)re   ).0pr    r    r!   	<genexpr>   s    z<QwenLocalPlatform._check_concurrent_limit.<locals>.<genexpr>)sumr   valuesr   )r   activer    r    r!   r>      s   
z)QwenLocalPlatform._check_concurrent_limitrI   rK   c                 C   s   ddl m} || ||| dS )zTraining process entry point.r   )run_trainingN)qwen.trainerrw   )r/   rI   r3   rK   rw   r    r    r!   rG      s   z'QwenLocalPlatform._run_training_processr%   )__name__
__module____qualname____doc__r&   r'   r"   r   rB   r*   r   r.   r2   r	   rM   r
   r[   r   rb   boolri   r<   r>   staticmethodr   r   rG   r    r    r    r!   r      s"    

	%(r   )r|   rU   rj   multiprocessingrE   pathlibr   typingr   r   r   r   baser   r	   r
   r   r   r    r    r    r!   <module>   s    