
    XjE                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlZd dlmZ d d	lmZ  ee          Zed
z  dz  Zed
z  dz  ZdZddZ G d d          Zdaded<   ddZdS )    )annotationsN)UTCdatetime)StringIO)Path)Lock)Any)LLM_SERVICE_DIR)
get_loggerbase_modelszbge-m3zbge-reranker-v2-m3)
"vllm.entrypoints.openai.api_serverembedding_server.pyrerank_server.pymlx_lm.serverllamafactorytorchrun	deepspeedaccelerate launch--model_name_or_path--served-model-namereturnstrc                 X    t          j        t                                                    S N)r   nowr   	isoformat     S/lsinfo/ai/hellotax_ai/base_platform/app/services/monitoring/gpu_runtime_service.py_utc_now_isor    $   s    <&&(((r   c                      e Zd ZdFdGdZdHdZdHdZdIdZdJdZdKdZdLdZ	dLdZ
dMdZdNdZdOdZdPdZdQd"ZdRd$ZdSd*ZdRd+ZdTdUd/ZdVd3ZdWd6ZdXd7ZdYd8ZdZd:Zd[d;Zd\d>Zd]dAZdFd^dDZdES )_GpuRuntimeMonitoringService      @cache_ttl_secondsfloatc                V    || _         t                      | _        d | _        d| _        d S )Ng        )_cache_ttl_secondsr   _cache_lock_cached_snapshot
_cached_at)selfr$   s     r   __init__z$GpuRuntimeMonitoringService.__init__*   s)    "3667;r   r   dict[str, Any]c                   t          j                    }| j        5  | j        r&|| j        z
  | j        k     r| j        cd d d            S |                                 }|| _        || _        |cd d d            S # 1 swxY w Y   d S r   )time	monotonicr(   r)   r*   r'   _build_snapshot)r+   r   snapshots      r   get_snapshotz(GpuRuntimeMonitoringService.get_snapshot0   s    n 	 	$ -t)>AX)X)X,	 	 	 	 	 	 	 	 ++--H$,D!!DO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   !A9	#A99A= A=c                p   |                                  }d |D             }|                                 }|                                 D ]}|d         }|                     ||          }|                    |          }|8|                     |          p|d         }|                     ||          }|||<   |d                             |d                    |dxx         |d         z  cc<   |                     ||          }	| 	                    ||          }
| 
                    |
|	          }||
|	dS )	Nc                     i | ]}|d          |S )uuidr   .0rows     r   
<dictcomp>z?GpuRuntimeMonitoringService._build_snapshot.<locals>.<dictcomp><   s    <<<Cs6{C<<<r   pidprocess_name	gpu_uuidsgpu_uuidgpu_memory_mbused_gpu_memory_mb)overviewdevicesmodels)_collect_gpu_rows_collect_runtime_processes_collect_compute_process_rows_find_model_parent_processget_read_process_cmdline_describe_processadd_build_models_build_devices_build_overview)r+   gpu_rowsgpu_by_uuid	processescompute_rowr;   
target_pidprocesscmdlinerC   rB   rA   s               r   r1   z+GpuRuntimeMonitoringService._build_snapshot:   sX   ))++<<8<<<3355	==?? 		J 		JKe$C88iHHJmmJ//G44Z@@_KP^D_00WEE(/	*%K $$[%<===O$$$4H(II$$$$##I{;;%%h	::''88$FKKKr   rB   list[dict[str, Any]]rC   c                   t          d |D                       }|t          d |D                       z  }t          d |D                       }t          |          t          d |D                       ||t                      dS )Nc              3  .   K   | ]}|d          dv dV  dS )status>   offlinewarning   Nr   r8   devices     r   	<genexpr>z>GpuRuntimeMonitoringService._build_overview.<locals>.<genexpr>P   s2      ``VH5EI_5_5_!5_5_5_5_``r   c              3  2   K   | ]}|d          dk    dV  dS rY   failedr\   Nr   r8   models     r   r_   z>GpuRuntimeMonitoringService._build_overview.<locals>.<genexpr>Q   s0      PPE(Ox4O4O14O4O4O4OPPr   c              3  2   K   | ]}|d          dk    dV  dS ra   r   rc   s     r   r_   z>GpuRuntimeMonitoringService._build_overview.<locals>.<genexpr>R   s0      !W!W5?h;V;V!;V;V;V;V!W!Wr   c              3  2   K   | ]}|d          dk    dV  dS )rY   rZ   r\   Nr   r]   s     r   r_   z>GpuRuntimeMonitoringService._build_overview.<locals>.<genexpr>U   s2      ![![VH=MQZ=Z=Z!=Z=Z=Z=Z![![r   )totalGpuCountonlineGpuCountrunningModelCount
alertCount	updatedAt)sumlenr    )r+   rB   rC   alert_countrunning_model_counts        r   rN   z+GpuRuntimeMonitoringService._build_overviewM   s     ``'`````sPP&PPPPPP!!W!W!W!W!WWW \\!![!['![![![[[!4%%
 
 	
r   rO   rQ   dict[int, dict[str, Any]]c                   i }|                                 D ]`}|d         }|                    d          r |d         D ]7}|                    |t                                                    |           8at          j                    }g }|D ]}	t          |                    |	d         t                                          }
|	d         }|	d         }d}|dk    rt          ||z  dz            }| 
                    ||	d	         |
|
          }|                    d|	d          |	d          d|	d          ||t          |dz  d          t          |dz  d          ||	d	         |
d	           |S )N
model_nameGPU Process r=   r6   memory_total_mbmemory_used_mbr   d   utilization_percent)memory_usage_percentrw   running_model_namesru   zgpu-indexname #   r\   )	idr{   hostNamerY   memoryUsedGbmemoryTotalGbmemoryUsagePercentutilizationPercentrunningModelNames)values
startswith
setdefaultsetrK   socketgethostnamesortedrH   round_derive_gpu_statusappend)r+   rO   rQ   running_models_by_gpurT   rr   r>   	host_namerB   gpu_rowrunning_modelsrt   ru   rx   rY   s                  r   rM   z*GpuRuntimeMonitoringService._build_devices[   s    68 '')) 	R 	RG .J$$^44 #K0 R R%00355AAEEjQQQQR&((	(* 	 	G#$9$=$=gfosuu$U$UVVN%&78O$%56N#$ ""',^o-MPS-S'T'T$,,%9$+,A$B$2-	 -  F NN3!133&vDD''2BDD )$$).4*?$C$C%*?T+A1%E%E*>*12G*H)7
 
    r   rP   dict[str, dict[str, Any]]c                R  	 g }|                                 D ]}|d         }|                    d          r fdt          |d                   D             }d                    d |D                       pd}d                    d |D                       pd	}|                    d
|d          ||                     |          |                     |          ||t          |d         dz  d          |d         d           ddddd	|                    	fd           |S )Nrr   rs   c                (    g | ]}|v |         S r   r   )r8   r>   rP   s     r   
<listcomp>z=GpuRuntimeMonitoringService._build_models.<locals>.<listcomp>   s3       {** H%***r   r=   z, c              3  @   K   | ]}t          |d                    V  dS )rz   N)r   r7   s     r   r_   z<GpuRuntimeMonitoringService._build_models.<locals>.<genexpr>   s.      FFcCL 1 1FFFFFFr   zN/Ac              3  <   K   | ]}|d           d|d          V  dS )r{   r|   rz   Nr   r7   s     r   r_   z<GpuRuntimeMonitoringService._build_models.<locals>.<genexpr>   s8      OOsS[::CL::OOOOOOr   
Unassignedzpid-r;   r?   r}   r\   
started_at)r~   r{   versionrY   gpuIdgpuNamer   	startedAtr         )runningloadingidlerb   c                \                         | d         d          | d          | d         fS )NrY   c   r   r{   )rH   )itemstatus_orders    r   <lambda>z;GpuRuntimeMonitoringService._build_models.<locals>.<lambda>   s3      h44n%%V r   )key)	r   r   r   joinr   _build_version_label_derive_model_statusr   sort)
r+   rQ   rP   rC   rT   rr   rO   gpu_ids	gpu_namesr   s
     `      @r   rL   z)GpuRuntimeMonitoringService._build_models   s    (* '')) 	 	G .J$$^44     &w{'; < <  H
 iiFFXFFFFFO%G		OOhOOOOO_S_  MM111&#88AA"77@@$($)'/*BT*I1$M$M!(!6	 	    $%AKK    	 	
 	
 	
 r   c                   |                      g d          }|sg S g }|                     |          D ]}t          |          dk     r|                    |                     |d                   |d                                         |d                                         |                     |d                   |                     |d                   |                     |d                   d	           |S )
N)
nvidia-smizD--query-gpu=index,name,uuid,memory.used,memory.total,utilization.gpu--format=csv,noheader,nounits   r   r\   r   r         )rz   r{   r6   ru   rt   rw   )_run_command_parse_csv_outputrm   r   	_safe_intstripr+   outputrowsrecords       r   rD   z-GpuRuntimeMonitoringService._collect_gpu_rows   s   ""  
 
  	I%',,V44 	 	F6{{QKK!^^F1I66"1IOO--"1IOO--&*nnVAY&?&?'+~~fQi'@'@+/>>&)+D+D 	 	 	 	 r   c           
        |                      g d          }|rd|v rg S g }|                     |          D ]}t          |          dk     r|                    |d                                         |                     |d                   |d                                         |                     |d                   d           d	 |D             S )
N)r   z>--query-compute-apps=gpu_uuid,pid,process_name,used_gpu_memoryr   zNo running processes foundr   r   r\   r   r   )r>   r;   r<   r@   c                *    g | ]}|d          dk    |S )r;   r   r   r7   s     r   r   zMGpuRuntimeMonitoringService._collect_compute_process_rows.<locals>.<listcomp>   s!    666s5zA~~~~~r   )r   r   rm   r   r   r   r   s       r   rF   z9GpuRuntimeMonitoringService._collect_compute_process_rows   s    ""  
 
  	5??I%',,V44 
	 
	F6{{QKK &q	 1 1>>&)44$*1IOO$5$5*...*C*C	     76t6666r   c                   |                      g d          }|si S i }|                                D ]}|                                }|s|                    d d          }t	          |          dk    rC|                     |d                   }|d                                         }|dk    s|                     |          s|                     ||          ||<   |S )N)psz-eoz
pid=,args=r\   r   r   )r   
splitlinesr   splitrm   r   _looks_like_model_processrJ   )r+   r   rQ   linepartsr;   argss          r   rE   z6GpuRuntimeMonitoringService._collect_runtime_processes   s    ""#>#>#>?? 	I/1	%%'' 	? 	?D::<<D JJtQ''E5zzQ..q**C8>>##Daxxt==dCCx!33C>>IcNNr   r;   intr   r   c                   |                      |          }|                     |          }|                     |d          }|                     |          }||||r|                     |          nd |                     |r|                     |          nd |          |dt                      |                     ||||          d	S )Nz--portr   )	r;   r   runtimeporthealthyr   r?   r=   rr   )_read_process_env_infer_runtime_extract_flag_value_get_process_started_atr   _check_service_healthr   _infer_model_name)r+   r;   r   env_mapr   r   r   s          r   rJ   z-GpuRuntimeMonitoringService._describe_process   s    ((--%%d++''h7711#66
,0:DNN4(((d11$2X$..2F2F2FTXZabb$00dGWMM

 

 
	
r   boolc                l    |                                 t          fdt          D                       S )Nc              3  D   K   | ]}|                                 v V  d S r   lower)r8   hint
normalizeds     r   r_   zHGpuRuntimeMonitoringService._looks_like_model_process.<locals>.<genexpr>  s0      NN$4::<<:-NNNNNNr   )r   anyMODEL_PROCESS_HINTSr+   r   r   s     @r   r   z5GpuRuntimeMonitoringService._looks_like_model_process   s4    ZZ\\
NNNN:MNNNNNNr   c                    |                                 }d|v rdS d|v rdS d|v rdS d|v rdS d	|v rd
S d|v sd|v sd|v rdS dS )Nr   vLLMr   	Embeddingr   Rerankerr   MLXr   zLLaMA-Factoryr   r   r   TrainingRuntimer   r   s      r   r   z*GpuRuntimeMonitoringService._infer_runtime  s    ZZ\\
/:==6 J..;++:j((5Z''"?*$$j(("j00:yr   r   dict[str, str]r   c                   |                      |d          p|                    d          }|r|S |                      |d          pi|                      |d          pS|                    d          p>|                    d          p)|                    d          p|                    d          }|rt          |          j        S |d	k    r:t          |                    dt	          t
                                        j        S |d
k    r:t          |                    dt	          t                                        j        S t          j        d|          }|r't          |	                    d                    j        S d| S )Nr   SERVED_MODEL_NAMEz--modelr   
MODEL_PATHMODEL_NAME_OR_PATHEMBEDDING_MODEL_PATHRERANK_MODEL_PATHr   r   z-(/[^\s]+(?:base_models|trained_models)[^\s]*)r\   rs   )
r   rH   r   r{   r   DEFAULT_EMBEDDING_MODEL_PATHDEFAULT_RERANK_MODEL_PATHresearchgroup)r+   r;   r   r   r   served_model_name
model_path
path_matchs           r   r   z-GpuRuntimeMonitoringService._infer_model_name  s    44T;PQQ 
U\U`U`V
 V
  	%$$$$T955 0''.DEE0{{<((0 {{/000 {{122	0
 {{.// 	  	)
##((k!!$:C@\<]<]^^__ddj  $7=V9W9WXXYY^^YPRVWW
 	2
((++,,11#c###r   rT   c                N    |d         }|d         }|r| d| S | d|d          S )Nr   r   u    · :u    · PID r;   r   )r+   rT   r   r   s       r   r   z0GpuRuntimeMonitoringService._build_version_label1  sI    )$v 	+**D***3375>333r   rx   rw   ry   	list[str]ru   c                B    |dk    s|dk    rdS |s|dk    s|dk    rdS dS )NZ   _   r[   r   busyonliner   )r+   rx   rw   ry   ru   s        r   r   z.GpuRuntimeMonitoringService._derive_gpu_status8  sF      2%%)<)B)B9 	"5"9"9^a=O=O6xr   c                    |d         }|d         dk    pt          |d                   }|du rdS |du r'|d         r|                     |d	                   rd
ndS |rdS dS )Nr   r?   r   r=   Tr   Fr   r   r   rb   r   )r   _started_recently)r+   rT   r   has_gpu_memorys       r   r   z0GpuRuntimeMonitoringService._derive_model_statusE  s    )$ 1A5Sgk>R9S9Sd??9e $ 6 6w|7L M M[99S[[ 	9vr   ,  r   threshold_secondsc                    	 t          j        |          }n# t          $ r Y dS w xY w|j        |                    t
                    }t          j        t
                    |z
                                  |k     S )NF)tzinfo)r   fromisoformat
ValueErrorr  replacer   r   total_seconds)r+   r   r   
started_dts       r   r   z-GpuRuntimeMonitoringService._started_recentlyP  s    	!/
;;JJ 	 	 	55	$#++3+77JS!!J.==??BSSSs    
%%r   
int | Nonebool | Nonec                    |sd S d| dg}|dv r|                     d| d           |D ]B}	 t          j        |d          }|j        dk     r	|j        c S .# t          j        $ r Y ?w xY wdS )	Nzhttp://127.0.0.1:z/health>   r   r   z
/v1/modelsg?)timeouti  F)r   httpxrH   status_code
is_success	HTTPError)r+   r   r   urlsurlresponses         r   r   z1GpuRuntimeMonitoringService._check_service_healthY  s     	41D1112o%%KK<D<<<=== 	 	C 9S#666'#--#.... .?   us   'AA+*A+flag
str | Nonec                    t          j        t          j        |           d          }|                    |          }|sd S |                    d                              d          S )Nz!(?:=|\s+)(\"[^\"]+\"|'[^']+'|\S+)r\   z'")r   compileescaper   r   r   )r+   r   r  patternmatchs        r   r   z/GpuRuntimeMonitoringService._extract_flag_valueh  s^    *")D//[[[\\t$$ 	4{{1~~##E***r   c                @   |                      ddt          |          ddg          }|st                      S |                                }	 t	          j        |d          }|                    t                                                    S # t          $ r |cY S w xY w)Nr   z-pz-ozlstart=z%a %b %d %H:%M:%S %Y)
r   r   r    r   r   strptime
astimezoner   r   r  )r+   r;   r   	raw_valuer   s        r   r   z3GpuRuntimeMonitoringService._get_process_started_ato  s    ""D$C$	#JKK 	">>!LLNN		!*96LMMJ((--77999 	 	 		s   A B BBc                   t          d| d          }	 |                    dd          }n# t          $ r Y d S w xY wd                    d |                    d          D                                                       pd S )	N/proc/z/cmdlineutf-8ignoreencodingerrors c              3     K   | ]}||V  	d S r   r   )r8   parts     r   r_   zDGpuRuntimeMonitoringService._read_process_cmdline.<locals>.<genexpr>  s'      GG$GGGGGGGr    )r   	read_textOSErrorr   r   r   )r+   r;   cmdline_pathcontents       r   rI   z1GpuRuntimeMonitoringService._read_process_cmdlinez  s    2S22233	",,gh,OOGG 	 	 	44	xxGGv)>)>GGGGGMMOOWSWWs   - 
;;known_processesc                   t                      }|}|dk    r||vr||v r|S |                    |           	 t          d| d                              dd          }|                    d          }t          |          dk    r[|d                                                                         }t          |          dk    r|                     |d                   }n# t          $ r Y nw xY w	 |S )	Nr\   r  z/statr  r   r!  )r   )	r   rK   r   r(  r   rm   r   r   r)  )r+   r;   r,  visitedcurrentstat_contentr   fieldss           r   rG   z6GpuRuntimeMonitoringService._find_model_parent_process  s    %%kkgW44/))KK   #$;W$;$;$;<<FF$X  G     %**3//u::??"1X^^--3355F6{{a''"&..";";    
s   B*C# #
C0/C0c                N   t          d| d          }	 |                                }n# t          $ r i cY S w xY wi }|                    d          D ]Q}|rd|vr	|                    dd          \  }}|                    dd          ||                    dd          <   R|S )	Nr  z/environ       =r\   r  r   )r#  )r   
read_bytesr)  r   decode)r+   r;   env_pathrawr   r   r   values           r   r   z-GpuRuntimeMonitoringService._read_process_env  s    ....//	%%''CC 	 	 	III	"$IIg&& 	c 	cD 4t++D!,,JC<ALLYaL<b<bGCJJwxJ8899s   * 99r   list[list[str]]c                X    d t          j        t          |                    D             S )Nc                    g | ]}||S r   r   r7   s     r   r   zAGpuRuntimeMonitoringService._parse_csv_output.<locals>.<listcomp>  s    CCCsCCCCr   )csvreaderr   )r+   r   s     r   r   z-GpuRuntimeMonitoringService._parse_csv_output  s(    CCsz(6*:*:;;CCCCr   r:  r	   c                    	 t          t          |                                                    S # t          t          f$ r Y dS w xY w)Nr   )r   r   r   	TypeErrorr  )r+   r:  s     r   r   z%GpuRuntimeMonitoringService._safe_int  sN    	s5zz''))***:& 	 	 	11	s   -0 AAcommandr
  c                z   	 t          j        |ddd|          }n?# t          t           j        f$ r&}t                              d|           Y d }~d S d }~ww xY w|j        dk    r>|j                                        }|r!t          	                    d|j        |           d S |j
                                        S )NFT)checkcapture_outputtextr
  zGPU runtime command failed: %sr   z#GPU runtime command returned %s: %s)
subprocessrunr)  SubprocessErrorloggerr[   
returncodestderrr   infostdout)r+   rB  r
  resultexcrL  s         r   r   z(GpuRuntimeMonitoringService._run_command  s    	^uTg  FF 34 	 	 	NN;SAAA44444	 !!]((**F ^A6CTV\]]]4}""$$$s    AAAN)r#   )r$   r%   )r   r-   )rB   rV   rC   rV   r   r-   )rO   rV   rQ   rp   r   rV   )rQ   rp   rP   r   r   rV   )r   rV   )r   rp   )r;   r   r   r   r   r-   )r   r   r   r   )r   r   r   r   )
r;   r   r   r   r   r   r   r   r   r   )rT   r-   r   r   )
rx   r   rw   r   ry   r   ru   r   r   r   )r   )r   r   r   r   r   r   )r   r  r   r   r   r  )r   r   r  r   r   r  )r;   r   r   r   )r;   r   r   r  )r;   r   r,  rp   r   r   )r;   r   r   r   )r   r   r   r;  )r:  r	   r   r   )rB  r   r
  r%   r   r  )__name__
__module____qualname__r,   r3   r1   rN   rM   rL   rD   rF   rE   rJ   r   r   r   r   r   r   r   r   r   r   rI   rG   r   r   r   r   r   r   r   r"   r"   (   sB              L L L L&
 
 
 
& & & &P% % % %N   47 7 7 70   &
 
 
 
"O O O O   ($ $ $ $24 4 4 4   	 	 	 	T T T T T   + + + +	 	 	 	X X X X   0   D D D D   % % % % % % %r   r"   z"GpuRuntimeMonitoringService | None_gpu_runtime_servicec                 :    t           t                      a t           S r   )rT  r"   r   r   r   get_gpu_runtime_servicerV    s    #:<<r   )r   r   )r   r"   ) 
__future__r   r>  r   r   rG  r/   r   r   ior   pathlibr   	threadingr   typingr	   r  
app.configr
   common_loggingr   rQ  rJ  r   r   r   r    r"   rT  __annotations__rV  r   r   r   <module>r_     s   " " " " " " " 



 				       " " " " " " " "                          & & & & & & % % % % % %	H		.>I +m;>RR  ) ) ) )V% V% V% V% V% V% V% V%r <@  ? ? ? ?           r   