o
    :/i                     @   s  d Z ddlZddlZddlZddlZddlZzddlZddlmZ ddl	m
Z
mZ ddlmZ W n ey<   eddw ddlZddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ eeZ dej!fddZ"dd Z#edkre#  dS dS )aO  
vLLM gRPC Server

Starts a gRPC server backed by AsyncLLM, using the VllmEngineServicer
from the smg-grpc-servicer package.

Usage:
    python -m vllm.entrypoints.grpc_server --model <model_path>

Example:
    python -m vllm.entrypoints.grpc_server         --model meta-llama/Llama-2-7b-hf         --host 0.0.0.0         --port 50051
    N)
reflection)vllm_engine_pb2vllm_engine_pb2_grpc)VllmEngineServicerzTsmg-grpc-servicer is required for gRPC mode. Install it with: pip install vllm[grpc])AsyncEngineArgs)log_version_and_model)init_logger)UsageContext)FlexibleArgumentParser)AsyncLLM)__version__argsc              	      s$  t tt| j td|  t }t| }|jt	j
d}tj|t	j
| j| jd}t||}tjjg dd}t|| tjjd jtjf}t|| | jpRd}| d| j }	||	 z| I d	H  td
|	 td t ! }
t "   fdd}t#j$t#j%fD ]}|
&|| qz	 ' I d	H  W n t(y   td Y n%w W td |j)ddI d	H  td |*  td td d	S W td |j)ddI d	H  td |*  td td d	S td |j)ddI d	H  td |*  td td w )z\
    Main gRPC serving function.

    Args:
        args: Parsed command line arguments
    zvLLM gRPC server args: %s)usage_context)vllm_configr   enable_log_requestsdisable_log_stats))zgrpc.max_send_message_length)zgrpc.max_receive_message_lengthr   )z1grpc.http2.min_recv_ping_interval_without_data_msi'  )z#grpc.keepalive_permit_without_callsT)options
VllmEngine0.0.0.0:NzvLLM gRPC server started on %sz"Server is ready to accept requestsc                      s   t d    d S )NzReceived shutdown signal)loggerinfoset 
stop_eventr   i/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/entrypoints/grpc_server.pysignal_handlerx   s   
z"serve_grpc.<locals>.signal_handlerzInterrupted by userz!Shutting down vLLM gRPC server...g      @)gracezgRPC server stoppedzAsyncLLM engine stoppedzShutdown complete)+r   r   VLLM_VERSIONmodelr   timer   from_cli_argscreate_engine_configr	   OPENAI_API_SERVERr   from_vllm_configr   r   r   grpcaioserverr    add_VllmEngineServicer_to_serverr   
DESCRIPTORservices_by_name	full_namer   SERVICE_NAMEenable_server_reflectionhostportadd_insecure_portstartasyncioget_running_loopEventsignalSIGTERMSIGINTadd_signal_handlerwaitKeyboardInterruptstopshutdown)r   
start_timeengine_argsr   	async_llmservicerr)   service_namesr0   addressloopr   sigr   r   r   
serve_grpc3   sx   













rG   c               
   C   s   t dd} | jdtddd | jdtdd	d t| } |  }z
tt	| W dS  t
yH } ztd
| td W Y d}~dS d}~ww )z<Main entry point for python -m vllm.entrypoints.grpc_server.zvLLM gRPC Server)descriptionz--hostr   zHost to bind gRPC server to)typedefaulthelpz--porti  zPort to bind gRPC server tozServer failed: %s   N)r
   add_argumentstrintr   add_cli_args
parse_argsuvlooprunrG   	Exceptionr   	exceptionsysexit)parserr   er   r   r   main   s0   
rZ   __main__)$__doc__argparser4   r7   rV   r"   r'   grpc_reflection.v1alphar   smg_grpc_protor   r   smg_grpc_servicer.vllm.servicerr   ImportErrorrR   vllm.engine.arg_utilsr   vllm.entrypoints.utilsr   vllm.loggerr   vllm.usage.usage_libr	   vllm.utils.argparse_utilsr
   vllm.v1.engine.async_llmr   vllm.versionr   r    __name__r   	NamespacerG   rZ   r   r   r   r   <module>   s@   Y
