o
    ~ris                     @  s  d dl mZ d dlmZ d dlmZ ddlmZmZm	Z	m
Z
mZmZmZ er2ddlmZ ddlmZ i ddd	dd
dddddddddddddddddddddddddddddi dddddddddddddd d!d d"d d#d$d%d$d&d$d'd$d(d$d)d$d*d$d+d$d$d$d$d$d$d$d,d-d'd.	Zd/d0 Zd1ad2d3 Z	4dLdMd=d>ZdNdBdCZ	1	1	DdOdPdJdKZd1S )Q    )annotations)deepcopy)TYPE_CHECKING   )ChunkConcatenate"ErnieFuseAndSplitTextVisionExpertsMergeModulelist	TransposeWeightConverterWeightRenamingPreTrainedModel)HfQuantizermixtralminimax
minimax_m2	qwen2_moeafmoedeepseek_v2deepseek_v3dots1ernie4_5_moeglm4_moeglm4_moe_liteglm_moe_dsa	glm4v_moelongcat_flash
solar_open	qwen3_moeqwen3_omni_moeqwen3_omni_moe_thinker
qwen3_nexthunyuan_v1_moe	flex_olmoolmoe
exaone_moe
rt_detr_v2rt_detrpp_doclayout_v2pp_doclayout_v3	paligemmallava
aya_visionfuyugot_ocr2shieldgemma2gemma3internvl
llava_nextqwen2_vlsam3_tracker)	llava_next_videollava_onevisionvipllavavideo_llavamistral3mllama
qwen2_5_vlsam3_tracker_videopp_chart2tablec                   C  sP	  i dt dddt dddgdt dd	dgd
t dddt dddgdt dddt dddgdt dddgdt dddgdt dddgdt dddt dddt d d!dt d"d#dt d$d%dt d&d'dt d(d)dt d*d+dgd,t d-d.gd/t d0d1gd2t d3d4t d5d6gd7t d8d9t d:d;gd<t dddgd=t d>d?dt d@dAdgdBt dCdDt dEdFt dGdHgdIt dJdKtdLdMgdNtdOdPtdQdPgdRtdSgdTtdOdPgdRgdUtdVdWgdXtdOdPtdQdPgdRtdYdZtdOdPgdRgtdXdXtdQd[d\d]gdRtdZdZtdQd[d\d]gdRgt dJdKt d^d_tdLdMgdNtdOdPtdQdPgdRtdSdTtdOdPgdRgtd`dagdbtdOdPtdQdPgdRtdcddtdOdPgdRgt dedft dgdht didjt dkdlt dmdnt dodpt dqdrt dsdtt dudvt dwdxtdydztdOdQd{gdRtd|d}tdOdQd{gdRtd~gddgtdOdPgdRtdgddgtdOdQdgdRtddgddgtdOdQdgdRgt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddgt ddt ddt ddt ddgg t ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddăt ddƃt ddȃt ddʃt dd̃t ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddgt ddt dd̓t dd΃t ddgt ddЃt dd҃tdgdtdOdPgdRtdgdtdOdPgdRgtddgdbtdOdPtdQdPgdRtdddtdOdPgdRgt dddt dddgt dddt dddtdg dtdOdPgdRt dddt dd;dt dddgd} | d  t dddt dddg7  < t d~dtdVdWgdXtdOdPtdQdPgdRtdYdZtdOdPgdRg| d< | dI  | d< | d  t ddg7  < | dU  | d< | d  t ddg7  < tdVdWgdXtdOdPtdQdPgdRtdYdZtdOdPgdRg| d< | d<  | d< | d  | dU  7  < t ddt ddt ddt ddt d dt ddt ddt ddt dd	t d
dt ddt ddt ddt ddt ddt ddt ddt ddt ddt ddt d d!t d"d#t d$d%t d&d't d(d)t d*d+t d,d-t d.d/t d0d1t d2d3g| d4< t	 D ]\}}|| v rq| |  | |< q| S (5  Nr,   zlanguage_model.modellanguage_modelsource_patternstarget_patternszlanguage_model.lm_headlm_headcolpalizvlm(?!\.model)z	vlm.modelemu3ztext_model.model
text_modelztext_model.lm_headpaddleocr_vlmlp_ARzmodel.projectorz1^model(?!(\.visual|\.projector|\.language_model))zmodel.language_modelr4   z((?<!_)model(?!\.(language_model|visual))gemma3n_textz^model.language_modelmodeltimm_wrapperz(^(?!(?:model\.|backbone\.|tower\.))(.+)$ztimm_model.\1pi0
state_projzembed_action_time.state_projaction_in_projz embed_action_time.action_in_projaction_time_mlp_inz$embed_action_time.action_time_mlp_inaction_time_mlp_outz%embed_action_time.action_time_mlp_outz&^paligemma_with_expert.paligemma.modelz	model.vlmz)^paligemma_with_expert.gemma_expert.modelz	model.ditz+^paligemma_with_expert.gemma_expert.lm_headzmodel.dit.embed_tokensz(^paligemma_with_expert.paligemma.lm_headz%model.vlm.language_model.embed_tokensdinov3_convnextz(?<!model\.)stageszmodel.stages
dinov3_vitz(?<!model\.)layer.zmodel.layer.
timesfm2_5ff0fc1ff1fc2olmo_hybridattention_layer_norminput_layernormfeedforward_layer_normpost_attention_layernormqwen3_5_textr5   z'detector_model.vision_encoder.backbone.zvision_encoder.backbone.ztracker_neck.zvision_encoder.neck.t5gemma2_encoderz-(?<!decoder\.)(?<!text_model\.)embed_tokens\.ztext_model.embed_tokens.z5(?<!decoder\.)(?<!text_model\.)(?<!layer)(?<!_)norm\.ztext_model.norm.zA(?<!vision_model.encoder\.)(?<!decoder\.)(?<!text_model\.)layers.ztext_model.layers.r   z.block_sparse_moe.z.mlp.z.experts.*.w1.weightz.experts.*.w3.weightz.experts.gate_up_projr   )dimr   )rA   rB   
operationsz.experts.*.w2.weightz.experts.down_projr   zmlp.experts.*.gate_proj.weightzmlp.experts.*.up_proj.weightzmlp.experts.gate_up_projzmlp.experts.*.down_proj.weightzmlp.experts.down_proj   T)
check_dimsz.gate.weightz.router.weightz feed_forward.experts.*.w1.weightz feed_forward.experts.*.w3.weightz!feed_forward.experts.gate_up_projz feed_forward.experts.*.w2.weightzfeed_forward.experts.down_projvision_modelvision_towerzspatial_linear.0zspatial_linear.fc1zspatial_linear.2zspatial_linear.fc2zspatial_linear.3zspatial_linear.lnztemporal_linear.0ztemporal_linear.fc1ztemporal_linear.2ztemporal_linear.fc2ztemporal_linear.3ztemporal_linear.lnz!(?<!language_model\.)embed_tokenszlanguage_model.embed_tokensz(?<!language_model\.)layerszlanguage_model.layersz(?<!_)(?<!\w)norm\.zlanguage_model.norm.zmlp.gate.weight_1zmlp.vision_moe.gate.weight)dim0dim1zmlp.gate.weightzmlp.text_moe.gate.weightz'mlp.moe_statics.e_score_correction_biasz5mlp.text_moe.gate.moe_statics.e_score_correction_biasz7mlp.vision_moe.gate.moe_statics.e_score_correction_biaszexperts.*.down_proj.weightztext_moe.experts.down_projzvision_moe.experts.down_proj)	stack_dim
concat_dimzexperts.*.gate_proj.weightzexperts.*.up_proj.weightztext_moe.experts.gate_up_projzvision_moe.experts.gate_up_projzbackbone.conv_encoderbackboneout_projo_projzlayers.(\d+).fc1zlayers.\1.mlp.fc1zlayers.(\d+).fc2zlayers.\1.mlp.fc2zbbox_attention.q_linearzbbox_attention.q_projzbbox_attention.k_linearzbbox_attention.k_projzmask_head.lay1zmask_head.conv1.convzmask_head.gn1zmask_head.conv1.normzmask_head.lay2zmask_head.conv2.convzmask_head.gn2zmask_head.conv2.normzmask_head.adapter1z"mask_head.fpn_stages.0.fpn_adapterzmask_head.lay3z"mask_head.fpn_stages.0.refine.convzmask_head.gn3z"mask_head.fpn_stages.0.refine.normzmask_head.adapter2z"mask_head.fpn_stages.1.fpn_adapterzmask_head.lay4z"mask_head.fpn_stages.1.refine.convzmask_head.gn4z"mask_head.fpn_stages.1.refine.normzmask_head.adapter3z"mask_head.fpn_stages.2.fpn_adapterzmask_head.lay5z"mask_head.fpn_stages.2.refine.convzmask_head.gn5z"mask_head.fpn_stages.2.refine.normzmask_head.out_layzmask_head.output_convzencoder.encoder.(\d+).layerszencoder.aifi.\1.layerszself_attn.out_projzself_attn.o_projzencoder_attn.out_projzencoder_attn.o_projz%decoder.layers.(\d+).sa_qcontent_projz*decoder.layers.\1.self_attn.q_content_projz!decoder.layers.(\d+).sa_qpos_projz&decoder.layers.\1.self_attn.q_pos_projz%decoder.layers.(\d+).sa_kcontent_projz*decoder.layers.\1.self_attn.k_content_projz!decoder.layers.(\d+).sa_kpos_projz&decoder.layers.\1.self_attn.k_pos_projzdecoder.layers.(\d+).sa_v_projz"decoder.layers.\1.self_attn.v_projz%decoder.layers.(\d+).ca_qcontent_projz-decoder.layers.\1.encoder_attn.q_content_projz!decoder.layers.(\d+).ca_qpos_projz)decoder.layers.\1.encoder_attn.q_pos_projz%decoder.layers.(\d+).ca_kcontent_projz-decoder.layers.\1.encoder_attn.k_content_projz!decoder.layers.(\d+).ca_kpos_projz)decoder.layers.\1.encoder_attn.k_pos_projzdecoder.layers.(\d+).ca_v_projz%decoder.layers.\1.encoder_attn.v_projz&decoder.layers.(\d+).ca_qpos_sine_projz.decoder.layers.\1.encoder_attn.q_pos_sine_projzlayers.\1.mlp.layers.0zlayers.\1.mlp.layers.1z	backbone.zmodel.zembedding.weightzembeddings.weightzmixer.experts.*.up_proj.weightzmixer.experts.up_projz mixer.experts.*.down_proj.weightzmixer.experts.down_projz'feed_forward.experts.*.gate_proj.weightz%feed_forward.experts.*.up_proj.weightz'feed_forward.experts.*.down_proj.weightzLayerNorm.gammazLayerNorm.weightzLayerNorm.betazLayerNorm.biasemb_lnzembeddings.LayerNormzencoder.layerslayersz
mixer.Wqkv)zself_attn.q_projzself_attn.k_projzself_attn.v_projzmixer.out_projnorm1norm2post_mlp_layernorm)qwen3_vl_moephimoelfm2_moeernie4_5_vl_moedetrr(   conditional_detrdeformable_detrd_fine
nemotron_hjambalegacyjina_embeddings_v3r{   z
.weight_g$z".parametrizations.weight.original0z
.weight_v$z".parametrizations.weight.original1z,mlp.gate.moe_statics.e_score_correction_biasr   r   z).block_sparse_moe.e_score_correction_biasz.mlp.e_score_correction_biasr&   zmlp.e_score_correction_biasz mlp.gate.e_score_correction_biasr   qwen3_5_moe_textzencoder\.pre_encode\.conv\.zencoder.subsampling.layers.zencoder\.pre_encode\.out\.zencoder.subsampling.linear.z7transf_decoder\._embedding\.position_embedding\.pos_enczdecoder.pos_emb.weightz+transf_decoder\._embedding\.token_embeddingzdecoder.embed_tokensz&transf_decoder\._embedding\.layer_normzdecoder.embedding_layernormz*transf_decoder\._decoder\.final_layer_normzdecoder.normz transf_decoder\._decoder\.layerszdecoder.layerszencoder_decoder_proj\.zdecoder.proj.z"encoder\.(.+)\.self_attn\.linear_qzencoder.(.+).self_attn.q_projz"encoder\.(.+)\.self_attn\.linear_kzencoder.(.+).self_attn.k_projz"encoder\.(.+)\.self_attn\.linear_vzencoder.(.+).self_attn.v_projz$encoder\.(.+)\.self_attn\.linear_outzencoder.(.+).self_attn.o_projz$encoder\.(.+)\.self_attn\.linear_posz&encoder.(.+).self_attn.relative_k_projz$encoder\.(.+)\.self_attn\.pos_bias_uzencoder.(.+).self_attn.bias_uz$encoder\.(.+)\.self_attn\.pos_bias_vzencoder.(.+).self_attn.bias_vz\.first_sub_layer\.query_netz.self_attn.q_projz\.first_sub_layer\.key_netz.self_attn.k_projz\.first_sub_layer\.value_netz.self_attn.v_projz!\.first_sub_layer\.out_projectionz.self_attn.o_projz\.second_sub_layer\.query_netz.encoder_attn.q_projz\.second_sub_layer\.key_netz.encoder_attn.k_projz\.second_sub_layer\.value_netz.encoder_attn.v_projz"\.second_sub_layer\.out_projectionz.encoder_attn.o_projz\.third_sub_layer\.dense_inz.mlp.fc1z\.third_sub_layer\.dense_outz.mlp.fc2z\.layer_norm_1\.z.input_layernorm.z\.layer_norm_2\.z.post_attention_layernorm.z\.layer_norm_3\.z.final_layernorm.z\.conv\.batch_normz
.conv.normzlog_softmax\.mlp\.layer0proj_out
cohere_asr)
r   r   r	   r   r
   r   r   copy_MODEL_TO_CONVERSION_PATTERNitems)mapping
model_typebase_pattern r   m/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/transformers/conversion_mapping.py$_build_checkpoint_conversion_mappingU   s  






%



:;<@D
G
MRm
}


5	
 !"#$%&)






	

    l


!
r   Nc                 C  s   t d u rt a tt | S )N)$_checkpoint_conversion_mapping_cacher   r   get)r   r   r   r   !get_checkpoint_conversion_mapping!  s   r   Fr   strr   &list[WeightConverter | WeightRenaming]	overwriteboolreturnNonec                 C  s6   t d u rt a | t v r|std|  d|t | < d S )NzModel type z5 already exists in the checkpoint conversion mapping.)r   r   
ValueError)r   r   r   r   r   r   &register_checkpoint_conversion_mapping(  s
   r   rJ   r   -list[WeightConverter | WeightRenaming] | Nonec                 C  s&   t | jdd }|d urt|}|S d S )Nr   )getattrconfigr   )rJ   r   model_specific_conversionsr   r   r   $extract_weight_conversions_for_model3  s
   r   Tkey_mappingdict[str, str] | Nonehf_quantizerHfQuantizer | None
add_legacyc           	      C  s   ddl m} g }|durdd | D }t }t|  }dur,|| || jj | 	 D ]%}|| urUt
||rU|jj|vrUt|}|durU|| ||jj q0|r_|td |durj||  |S )z
    For a given `model`, obtain the weight conversion mapping if any are registered either as a simple renaming
    `_checkpoint_conversion_mapping` class argument, or in the general WeightConverter mapping.
    r   r   Nc                 S  s   g | ]
\}}t ||d qS )r@   )r   ).0kvr   r   r   
<listcomp>M  s    z0get_model_conversion_mapping.<locals>.<listcomp>r{   )modeling_utilsr   r   setr   extendaddr   r   modules
isinstancer   get_weight_conversions)	rJ   r   r   r   r   weight_conversionsseen_model_typesconversions	submoduler   r   r   get_model_conversion_mapping;  s.   

r   )F)r   r   r   r   r   r   r   r   )rJ   r   r   r   )NNT)
rJ   r   r   r   r   r   r   r   r   r   )
__future__r   r   r   typingr   core_model_loadingr   r   r   r	   r
   r   r   r   r   
quantizersr   r   r   r   r   r   r   r   r   r   r   r   <module>   s   $	
 !"#$%1   L

