"""Pipelines running different backends."""

from __future__ import annotations

import contextlib
from typing import TYPE_CHECKING, Any

import torch
import transformers.pipelines
from transformers import AutoConfig, Pipeline
from transformers import pipeline as transformers_pipeline

from optimum.utils import is_onnxruntime_available, is_transformers_version
from optimum.utils.logging import get_logger


if TYPE_CHECKING:
    from transformers import (
        BaseImageProcessor,
        FeatureExtractionMixin,
        PretrainedConfig,
        PreTrainedTokenizer,
        PreTrainedTokenizerFast,
        ProcessorMixin,
    )

logger = get_logger(__name__)

if is_onnxruntime_available():
    from optimum.onnxruntime import (
        ORTModelForAudioClassification,
        ORTModelForCausalLM,
        ORTModelForCTC,
        ORTModelForFeatureExtraction,
        ORTModelForImageClassification,
        ORTModelForImageToImage,
        ORTModelForMaskedLM,
        ORTModelForQuestionAnswering,
        ORTModelForSemanticSegmentation,
        ORTModelForSeq2SeqLM,
        ORTModelForSequenceClassification,
        ORTModelForSpeechSeq2Seq,
        ORTModelForTokenClassification,
        ORTModelForVision2Seq,
        ORTModelForZeroShotImageClassification,
    )
    from optimum.onnxruntime.modeling import ORTModel

    # Maps each supported task to the ORTModel class(es) able to run it. For
    # "automatic-speech-recognition", the second entry is used for CTC checkpoints.
    ORT_TASKS_MAPPING = {
        "audio-classification": (ORTModelForAudioClassification,),
        "automatic-speech-recognition": (ORTModelForSpeechSeq2Seq, ORTModelForCTC),
        "feature-extraction": (ORTModelForFeatureExtraction,),
        "fill-mask": (ORTModelForMaskedLM,),
        "image-classification": (ORTModelForImageClassification,),
        "image-segmentation": (ORTModelForSemanticSegmentation,),
        "image-to-image": (ORTModelForImageToImage,),
        "image-to-text": (ORTModelForVision2Seq,),
        "question-answering": (ORTModelForQuestionAnswering,),
        "summarization": (ORTModelForSeq2SeqLM,),
        "text2text-generation": (ORTModelForSeq2SeqLM,),
        "text-classification": (ORTModelForSequenceClassification,),
        "text-generation": (ORTModelForCausalLM,),
        "token-classification": (ORTModelForTokenClassification,),
        "translation": (ORTModelForSeq2SeqLM,),
        "zero-shot-classification": (ORTModelForSequenceClassification,),
        "zero-shot-image-classification": (ORTModelForZeroShotImageClassification,),
    }
else:
    ORT_TASKS_MAPPING = {}


def get_ort_model_class(
    task: str, config: PretrainedConfig | None = None, model_id: str | None = None, **model_kwargs
):
    if task.startswith("translation_"):
        task = "translation"

    if task not in ORT_TASKS_MAPPING:
        raise KeyError(
            f"Task '{task}' is not supported by ONNX Runtime. Only {list(ORT_TASKS_MAPPING.keys())} are supported."
        )

    if task == "automatic-speech-recognition":
        # CTC checkpoints and encoder-decoder checkpoints need different ORTModel
        # classes, so the architecture is read from the model config.
        if config is None:
            hub_kwargs = {
                "trust_remote_code": model_kwargs.pop("trust_remote_code", False),
                "revision": model_kwargs.pop("revision", None),
                "token": model_kwargs.pop("token", None),
            }
            config = AutoConfig.from_pretrained(model_id, **hub_kwargs)
        if any(arch.endswith("ForCTC") for arch in config.architectures):
            ort_model_class = ORT_TASKS_MAPPING[task][1]
        else:
            ort_model_class = ORT_TASKS_MAPPING[task][0]
    else:
        ort_model_class = ORT_TASKS_MAPPING[task][0]

    return ort_model_class


def ort_infer_framework_load_model(
    model, config: PretrainedConfig | None = None, task: str | None = None, **model_kwargs
):
    if isinstance(model, str):
        # Drop kwargs that only make sense for PyTorch model loading.
        model_kwargs.pop("framework", None)
        model_kwargs.pop("torch_dtype", None)
        model_kwargs.pop("_commit_hash", None)
        model_kwargs.pop("model_classes", None)
        ort_model_class = get_ort_model_class(task, config, model, **model_kwargs)
        ort_model = ort_model_class.from_pretrained(model, **model_kwargs)
    elif isinstance(model, ORTModel):
        ort_model = model
    else:
        raise TypeError(
            f"Model {model} is not supported. Please provide a valid model either as string or ORTModel. "
            "You can also provide None as the model to use a default one."
        )

    return "pt", ort_model


@contextlib.contextmanager
def patch_pipelines_to_load_ort_model():
    # Temporarily swap transformers' model loader so that `transformers.pipeline`
    # instantiates ORTModel classes instead of PyTorch models.
    original_infer_framework_load_model = transformers.pipelines.infer_framework_load_model
    transformers.pipelines.infer_framework_load_model = ort_infer_framework_load_model
    try:
        yield
    finally:
        transformers.pipelines.infer_framework_load_model = original_infer_framework_load_model


def pipeline(
    task: str | None = None,
    model: str | ORTModel | None = None,
    config: str | PretrainedConfig | None = None,
    tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None,
    feature_extractor: str | FeatureExtractionMixin | None = None,
    image_processor: str | BaseImageProcessor | None = None,
    processor: str | ProcessorMixin | None = None,
    revision: str | None = None,
    use_fast: bool = True,
    token: str | bool | None = None,
    device: int | str | torch.device | None = None,
    trust_remote_code: bool | None = None,
    model_kwargs: dict[str, Any] | None = None,
    pipeline_class: Any | None = None,
    **kwargs: Any,
) -> Pipeline:
    """Utility factory method to build a [`Pipeline`] with an ONNX Runtime model, similar to `transformers.pipeline`.

    A pipeline consists of:

        - One or more components for pre-processing model inputs, such as a [tokenizer](tokenizer),
        [image_processor](image_processor), [feature_extractor](feature_extractor), or [processor](processors).
        - A [model](model) that generates predictions from the inputs.
        - Optional post-processing steps to refine the model's output, which can also be handled by processors.

    <Tip>
    While there are such optional arguments as `tokenizer`, `feature_extractor`, `image_processor`, and `processor`,
    they shouldn't be specified all at once. If these components are not provided, `pipeline` will try to load
    required ones automatically. In case you want to provide these components explicitly, please refer to a
    specific pipeline in order to get more details regarding what components are required.
    </Tip>

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`: will return a [`AudioClassificationPipeline`].
            - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`].
            - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
            - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
            - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
            - `"fill-mask"`: will return a [`FillMaskPipeline`]:.
            - `"image-classification"`: will return a [`ImageClassificationPipeline`].
            - `"image-feature-extraction"`: will return an [`ImageFeatureExtractionPipeline`].
            - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
            - `"image-text-to-text"`: will return a [`ImageTextToTextPipeline`].
            - `"image-to-image"`: will return a [`ImageToImagePipeline`].
            - `"image-to-text"`: will return a [`ImageToTextPipeline`].
            - `"mask-generation"`: will return a [`MaskGenerationPipeline`].
            - `"object-detection"`: will return a [`ObjectDetectionPipeline`].
            - `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
            - `"summarization"`: will return a [`SummarizationPipeline`].
            - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
            - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`].
            - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
              [`TextClassificationPipeline`].
            - `"text-generation"`: will return a [`TextGenerationPipeline`]:.
            - `"text-to-audio"` (alias `"text-to-speech"` available): will return a [`TextToAudioPipeline`]:.
            - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
            - `"translation"`: will return a [`TranslationPipeline`].
            - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`].
            - `"video-classification"`: will return a [`VideoClassificationPipeline`].
            - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
            - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
            - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
            - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
            - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].

        model (`str` or [`ORTModel`], *optional*):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of an ONNX Runtime model inheriting from [`ORTModel`].

            If not provided, the default for the `task` will be loaded.
        config (`str` or [`PretrainedConfig`], *optional*):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`].

            If not provided, the default configuration file for the requested model will be used. That means that if
            `model` is given, its default configuration will be used. However, if `model` is not supplied, this
            `task`'s default model's config is used instead.
        tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].

            If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
            is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
            However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
            will be loaded.
        feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
            The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].

            Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
            models. Multi-modal models will also require a tokenizer to be passed.

            If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
            `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
            is a string). However, if `config` is also not given or not a string, then the default feature extractor
            for the given `task` will be loaded.
        image_processor (`str` or [`BaseImageProcessor`], *optional*):
            The image processor that will be used by the pipeline to preprocess images for the model. This can be a
            model identifier or an actual image processor inheriting from [`BaseImageProcessor`].

            Image processors are used for Vision models and multi-modal models that require image inputs. Multi-modal
            models will also require a tokenizer to be passed.

            If not provided, the default image processor for the given `model` will be loaded (if it is a string). If
            `model` is not specified or not a string, then the default image processor for `config` is loaded (if it is
            a string).
        processor (`str` or [`ProcessorMixin`], *optional*):
            The processor that will be used by the pipeline to preprocess data for the model. This can be a model
            identifier or an actual processor inheriting from [`ProcessorMixin`].

            Processors are used for multi-modal models that require multi-modal inputs, for example, a model that
            requires both text and image inputs.

            If not provided, the default processor for the given `model` will be loaded (if it is a string). If `model`
            is not specified or not a string, then the default processor for `config` is loaded (if it is a string).
        revision (`str`, *optional*, defaults to `"main"`):
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
        use_fast (`bool`, *optional*, defaults to `True`):
            Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
        token (`str` or `bool`, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        device (`int` or `str` or `torch.device`):
            Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
            pipeline will be allocated.
        device_map (`str` or `dict[str, Union[int, str, torch.device]]`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
            `device_map="auto"` to compute the most optimized `device_map` automatically (see
            [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
            for more information).

            <Tip warning={true}>

            Do not use `device_map` AND `device` at the same time as they will conflict.

            </Tip>

        torch_dtype (`str` or `torch.dtype`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
            (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
            tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
            and in which you have read the code, as it will execute code present on the Hub on your local machine.
        model_kwargs (`dict[str, Any]`, *optional*):
            Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
            **model_kwargs)` function.
        kwargs (`dict[str, Any]`, *optional*):
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        [`Pipeline`]: A suitable pipeline for the task.

    Examples:
    ```python
    >>> from optimum.onnxruntime import pipeline

    >>> # Sentiment analysis pipeline
    >>> analyzer = pipeline("sentiment-analysis")

    >>> # Question answering pipeline, specifying the checkpoint identifier
    >>> oracle = pipeline(
    ...     "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased"
    ... )

    >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
    >>> model = ORTModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
    >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
    ```
    """
    if kwargs.pop("accelerator", None) is not None:
        logger.warning(
            "The `accelerator` argument should not be passed when using `optimum.onnxruntime.pipelines.pipeline` "
            "as ONNX Runtime is the only supported backend. Please remove the `accelerator` argument."
        )

    version_dependent_kwargs = {}
    if is_transformers_version(">=", "4.46.0"):
        # The `processor` argument is only forwarded on transformers versions that support it.
        version_dependent_kwargs["processor"] = processor

    # While the patch is active, `transformers.pipeline` loads ORTModel instances
    # instead of PyTorch models.
    with patch_pipelines_to_load_ort_model():
        pipeline_with_ort_model = transformers_pipeline(
            task=task,
            model=model,
            config=config,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            revision=revision,
            use_fast=use_fast,
            token=token,
            device=device,
            trust_remote_code=trust_remote_code,
            model_kwargs=model_kwargs,
            pipeline_class=pipeline_class,
            **version_dependent_kwargs,
            **kwargs,
        )

    return pipeline_with_ort_model