
     j5                         d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZ erddlmZ  ed          Z G d	 d
          ZdS )ziThis module contains the LangfuseMedia class, which is used to wrap media objects for upload to Langfuse.    N)TYPE_CHECKINGAnyLiteralOptionalTupleTypeVarcast)MediaContentType)langfuse_logger)ParsedMediaReference)LangfuseTc                   $   e Zd ZU dZeed<   ee         ed<   ee         ed<   ee	         ed<   ee	         ed<   dddddddee         d	ee	         d
ee         dee         dee	         f
dZ
de	dee         fdZdee	         fdZedee         fd            Zedee	         fd            Zedee	         fd            Zede	defd            Zde	deee         ee         f         fdZeddddeddded	         dededefd            ZdS ) LangfuseMediaa  A class for wrapping media objects for upload to Langfuse.

    This class handles the preparation and formatting of media content for Langfuse,
    supporting both base64 data URIs and raw content bytes.

    Args:
        obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
        base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content
            and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
        content_type (Optional[str]): The MIME type of the media content when providing raw bytes.
        content_bytes (Optional[bytes]): Raw bytes of the media content.
        file_path (Optional[str]): The path to the file containing the media content. For relative paths,
            the current working directory is used.

    Raises:
        ValueError: If neither base64_data_uri or the combination of content_bytes
            and content_type is provided.
    obj_content_bytes_content_type_source	_media_idN)r   base64_data_uricontent_typecontent_bytes	file_pathr   r   r   r   c                   || _         |,|                     |          }|\  | _        | _        d| _        n|||| _        || _        d| _        n|\|Zt
          j                            |          r;|                     |          | _        | j        r|nd| _        | j        rdnd| _        n)t          j
        d           d| _        d| _        d| _        |                                 | _        dS )aA  Initialize a LangfuseMedia object.

        Args:
            obj: The object to wrap.

            base64_data_uri: A base64-encoded data URI containing the media content
                and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
            content_bytes: Raw bytes of the media content.
            file_path: The path to the file containing the media content. For relative paths,
                the current working directory is used.
        Nr   bytesfilezbbase64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia)r   _parse_base64_data_urir   r   r   ospathexists
_read_fileloggererror_get_media_idr   )selfr   r   r   r   r   parsed_datas          X/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/langfuse/media.py__init__zLangfuseMedia.__init__0   s	   * &55oFFK6A3D!3,DLL&<+C!-D"/D"DLL!(y)) ) #'//)"<"<D151D!N$D%)%8B66dDLLLt   #'D!%DDL++--    returnc                     	 t          |d          5 }|                                cd d d            S # 1 swxY w Y   d S # t          $ r$}t          j        d| |           Y d }~d S d }~ww xY w)NrbzError reading file at path exc_info)openread	Exceptionr"   r#   )r%   r   r   es       r'   r!   zLangfuseMedia._read_filec   s    	i&& #$yy{{# # # # # # # # # # # # # # # # # # 	 	 	LByBBQOOOO44444	s2   A  3A  7A  7A   
A.
A))A.c                     | j         }|d S |                    dd                              dd          }|d d         S )N+-/_   )_content_sha256_hashreplace)r%   content_hashurl_safe_content_hashs      r'   r$   zLangfuseMedia._get_media_idl   sL    04 !- 4 4S# > > F FsC P P$SbS))r)   c                 <    | j         rt          | j                   nd S N)r   lenr%   s    r'   _content_lengthzLangfuseMedia._content_lengthw   s     +/+>Hs4&'''DHr)   c                     | j         d S t          j        | j                                                   }t	          j        |                              d          S )Nzutf-8)r   hashlibsha256digestbase64	b64encodedecode)r%   sha256_hash_bytess     r'   r9   z"LangfuseMedia._content_sha256_hash{   sO    &4#N4+>??FFHH 12299'BBBr)   c                 f    | j         | j        | j        d S d| j          d| j         d| j         dS )N@@@langfuseMedia:type=z|id=z|source=@@@)r   r   r   r@   s    r'   _reference_stringzLangfuseMedia._reference_string   sH    %)=AW4i(:iiiiX\Xdiiiir)   reference_stringc           	         | st          d          t          | t                    st          d          |                     d          st          d          |                     d          st          d          | t          d          d                             d          }|                    d	          }i |D ] }|                    d
d          \  }}||<   !t          fddD                       st          d          t          d         d         t          t          d                             S )a  Parse a media reference string into a ParsedMediaReference.

        Example reference string:
            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"

        Args:
            reference_string: The reference string to parse.

        Returns:
            A TypedDict with the media_id, source, and content_type.

        Raises:
            ValueError: If the reference string is empty or not a string.
            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
            ValueError: If the reference string does not end with "@@@".
            ValueError: If the reference string is missing required fields.
        zReference string is emptyz Reference string is not a stringrK   z=Reference string does not start with '@@@langfuseMedia:type='rL   z(Reference string does not end with '@@@'z@@@langfuseMedia:N|=   c              3       K   | ]}|v V  	d S r>    ).0keyr&   s     r'   	<genexpr>z7LangfuseMedia.parse_reference_string.<locals>.<genexpr>   s(      JJ#3+%JJJJJJr)   )typeidsourcez+Missing required fields in reference stringrY   rZ   rX   )media_idrZ   r   )
ValueError
isinstancestr
startswithendswithr?   rstripsplitallr   r	   r
   )rN   contentpairspairrV   valuer&   s         @r'   parse_reference_stringz$LangfuseMedia.parse_reference_string   sw   &   	:8999*C00 	A?@@@**+CDD 	O    ((// 	IGHHH"3':#;#;#=#=>EEeLL c"" 	% 	%DC++JC$K JJJJ1IJJJJJ 	LJKKK# &x(.F0CDD
 
 
 	
r)   datac                 0   	 |rt          |t                    st          d          |                    d          st          d          |dd                              dd          \  }}|r|st          d          |                    d          }d	|vrt          d
          |d         }|st          d          t          j        |          t          t          |          fS # t          $ r!}t          j        d|           Y d }~dS d }~ww xY w)NzData URI is not a stringdata:z$Data URI does not start with 'data:'   ,rR   zInvalid URI;rF   zData is not base64 encodedr   zContent type is emptyzError parsing base64 data URIr-   )NN)r]   r^   r\   r_   rb   rF   	b64decoder	   r
   r1   r"   r#   )r%   ri   headeractual_dataheader_partsr   r2   s          r'   r   z$LangfuseMedia._parse_base64_data_uri   sB   	 =z$44 = !;<<<??7++ I !GHHH"&qrr(..a"8"8FK 0 0 /// "<<,,L|++ !=>>> (?L : !8999#K00$7G2V2VVV 	 	 	L81EEEE:::::	s   C'C* *
D4DD
   )	max_depthcontent_fetch_timeout_secondslangfuse_clientr   resolve_withrt   ru   c                     dt           dt          dt           ffdt          t           | d                    S )a  Replace media reference strings in an object with base64 data URIs.

        This method recursively traverses an object (up to max_depth) looking for media reference strings
        in the format "@@@langfuseMedia:...@@@". When found, it (synchronously) fetches the actual media content using
        the provided Langfuse client and replaces the reference string with a base64 data URI.

        If fetching media content fails for a reference string, a warning is logged and the reference
        string is left unchanged.

        Args:
            obj: The object to process. Can be a primitive value, array, or nested object.
                If the object has a __dict__ attribute, a dict will be returned instead of the original object type.
            langfuse_client: Langfuse client instance used to fetch media content.
            resolve_with: The representation of the media content to replace the media reference string with.
                Currently only "base64_data_uri" is supported.
            max_depth: Optional. Default is 10. The maximum depth to traverse the object.

        Returns:
            A deep copy of the input object with all media references replaced with base64 data URIs where possible.
            If the input object has a __dict__ attribute, a dict will be returned instead of the original object type.

        Example:
            obj = {
                "image": "@@@langfuseMedia:type=image/jpeg|id=123|source=bytes@@@",
                "nested": {
                    "pdf": "@@@langfuseMedia:type=application/pdf|id=456|source=bytes@@@"
                }
            }

            result = await LangfuseMedia.resolve_media_references(obj, langfuse_client)

            # Result:
            # {
            #     "image": "data:image/jpeg;base64,/9j/4AAQSkZJRg...",
            #     "nested": {
            #         "pdf": "data:application/pdf;base64,JVBERi0xLjcK..."
            #     }
            # }
        r   depthr*   c                 T   k    r| S t          | t                    rwd}t          j        ||           }t	          |          dk    r| S | }i }j        j        j        nd }|D ]}	 t                              |          }j	        j
                            |d                   }	||                    |	j                  nt          j        |	j                  }
|
                                 t          j        |
j                                                  }d|	j         d| }|||<   # t(          $ r$}t+          j        d| d|            Y d }~d }~ww xY w|                                D ]\  }}|                    ||          }|S t          | t2                    rfd	| D             S t          | t4                    r!fd
|                                 D             S t7          | d          r&fd| j                                        D             S | S )Nz@@@langfuseMedia:.+?@@@r   r[   )timeoutrk   z;base64,z2Error fetching media content for reference string z: c                 .    g | ]} |d z             S rR   rT   )rU   itemry   traverses     r'   
<listcomp>zLLangfuseMedia.resolve_media_references.<locals>.traverse.<locals>.<listcomp>Q  s)    BBBduqy11BBBr)   c                 6    i | ]\  }}| |d z             S r}   rT   rU   rV   rg   ry   r   s      r'   
<dictcomp>zLLangfuseMedia.resolve_media_references.<locals>.traverse.<locals>.<dictcomp>U  s/    VVVJCXXeUQY77VVVr)   __dict__c                 6    i | ]\  }}| |d z             S r}   rT   r   s      r'   r   zLLangfuseMedia.resolve_media_references.<locals>.traverse.<locals>.<dictcomp>Y  s?       "U %33  r)   )r]   r^   refindallr?   
_resourceshttpx_clientr   rh   apimediageturlhttpxraise_for_statusrF   rG   rd   rH   r   r1   r"   warningitemsr:   listdicthasattrr   )r   ry   regexreference_string_matchesresult!reference_string_to_media_contentr   rN   parsed_media_reference
media_datamedia_contentbase64_media_contentr   r2   ref_strmedia_content_strru   rv   rt   r   s    `              r'   r   z8LangfuseMedia.resolve_media_references.<locals>.traverse  s   y  
 #s## 72+-:eS+A+A(/00A55J461 '1= $.;;  )A !! !!$ !1>1U1U,2 2. &5%8%>%B%B2:>& &
  ,7	 ),, *(E -   
 "' *8U" " " & &66888/5/?)10 0 &(( - +j*2I*i*iSg*i*i , ::JKK % ! ! !hQahhefhh   !! 7<<>>H H %#^^G5FGGFF #t$$ CBBBBBcBBBB #t$$ WVVVVV#))++VVVV sJ''     &)l&8&8&:&:   
 Js   +C	D55
E#?EE#r   )r   intr	   r   )r   rv   rw   rt   ru   r   s    ` ``@r'   resolve_media_referencesz&LangfuseMedia.resolve_media_references   st    bM	# M	c M	c M	 M	 M	 M	 M	 M	 M	 M	 M	^ AxxQ''(((r)   )__name__
__module____qualname____doc__object__annotations__r   r   r
   r^   r(   r!   r$   propertyr   rA   r9   rM   staticmethodr   rh   r   r   r   r   r   rT   r)   r'   r   r      s         & 
KKKUO###,----c]}
 !%)-37)-#'1. 1. 1. f1. "#	1.
 /01.  1. C=1. 1. 1. 1.fC HUO    	*x} 	* 	* 	* 	* I# I I I XI Chsm C C C XC j8C= j j j Xj 2
 2
9M 2
 2
 2
 \2
h	x)9 ::	;   @  -/) ) )) $) /0	)
 ) (+) 
) ) ) \) ) )r)   r   )r   rF   rC   r   r   typingr   r   r   r   r   r   r	   r   langfuse.apir
   langfuse.loggerr   r"   langfuse.typesr   langfuse._client.clientr   r   r   rT   r)   r'   <module>r      s   o o   				 				 N N N N N N N N N N N N N N N N N N  ) ) ) ) ) ) 5 5 5 5 5 5 / / / / / / 1000000GCLLK) K) K) K) K) K) K) K) K) K)r)   