
    j>             
       x   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z- ddl+m.Z/ ddl+m0Z0m1Z1m2Z2 ddl+m3Z4 ddl+m5Z6 ddl+m7Z8 ddl+m9Z: ddl+m;Z< ddl+m=Z> ddl?m@Z@mAZAmBZBmCZCmDZD ddlEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZX ddlYmZZZm[Z[ ddl\m]Z] de^de_dee_eedf         f         fdZ` G d deK          Za G d d           Zb G d! d"eeef                   ZcdS )#    N)datetime)BytesIOUnsupportedOperation)Path)AnyCallableDictIterableIteratorListMappingOptionalTupleUnioncast   )
EncryptionPasswordType)
PageObject_VirtualList)index2label)StrByteType
StreamTypeb_deprecate_with_replacementlogger_warningparse_iso8824_dateread_non_whitespaceread_previous_lineread_until_whitespaceskip_over_commentskip_over_whitespace)CatalogAttributes)CatalogDictionary)CheckboxRadioButtonAttributesGoToActionArgumentsUserAccessPermissions)Core)DocumentInformationAttributes)FieldDictionaryAttributes)PageAttributes)PagesAttributes)TrailerKeys)EmptyFileErrorFileNotDecryptedErrorPdfReadErrorPdfStreamErrorWrongPasswordError)ArrayObjectBooleanObjectContentStreamDecodedStreamObjectDestinationDictionaryObjectEncodedStreamObjectFieldFitFloatObjectIndirectObject
NameObject
NullObjectNumberObject	PdfObjectTextStringObject
TreeObjectViewerPreferencesread_object)OutlineTypePagemodeType)XmpInformationdsizereturn.c                     |dk    rt          d          d| z   } | dd          } t          j        d|           d         S )N   zinvalid size in convert_to_ints           iz>qr   )r0   structunpack)rI   rJ   s     W/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pypdf/_reader.pyconvert_to_intrQ   p   sH    axx;<<<+a/A	"##A=q!!!$$    c                   f   e Zd ZdZddZdedee         fdZedee         fd            Z	edee         fd            Z
edee         fd	            Zedee         fd
            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            Zedee         fd            ZdS )DocumentInformationa  
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    All text properties of the document metadata have
    *two* properties, eg. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where
    the metadata is being displayed. The raw property can sometimes return
    a ``ByteStringObject``, if pypdf was unable to decode the string's
    text encoding; this requires additional safety in the caller and
    therefore is not as commonly accessed.
    rK   Nc                 .    t          j        |            d S N)r8   __init__selfs    rP   rW   zDocumentInformation.__init__   s    !$'''''rR   keyc                 `    |                      |d           }t          |t                    r|S d S rV   )get
isinstancerB   )rY   rZ   retvals      rP   	_get_textzDocumentInformation._get_text   s2    #t$$f.// 	MtrR   c                     |                      t          j                  rP|                     t          j                  p0|                      t          j                                                  ndS )z
        Read-only property accessing the document's title.

        Returns a ``TextStringObject`` or ``None`` if the title is not
        specified.
        N)r\   DITITLEr_   
get_objectrX   s    rP   titlezDocumentInformation.title   sR     xx!!DNN28$$G(:(:(E(E(G(G	
rR   c                 @    |                      t          j                  S )z>The "raw" version of title; can return a ``ByteStringObject``.)r\   ra   rb   rX   s    rP   	title_rawzDocumentInformation.title_raw   s     xx!!!rR   c                 @    |                      t          j                  S )z
        Read-only property accessing the document's author.

        Returns a ``TextStringObject`` or ``None`` if the author is not
        specified.
        )r_   ra   AUTHORrX   s    rP   authorzDocumentInformation.author   s     ~~bi(((rR   c                 @    |                      t          j                  S )z?The "raw" version of author; can return a ``ByteStringObject``.)r\   ra   rh   rX   s    rP   
author_rawzDocumentInformation.author_raw   s     xx	"""rR   c                 @    |                      t          j                  S )z
        Read-only property accessing the document's subject.

        Returns a ``TextStringObject`` or ``None`` if the subject is not
        specified.
        )r_   ra   SUBJECTrX   s    rP   subjectzDocumentInformation.subject   s     ~~bj)))rR   c                 @    |                      t          j                  S )z@The "raw" version of subject; can return a ``ByteStringObject``.)r\   ra   rm   rX   s    rP   subject_rawzDocumentInformation.subject_raw        xx
###rR   c                 @    |                      t          j                  S )ac  
        Read-only property accessing the document's creator.

        If the document was converted to PDF from another format, this is the
        name of the application (e.g. OpenOffice) that created the original
        document from which it was converted. Returns a ``TextStringObject`` or
        ``None`` if the creator is not specified.
        )r_   ra   CREATORrX   s    rP   creatorzDocumentInformation.creator   s     ~~bj)))rR   c                 @    |                      t          j                  S )z@The "raw" version of creator; can return a ``ByteStringObject``.)r\   ra   rs   rX   s    rP   creator_rawzDocumentInformation.creator_raw   rq   rR   c                 @    |                      t          j                  S )aG  
        Read-only property accessing the document's producer.

        If the document was converted to PDF from another format, this is the
        name of the application (for example, OSX Quartz) that converted it to
        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
        specified.
        )r_   ra   PRODUCERrX   s    rP   producerzDocumentInformation.producer   s     ~~bk***rR   c                 @    |                      t          j                  S )zAThe "raw" version of producer; can return a ``ByteStringObject``.)r\   ra   rx   rX   s    rP   producer_rawz DocumentInformation.producer_raw   s     xx$$$rR   c                 Z    t          |                     t          j                            S )z:Read-only property accessing the document's creation date.)r   r_   ra   CREATION_DATErX   s    rP   creation_datez!DocumentInformation.creation_date   s!     "$..1A"B"BCCCrR   c                 @    |                      t          j                  S )z
        The "raw" version of creation date; can return a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )r\   ra   r}   rX   s    rP   creation_date_rawz%DocumentInformation.creation_date_raw   s     xx()))rR   c                 Z    t          |                     t          j                            S )z
        Read-only property accessing the document's modification date.

        The date and time the document was most recently modified.
        )r   r_   ra   MOD_DATErX   s    rP   modification_datez%DocumentInformation.modification_date   s      "$.."="=>>>rR   c                 @    |                      t          j                  S )z
        The "raw" version of modification date; can return a
        ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )r\   ra   r   rX   s    rP   modification_date_rawz)DocumentInformation.modification_date_raw   s     xx$$$rR   )rK   N)__name__
__module____qualname____doc__rW   strr   r_   propertyrd   rf   ri   rk   rn   rp   rt   rv   ry   r{   r   r~   r   r   r    rR   rP   rT   rT   x   s        ( ( ( (S Xc]     
x} 
 
 
 X
 "8C= " " " X" ) ) ) ) X) #HSM # # # X# *# * * * X* $Xc] $ $ $ X$ 	*# 	* 	* 	* X	* $Xc] $ $ $ X$ 	+(3- 	+ 	+ 	+ X	+ %hsm % % % X% Dx1 D D D XD *8C= * * * X* ?8H#5 ? ? ? X? %x} % % % X% % %rR   rT   c                   	   e Zd ZdZedee         fd            Z	 	 dedee	e
f         dededeef         ddfd	Z	 	 dfd
edee         f         dedee         f         deeef         fdZedefd            Zedee         fd            Zedee         fd            ZdefdZdedefdZedeeef         fd            Z	 	 	 dgdee         deeeef                  dee         deeeef                  fdZdedefdZ deeef         deeef         dededdf
dZ!deeef         dededdfdZ"dedededdfdZ#dhdedeeef         fd Z$dee%e&e'f         de(e         fd!Z)	 	 dfdeedf         dee         deeef         fd"Z*ede+fd#            Z,	 dfd$ee         d%ee         de+fd&Z-edee.         fd'            Z/d(edee0e'f         dee         fd)Z1d*edee         fd+Z2d,e3dee         fd-Z4d.ed/ee(ee5e'de0ef                           de3fd0Z6d$edee3         fd1Z7ede(e         fd2            Z8ede(e         fd3            Z9edee         fd4            Z:edee;         fd5            Z<	 	 	 dgd6edeef         d7eeeef                  d(ee'         ddfd8Z=d(e'deee&ef         fd9Z>d:ed;edee&         fd<Z?d(eee'f         dee&         fd=Z@deAdeBeef         fd>ZCd?ed@edee&         fdAZDd?ed@edBee&         dee&         fdCZEdeAddfdDZFdeAddfdEZGdeAddfdFZHdeAdefdGZIdeAddfdHZJdeAdIee         dJeddfdKZKdeAdee         fdLZLdeAdIedee         fdMZMdeAdeeNeOePf         fdNZQeRdeAdIedefdO            ZSdeAddfdPZTdQe(e         dReUegeeeBedSf         f         f         dTeUeeeeBedSf         f         gef         ddfdUZVd/e(e         deeBeef                  fdVZWdeeef         deXfdWZYdXedeeef         fdYZZedee[         fdZ            Z\edefd[            Z]edeeeef                  fd\            Z^d]edee         fd^Z_d]edee         fd_Z`edeaee(e         f         fd`            Zbde(e         fdaZcd]ede(e         fdbZd	 didcee         deeeee(e         f         f         fddZedS )j	PdfReadera  
    Initialize a PdfReader object.

    This operation can take some time, as the PDF stream's cross-reference
    tables are read into memory.

    Args:
        stream: A File object or an object that supports the standard read
            and seek methods similar to a File object. Could also be a
            string representing a path to a PDF file.
        strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``False``.
        password: Decrypt PDF file at initialization. If the
            password is None, the file will not be decrypted.
            Defaults to ``None``
    rK   c                     t          t          | j        d                                       t          j        d          }|dS |                                }t          |t                    st          |          }|S )zCReturns the existing ViewerPreferences as an overloaded dictionary./RootN)	r   r8   trailerr\   CDVIEWER_PREFERENCESrc   r]   rD   )rY   os     rP   viewer_preferenceszPdfReader.viewer_preferences  sq     !4<#899==!4
 
 94LLNN!.// 	%!!$$ArR   FNstreamstrictpasswordc                    || _         d | _        i | _        d| _        d | _        t          |d          rd|j        vrt          dt                     t          |t          t          f          rIt          |d          5 }t          |                                          }d d d            n# 1 swxY w Y   |                     |           || _        d| _        d | _        | j        rd| _        | j                            t*          j                  }|r|d                                         j        nd}t3          t4          | j        t*          j                                                           }t9          j        ||          | _        ||nd}| j                            |          t<          j        k    r|tA          d	          d| _        d S |tC          d
          d S )Nr   modebzQPdfReader stream/file object is not in binary mode. It may not be read correctly.rbFTrR   zWrong passwordNot encrypted file)"r   flattened_pagesresolved_objects
xref_index_page_id2numhasattrr   r   r   r]   r   r   openr   readr   _override_encryption_encryptionis_encryptedr   r\   TKIDrc   original_bytesr   r8   ENCRYPTr   verifyr   NOT_DECRYPTEDr2   r0   )	rY   r   r   r   fhid_entry	id1_entryencrypt_entrypwds	            rP   rW   zPdfReader.__init__'  s     ;?LN  	 66"" 	s&+'='=0  
 fsDk** 	,fd## ,r ++, , , , , , , , , , , , , , ,		&$)!15 	5(,D% |''..HCKT..00??QTI  $,rz":"E"E"G"G M  *}iHHD '2((C '',,0JJJ( ))9:::(-D%%%!3444 "!s   >"B,,B03B0includeexcludec                     | j                             d           | j                                         }d|i} fd|                                D             } fd|                                D             }|S )z
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to it's
        representation.

        See https://ipython.readthedocs.io/en/stable/config/integrating.html
        r   zapplication/pdfNc                 $    i | ]\  }}|v 	||S r   r   ).0kvr   s      rP   
<dictcomp>z/PdfReader._repr_mimebundle_.<locals>.<dictcomp>p  s$    BBBTQQ'\\Aq\\\rR   c                 $    i | ]\  }}|v	||S r   r   )r   r   r   r   s      rP   r   z/PdfReader._repr_mimebundle_.<locals>.<dictcomp>t  s)    FFFTQQg5E5EAq5E5E5ErR   )r   seekr   items)rY   r   r   pdf_datadatas    ``  rP   _repr_mimebundle_zPdfReader._repr_mimebundle_[  s     	;##%%x
 BBBBTZZ\\BBBDFFFFTZZ\\FFFDrR   c                     | j                                         }| j                             dd           | j                             d                              dd          }| j                             |d           |S )z
        The first 8 bytes of the file.

        This is typically something like ``'%PDF-1.6'`` and can be used to
        detect if the file is actually a PDF file and which version it is.
        r   rM   zutf-8backslashreplace)r   tellr   r   decode)rY   locpdf_file_versions      rP   
pdf_headerzPdfReader.pdf_headerx  st     k  A;++A..55g?QRRa   rR   c                     t           j        | j        vrdS | j        t           j                 }t                      }t	          |t          d                    rt          d          |                    |           |S )a  
        Retrieve the PDF file's document information dictionary, if it exists.

        Note that some PDF files use metadata streams instead of docinfo
        dictionaries, and these metadata streams will not be accessed by this
        function.
        NzEtrailer not found or does not point to document information directory)r   INFOr   rT   r]   typer0   update)rY   objr^   s      rP   metadatazPdfReader.metadata  sw     7$,&&4l27#$&&c4::&& 	W   	crR   c                 p    	 d| _         | j        t          j                 j        d| _         S # d| _         w xY w)z(XMP (Extensible Metadata Platform) data.TF)r   r   r   ROOTxmp_metadatarX   s    rP   r   zPdfReader.xmp_metadata  s=    	.(,D%<(5(-D%%D%----s   ", 	5c                     | j         r#| j        t          j                 d         d         S | j        |                                  t          | j                  S )a   
        Calculate the number of pages in this PDF file.

        Returns:
            The number of pages of the parsed PDF file

        Raises:
            PdfReadError: if file is encrypted and restrictions prevent
                this action.
        /Pages/Count)r   r   r   r   r   _flattenlenrX   s    rP   _get_num_pageszPdfReader._get_num_pages  sO      	-<(28<<#+t+,,,rR   page_numberc                 t    | j         |                                  | j         
J d            | j         |         S )z
        Retrieve a page by number from this PDF file.

        Args:
            page_number: The page number to retrieve
                (pages begin at zero)

        Returns:
            A :class:`PageObject<pypdf._page.PageObject>` instance.
        Nhint for mypy)r   r   )rY   r   s     rP   	_get_pagezPdfReader._get_page  s?     'MMOOO#/////#K00rR   c                 *    |                                  S )zu
        A read-only dictionary which maps names to
        :class:`Destinations<pypdf.generic.Destination>`
        )_get_named_destinationsrX   s    rP   named_destinationszPdfReader.named_destinations  s     ++---rR   treer^   fileobjc                 r   t          j                    }|                    t          j                               |mi }t	          t
          | j        t          j                           }t          j
        |v r1t	          t          t                   |t          j
                           }ndS ||S |                     |||           |D ] }||v r|                     ||||            n!d|v rLt	          t          |d                   }|D ].}|                                }	|                     |	|||           /|S )aX  
        Extract field data if this PDF contains interactive form fields.

        The *tree* and *retval* parameters are for recursive use.

        Args:
            tree:
            retval:
            fileobj: A file object (usually a text file) to write
                a report to on all interactive form fields found.

        Returns:
            A dictionary where each key is a field name, and each
            value is a :class:`Field<pypdf.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.
        N/Fields)FAattributes_dictr   r%   r   r8   r   r   r   r   	ACRO_FORMr   rC   _check_kids_build_fieldr3   rc   )
rY   r   r^   r   field_attributescatalogattrfieldsffields
             rP   
get_fieldszPdfReader.get_fields  sC   . -// = M O OPPP>F+T\"'-BCCG|w&&HZ0'",2GHHt<Mvw///$ 	 	Dt||!!$9IJJJ 
 +tI77F L L!!%:JKKKKrR   parentc                    d|v rt          t          |d                   S d|v rM|                     t          t          |d                             dz   t          t          |d                   z   S t          t          |d                   S )N/TM/Parent./T)r   r   _get_qualified_field_namer8   )rY   r   s     rP   r   z#PdfReader._get_qualified_field_name  s    F??VE]+++&  ..)6)+<==   sF4L))	* VD\***rR   r   r   c                    |                      |||           	 t          t          |d                   }nz# t          $ rm 	 d|v r2|                     t          t
          |d                             dz   }nd}|t          t          |d                   z  }n# t          $ r Y Y d S w xY wY nw xY w|r,|                     |||           |                    d           t          |          ||<   ||         j	        
                                }|                    t          j        d          dk    r5|t          t          j                           ||         t          d          <   |                    t          j        d          d	k    rd
|v rt!          t#          |d
         d                                                             ||         t          d          <   d||         d         vr=||         t          d                                       t          d                     d S d S |                    t          j        d          d	k    ry|                    t          j        d          t          j        j        z  dk    rDg }t!          |          ||         t          d          <   |                    t          j        i           D ]}|
                                }t#          |d
         d                                                   D ]}	|	|vr|                    |	           t!          |          ||         t          d          <   |                    t          j        d          t          j        j        z  dk    rDd||         d         v r6||         d         ||         d                             d          = d S d S d S d S d S )Nr   r   r    r   
/Chz	/_States_/Btnz/AP/Nz/Offr   )r   r   r   KeyErrorr   r8   _write_fieldwriter:   indirect_referencerc   r\   r   FTr>   Optr3   listkeysappendFfFfBitsRadioKidsNoToggleToOffindex)
rY   r   r^   r   r   rZ   r   statesr   ss
             rP   r   zPdfReader._build_field  s    	000	sE%L))CC 	 	 	%%66 !153CDD   C CtCt---    	  	 gu.>???MM$EllsSk,77997725"&&36z"&7I7I3JF3K
;//07725"''ESLL3>SZ%**,,--4 4F3K
;//0 VC[555sJ{334;;Jv<N<NOOOOO 65WWRUB6))cggbeQ.?.?")/.QUV.V.V "F3>v3F3FF3K
;//0WWRWb)) K KLLNNahtn113344 ) )Aa(((7B67J7JsJ{3344q!!BI$;;q@@fSk+6663K,VC[-E-K-KF-S-STTT *).V.V A@66s4   5 
B, ABB,
B&!B,%B&&B,+B,c                     t           j        |v r>|t           j                 D ]-}|                     |                                ||           ,d S d S rV   )PAKIDSr   rc   )rY   r   r^   r   kids        rP   r   zPdfReader._check_kidsK  s\     7d??BG} C C 0 0&'BBBB ?C CrR   c                    t          j                    }|t          j                    z   }|D ]}|t           j        t           j        fv r||         }	 |t           j        k    r9ddddd}||         |v r'|                    | d|||                   d           n|t           j        k    r^	 ||         t           j                 }n(# t          $ r ||         t           j
                 }Y nw xY w|                    | d| d           n!|                    | d||          d           # t          $ r Y w xY wd S )NButtonTextChoice	Signature)r   /Txr   z/Sig: r   )r   
attributesr%   r  AAr   r   ParentTMr   T)	rY   r   r   r   field_attributes_tupler   	attr_nametypesnames	            rP   r   zPdfReader._write_fieldS  s   !#"%B%M%O%OO 	 + 	 	D   (.I25== !)%' +	 E T{e++&L&LeE$K6H&L&L&LMMMRY&&1$T{251# 1 1 1$T{2401MMY":":$":":":;;;;MMY"A"A%+"A"A"ABBB   7	 	s8   AD--CD-"C+(D-*C++A D--
D;:D;full_qualified_namec           
         dt           dt          t          t          f         dt           fd}|                                 }|i S i }|                                D ]t\  }}|                    d          dk    rV|r|                    d          ||<   9|                    d          | |t          t           |d	                   |          <   u|S )
a  
        Retrieve form fields from the document with textual data.

        Args:
            full_qualified_name: to get full name

        Returns:
            A dictionary. The key is the name of the form field,
            the value is the content of the field.

            If the document contains multiple form fields with the same name, the
            second and following will get the suffix .2, .3, ...
        r   r   rK   c                 r      |vr S  dz   t          t           fd|D                       dz             z   S )Nr   c                 D    g | ]}|                     d z             dS )r   r   )
startswith)r   kkr   s     rP   
<listcomp>zGPdfReader.get_form_text_fields.<locals>.indexed_key.<locals>.<listcomp>  s.    LLLRR]]1s75K5KLqLLLrR      )r   sum)r   r   s   ` rP   indexed_keyz3PdfReader.get_form_text_fields.<locals>.indexed_key  sW     #LLLL6LLLMMPQQRRSrR   N/FTr  z/Vr   )r   r	   r   r   r   r\   r   )rY   r   r)  
formfieldsffr   values          rP   get_form_text_fieldszPdfReader.get_form_text_fieldsx  s    	3 	S#X 	3 	 	 	 	 __&&
I&,,.. 	R 	RLE5yy5((& R %		$BuIIBG))D//B{{4U4[#9#92>>?	rR   c                     dt           dt          dt          ffd	 t          t           j                                                  n"# t          $ r}t          d          |d}~ww xY w d          t          d          g }                    d	d
          dk    r4dv rd                                         g}nfd j	        D             }n                    dd          }|D ]m                                                    d	d
          dk    r=dvr9dv r|d                                         gz  }W|fd j	        D             z  }n fd|D             S )a  
        Provides list of pages where the field is called.

        Args:
            field: Field Object, PdfObject or IndirectObject referencing a Field

        Returns:
            List of pages:
                - Empty list:
                    The field has no widgets attached
                    (either hidden field or ancestor field).
                - Single page list:
                    Page where the widget is present
                    (most common).
                - Multi-page list:
                    Field with multiple kids widgets
                    (example: radio buttons, field repeated on multiple pages).
        r   rZ   rK   c                     || v r| |         S d| v r7 t          t          | d                                                   |          S d S )Nr   )r   r8   rc   )r   rZ   _get_inheriteds     rP   r1  z9PdfReader.get_pages_showing_field.<locals>._get_inherited  s[    czz3xc!!%~)3y>+D+D+F+FGG   trR   zfield type is invalidNr*  zfield is not validz/Subtyper   z/Widgetz/Pc                 N    g | ]!}j         |                    d d          v |"S z/Annotsr   r   r\   )r   pr   s     rP   r&  z5PdfReader.get_pages_showing_field.<locals>.<listcomp>  s?       /155B3G3GGG GGGrR   /Kidsr   r   c                 N    g | ]!}j         |                    d d          v |"S r3  r4  )r   r5  r   s     rP   r&  z5PdfReader.get_pages_showing_field.<locals>.<listcomp>  s?           ! 3quuY7K7KKK KKKrR   c                     g | ]>}t          |t                    r|n$j                            |j                           ?S r   )r]   r   pages_get_page_number_by_indirectr   )r   xrY   s     rP   r&  z5PdfReader.get_pages_showing_field.<locals>.<listcomp>  sZ     
 
 
  !Z((WAA*T>>q?STTU
 
 
rR   )
r8   r   r   r   r   rc   	Exception
ValueErrorr\   r9  )rY   r   excretkidsr1  r   s   ``   @@rP   get_pages_showing_fieldz!PdfReader.get_pages_showing_field  s   ,	 0 	s 	s 	 	 	 	 	 		?)5+C+N+N+P+PQQEE 	? 	? 	?4553>	?>%''/122299Z$$	11u}}T{--//0   !Z   99Wb))D  LLNNEE*b))Y66T]]qyy$ 2 2 4 455        %)Z      

 
 
 
 	
 
 
 	
s   ,A 
A-A((A-c                    |i }t          t          | j        t          j                           }t
          j        |v r&t          t          |t
          j                           }nft
          j        |v rXt          t          |t
          j                           }t
          j        |v r%t          t          |t
          j                           }||S t          j
        |v rRt          t          |t          j
                           D ]*}|                     |                                |           +nt
          j        |v rt          t          |t
          j                           }d}|t          |          k     rt          t          ||                                                   }|dz  }t!          |t                    s[	 ||                                         }n# t"          $ r Y n\w xY w|dz  }t!          |t                    rd|v r	|d         }n|                     ||          }	|	|	||<   |t          |          k     n|                                D ]k\  }
}|                                }t!          |t                    r d|v r|d                                         }nN|                     |
|          }	|	|	||
<   l|S )z
        Retrieve the named destinations present in the document.

        Args:
            tree:
            retval:

        Returns:
            A dictionary which maps names to
            :class:`Destinations<pypdf.generic.Destination>`.
        Nr   r   /D)r   r8   r   r   r   CADESTSrC   NAMESr  r  r3   r   rc   r   r   r]   
IndexError_build_destinationr   )rY   r   r^   r   namesr  irZ   r-  destk__v__vals                rP   r   z!PdfReader._get_named_destinations  s     >F+T\"'-BCCG x7""J(9::W$$-wrx/@AA8u$$
E"(O<<D<M7d??Kbg77 G G,,S^^-=-=vFFFFG X)4>::EAc%jj..3a 3 3 5 566Q!#s++ !!H//11EE!   EQe%566 !u}} %d ..sE::#"&F3K# c%jj..& !JJLL 	' 	'Snn&&c#344 !s{{!$i2244 ..sC88#"&F3Ks   ?G 
G'&G'c                 *    |                                  S )z
        Read-only property for the outline present in the document.

        (i.e., a collection of 'outline items' which are also known as
        'bookmarks')
        )_get_outlinerX   s    rP   outlinezPdfReader.outline*  s       """rR   noderQ  c                    |g }t          t          | j        t          j                           }t
          j        |v r]t          t          |t
          j                           }t          |t                    r|S |d|v rt          t          |d                   }| 	                                | _
        ||S 	 |                     |          }|r|                    |           d|v rHg }|                     t          t          |d                   |           |r|                    |           d|vrnt          t          |d                   }|S )N/FirstTz/Next)r   r8   r   r   r   COOUTLINESr]   r?   r   _namedDests_build_outline_itemr  rP  )rY   rR  rQ  r   linesoutline_objsub_outlines          rP   rP  zPdfReader._get_outline4  sY    ?G+T\"'-BCCG {g%%-wr{/CDDeZ00 #"N $U):): 0%/BBD#;;==D<N	922488K ,{+++ 4)+!!$'7h"H"H+VVV 0NN;///d""($w-88D	9  rR   c                     t          t          | j        t          j                           }t
          j        |v r t          d|t
          j                           S dS )u   
        Read-only property for the list of threads.

        See §8.3.2 from PDF 1.7 spec.

        It's an array of dictionaries with "/F" and "/I" properties or
        None if there are no articles.
        r3   N)r   r8   r   r   r   rU  THREADS)rY   r   s     rP   threadszPdfReader.threads]  sF     'bg)>??:  wrz':;;;4rR   r   c                 "   | j         #d t          | j                  D             | _         |t          |t                    rdS t          |t
                    r|}n|j        }| j         
J d            | j                             |d          }|S )z
        Generate _page_id2num.

        Args:
            indirect_reference:

        Returns:
            The page number or None
        Nc                 .    i | ]\  }}|j         j        |S r   )r   idnum)r   rJ  r;  s      rP   r   z:PdfReader._get_page_number_by_indirect.<locals>.<dictcomp>z  s1     ! ! !26!Q$*A! ! !rR   r   )r   	enumerater9  r]   r?   intra  r\   )rY   r   ra  r?  s       rP   r:  z&PdfReader._get_page_number_by_indirectm  s     $! !:CDJ:O:O! ! !D %4F
)S)S%4(#.. 	-&EE&,E ,,o,,,##E400
rR   pagec                 6    |                      |j                  S )a  
        Retrieve page number of a given PageObject.

        Args:
            page: The page to get page number. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`

        Returns:
            The page number or None if page is not found
        )r:  r   )rY   rd  s     rP   get_page_numberzPdfReader.get_page_number  s     001HIIIrR   destinationc                 6    |                      |j                  S )z
        Retrieve page number of a given Destination object.

        Args:
            destination: The destination to get page number.

        Returns:
            The page number or None if page is not found
        )r:  rd  )rY   rg  s     rP   get_destination_page_numberz%PdfReader.get_destination_page_number  s     001ABBBrR   rd   arrayc                 Z   d\  }}t          |t          t          f          s*t          |t                    rt	          |          dk    s|0t                      }t          ||t          j                              S |dd         \  }}|dd          }	 t          ||t          ||                    S # t          $ rl t          d| d| t                     | j        r | j        d         j        }|t                      n|}t          ||t          j                              cY S w xY w)NNNr   r'  )fit_typefit_argszUnknown destination:  )r]   r?   r   r3   r   r7   r;   fitr0   r   r   r   r9  r   )rY   rd   rj  rd  typtmpr   s          rP   rH  zPdfReader._build_destination  s>    	c uz3/00	I5+..	I36u::??}<<DudCGII666ac
ID#!""IE	I"5$S50Q0Q0QRRR I I IFuFFuFFQQQ; jm658[Z\\\c""5*<cgiiHHHHHIs   B4 4A3D*)D*c                    d\  }}}	 t          d|d                   }n+# t          $ r | j        rt          d|          d}Y nw xY wd|v rYt          t          |d                   }t          t
          |t          j                           }|dk    r|t          j                 }n-d|v r)|d         }t          |t                    rd	|v r|d	         }t          |t                    r|                     ||          }nt          |t                    rN	 |                     || j        |         j                  }n# t          $ r |                     |d           }Y new xY w||                     ||          }nH| j        rt          d
|          t          d|dt                      |                     |d           }|rd|v r/t          d |d         D                       |t          d          <   d|v r|d         |t          d          <   d|v r|d         |t          d          <   t#          |                    dd          dk              |t          d          <   ||_        	 |j        |_        n# t*          $ r Y nw xY w|S )NNNNr   z/Titlez(Outline Entry Missing /Title attribute: r   z/Az/GoToz/DestrC  zUnexpected destination zRemoved unexpected destination z from destinationz/Cc              3   4   K   | ]}t          |          V  d S rV   )r<   )r   cs     rP   	<genexpr>z0PdfReader._build_outline_item.<locals>.<genexpr>  s)      <`<`PQ[^^<`<`<`<`<`<`rR   /Fr   r   z
/%is_open%)r   r   r   r0   r8   r>   r&   SDr]   r3   rH  r   rW  
dest_arrayr   r   r4   r\   rR  r   AttributeError)rY   rR  rK  rd   outline_itemactionaction_types          rP   rX  zPdfReader._build_outline_item  s   $4!e\	X//EE 	 	 	{ X"#Vd#V#VWWWEEE	
 4<<*DJ77Fz62E2G+HIIKg%%134__=D$ 011 "ddllDzdK(( 	@225$??LLc"" 	@D#664+D1<     D D D#66udCCD \  225$??LL{ "#ET#E#EFFFOdOOO    225$??L  	t||1<<`<`UYZ^U_<`<`<`1`1`Z--.t|| 26dZ--.4 6:(^Z1125B1%%*6 6LL112 !	.2.EL++ 	 	 	D	s3    %AA&D= = E E 0I= =
J
	J
c                 6    t          | j        | j                  S )zbRead-only property that emulates a list of :py:class:`PageObject<pypdf._page.PageObject>` objects.)r   r   r   rX   s    rP   r9  zPdfReader.pages  s     D/@@@rR   c                 ^      fdt          t           j                            D             S )z
        A list of labels for the pages in this document.

        This property is read-only. The labels are in the order that the pages
        appear in the document.
        c                 0    g | ]}t          |          S r   )page_index2page_label)r   rJ  rY   s     rP   r&  z)PdfReader.page_labels.<locals>.<listcomp>  s$    OOO1%dA..OOOrR   )ranger   r9  rX   s   `rP   page_labelszPdfReader.page_labels  s0     POOOc$*oo8N8NOOOOrR   c                     t          t          | j        t          j                           }t
          j        |v r%t          t          |t
          j                           S dS )a  
        Get the page layout currently being used.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        N)r   r8   r   r   r   r   PAGE_LAYOUTr>   )rY   r   s     rP   page_layoutzPdfReader.page_layout  sF    . 'bg)>??>W$$
GBN$;<<<trR   c                 `    	 | j         t          j                 d         S # t          $ r Y dS w xY w)a2  
        Get the page mode currently being used.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        z	/PageModeN)r   r   r   r   rX   s    rP   	page_modezPdfReader.page_mode7  s=    *	<(55 	 	 	44	s    
--r9  inheritc                    t          t          j                  t          t          j                  t          t          j                  t          t          j                  f}|i }|J| j        t          j                 	                                }|d         	                                }g | _
        t          j        |v r|t          j                 }nt          j        |vrd}nd}|dk    rl|D ]}||v r||         ||<   |t          j                 D ]C}i }	t          |t                    r||	d<   |	                                }
|
r | j        |
|fi |	 Dd S |dk    rqt#          |                                          D ]\  }}||vr|||<   t'          | |          }|                    |           | j
                            |           d S d S )Nr   z/Pager   )r>   PG	RESOURCESMEDIABOXCROPBOXROTATEr   r   r   rc   r   r  TYPEr  r]   r=   r   r  r   r   r   r  )rY   r9  r  r   inheritable_page_attributesr   tr   rd  addtr   attr_inr-  page_objs                 rP   r   zPdfReader._flattenQ  s    r|$$r{##rz""ry!!	'
# ?G= l27+6688GH%0022E#%D 7ebgAAWE!!AAA==3 0 05==$)$KGDMbg 8 8dN33 615D-.oo'' 8!DM#w77$7778 8 '\\"&w}}"7"7 + + %''%*E'N!$(:;;HOOE"""  ''11111 \rR   c                    | j         |j                 \  }}t          |d|                                           }t	          t
          |d                   dk    sJ ||d         k     sJ t          t          |                                                    }t          |d                   D ]}t          |           |                    dd           t          j        |          }t          |           |                    dd           t          j        |          }t          |           |                    dd           ||j        k    r| j        r||k    rt          d          |                    t!          |d         |z             d           t          |           |                    dd           	 t#          ||           }	ni# t$          $ r\}
t'          d	| d
|j         d|j         d|
 t*                     | j        rt          d|
           t-                      }	Y d }
~
nd }
~
ww xY w|	c S | j        rt          d          t-                      S )Nr   /Typez/ObjStmr   r   zObject is in wrong index.rT  zInvalid stream (index z) within object ro  r  zCan't read object stream: z%This is a fatal error in strict mode.)xref_objStmra  r=   rc   r   r   r   r   get_datar  r   r   r@   read_from_streamr   r0   rc  rE   r1   r   
generationr   r?   )rY   r   stmnumidxobj_stmstream_datarJ  objnumoffsetr   r>  s              rP   _get_object_from_streamz!PdfReader._get_object_from_stream  s   
 &'9'?@'5fa'F'F'Q'Q'S'SC)**i7777WT]""""b!1!1!3!34455wt}%% $	 $	A,,,R###!2;??F,,,R###!2;??F,,,R###+111{ @saxx"#>???S!2V!;<<a@@@  ,,,R####!+t44! # # # Q  )/ 2D2O   	   ; K&'IC'I'IJJJ ll# JJJ; 	HFGGG||s   ?G
H6AH11H6numgenc                 H    t          |||                                           S )a/  
        Used to ease development.

        This is equivalent to generic.IndirectObject(num,gen,self).get_object()

        Args:
            num: The object number of the indirect object.
            gen: The generation number of the indirect object.

        Returns:
            A PdfObject
        )r=   rc   )rY   r  r  s      rP   _get_indirect_objectzPdfReader._get_indirect_object  s"     c3--88:::rR   c                 
   t          |t                    rt          |d|           }|                     |j        |j                  }||S |j        dk    r%|j        | j        v r|                     |          }n|j        | j        v rc|j        | j        |j                 v rI| j	        
                    |j        i           
                    |j        d          rt                      S | j        |j                 |j                 }| j                            |d           	 |                     | j                  \  }}n# t          $ r t!          | j        d          r't#          | j                                                  }ni| j                                        }| j                            dd           | j                            d          }| j                            |d           t+          j        d|j         d|j         d                                |          }|t1          d|j         d	|j         d
t2                     |                    d          dz   | j        |j                 |j        <   | j                            |                    d          dz              |                     | j                  \  }}nd}Y nw xY w||j        k    r5| j        r.| j        r&t;          d|j         d|j         d| d| d	          n8||j        k    r-| j        r&t;          d|j         d|j         d| d| d	          | j        r||j        k    sJ t=          | j        |           }| j        sj| j         c| j         !                                stE          d          tG          tH          |          }| j         %                    ||j        |j                  }nut!          | j        d          r't#          | j                                                  }ni| j                                        }| j                            dd           | j                            d          }| j                            |d           t+          j        d|j         d|j         d                                |          }|Yt1          d|j         d|j         dt2                     |j        | j        vri | j        |j        <   |                    d          dz   | j        |j                 |j        <   | j                            |&                    d          dz              tO          | j                   | j                            dd           t=          | j        |           }| j        sj| j         c| j         !                                stE          d          tG          tH          |          }| j         %                    ||j        |j                  }n<t1          d|j         d|j         dt2                     | j        rt;          d          | (                    |j        |j        |           |S )Nr   F	getbufferr  z\sz\s+z\s+objz
Object ID ,z ref repairedr   zExpected object ID (ro  z) does not match actual (z); xref table not zero-indexed.z).zFile has not been decryptedzObject z foundz not defined.zCould not find object.))r]   rc  r=   cache_get_indirect_objectr  ra  r  r  xrefxref_free_entryr\   r?   r   r   read_object_headerr<  r   bytesr  r   r   researchencoder   r   startr   r   r0   rE   r   r   is_decryptedr/   r   rA   decrypt_objectendr"   cache_indirect_object)	rY   r   r^   r  ra  r  bufr5  ms	            rP   rc   zPdfReader.get_object  s    (#.. 	M!/0BAt!L!L//)+=+C
 
 M)Q.."(D,<<<112DEEFF)TY66"(DI6H6S,TTT#''(:(ErJJNN"(%  $ "||#I0;<=O=UVEKUA&&&$($;$;DK$H$H!zz   4;44 + 5 5 7 788CC((**AK$$Q***+**2..CK$$Q***I\,2\\7I7T\\\ccee  ="l%7%=ll@R@]lll    a I0;<*0 K$$QWWQZZ!^444(,(?(?(L(L%E::E/0 *000T_0; &7/A/G 7 7J\Jg 7 7277 7:D7 7 7   ,222t{2"/+=+C / /)4/ // / */ / /  
 { C!%7%BBBBB d33F , 1A1M'4466 O/0MNNNi00)88.46H6S  t{K00 'DK113344K$$&&  A&&&k&&r**  A&&&	X(.XX3E3PXXX__aa A }^06^^9K9V^^^   &0	AA?ADI0;<GGAJJN 	,789K9QR   qA...$T[111  Q'''$T[$77 0 T5E5Q+88:: S34QRRR!)V44F!-<< 2 8:L:W F e06ee9K9Veee   ; A&'?@@@"")+=+CV	
 	
 	
 s   $E FKKc                 H   d}t          |           |t          |          z  }|                    dd           t          |          }|t          |          z  }|                    dd           t          |          }|t          |          z  }|                    dd           |                    d          }t          |           |                    dd           |r"| j        rt          d| d| t                     t          |          t          |          fS )NFr  r      z.Superfluous whitespace found in object header ro  )
r!   r"   r   r    r   r   r   r   r   rc  )rY   r   extrara  r  _objs         rP   r  zPdfReader.read_object_headerM  s+   
 &!!!%f---B%f--%f---B*622
%f---B {{1~~F###B 	T[ 	UUUUU   5zz3z??**rR   r  ra  c                 :    | j                             ||f          S rV   )r   r\   )rY   r  ra  s      rP   r  z#PdfReader.cache_get_indirect_objecti  s      $((*e)<===rR   r   c                     ||f| j         v r3d| d| }| j        rt          |          t          |t                     || j         ||f<   |t          |||           |_        |S )NzOverwriting cache for ro  )r   r   r0   r   r   r=   r   )rY   r  ra  r   msgs        rP   r  zPdfReader.cache_indirect_objectn  s     $"777?:????C{ ("3'''3)))58z512?%3E:t%L%LC"
rR   c                 f   |                      |           |                     |           |                     |          }|                     ||          }|dk    r1| j        r|rt          d          t          d| dt                     |                     |||           | j	        r| j        s|
                                }| j                                        D ]\  }}|dk    rt          |                                          }|D ]}|                    ||         d           	 |                     |          \  }	}
n# t"          $ r Y  nBw xY w|	|| j	        z
  k    r/| j        |         |         | j        |         |	<   | j        |         |= |                    |d           d S d S d S )Nr   zBroken xref tablezincorrect startxref pointer()  )_basic_validation_find_eof_marker_find_startxref_pos_get_xref_issuesr   r0   r   r   _read_xref_tables_and_trailersr   r   r  r   sortedr  r   r  r=  )rY   r   	startxrefxref_issue_nrr   r  
xref_entryxref_kidpid_pgens              rP   r   zPdfReader.read{  s   v&&&f%%%,,V44	 --fi@@A{ 8} 8"#6777J-JJJHUUU 	++FI}MMM ? 	 4; 	 ++--C#'9??#4#4 / /Z%<<OO%%  ! 	/ 	/BKK
2222%)%<%<V%D%D
UU%   b4?222.2inR.@	#s+ IcN2. KKQ)	  	  	  	 s   .E
EEc                    |                     dt          j                   	 |                    d          }n# t          $ r t          d          w xY w|dk    rt          d          |dk    rE| j        r&t          d|	                    d           d	          t          d
| t                     |                     dt          j                   dS )z/Ensure file is not empty. Read at most 5 bytes.r      zcannot read headerrR   zCannot read an empty files   %PDF-zPDF starts with 'utf8z', but '%PDF-' expectedzinvalid pdf header: N)r   osSEEK_SETr   UnicodeDecodeErrorr   r.   r   r0   r   r   r   SEEK_END)rY   r   header_bytes      rP   r  zPdfReader._basic_validation  s    Ar{###	= ++a..KK! 	= 	= 	=&';<<<	=# !<===H$${ O"+(:(:6(B(B + + +  
 CkCCXNNNAr{#####s	   8 Ac                     d}d}|dd         dk    rb|                                 |k     r+| j        rt          d          t          dt                     t          |          }|dd         dk    `dS dS )a  
        Jump to the %%EOF marker.

        According to the specs, the %%EOF marker should be at the very end of
        the file. Hence for standard-compliant PDF documents this function will
        read only the last part (DEFAULT_BUFFER_SIZE).
        rM   rR   Nr  s   %%EOFzEOF marker not found)r   r   r0   r   r   r   )rY   r   HEADER_SIZElines       rP   r  zPdfReader._find_eof_marker  s     2A2h(""{{}}{**; E&'=>>>"#98DDD%f--D 2A2h(""""""rR   c                    t          |          }	 t          |          }t          |          }|dd         dk    rt          d          nr# t          $ re |                    d          st          d          t          |dd                                                   }t          dt                     Y nw xY w|S )z
        Find startxref entry - the location of the xref table.

        Args:
            stream:

        Returns:
            The bytes offset
        N	   s	   startxrefzstartxref not foundz startxref on same line as offset)r   rc  r0   r=  r$  stripr   r   )rY   r   r  r  s       rP   r  zPdfReader._find_startxref_pos  s     "&))	:D		I &f--DBQBx<''"#8999 (  	I 	I 	I??<00 :"#8999DHNN,,--I=xHHHHH	I s   A A,B<;B<c                    |                     d          }|dk    rt          d          t          |           |                    dd           d}	 t	          t
          t          ||                     }|r)|dk    r#|| _        | j        rt          dt                     d	}t          |           |                    dd           t	          t
          t          ||                     }t          |           |                    dd           d}||k     r|                     d
          }|d         dv r5|                    dd           |                     d
          }|d         dv 5|d         dv r|                    dd           	 |d d                             d          \  }}	|dd         }
t          |          t          |	          }}n0# t          $ r" t          |d          r"t          |                                          }nT|                                }|                    dd           |                     d          }|                    |           t#          j        | d                                |          }|t          d| dt                     d}d}nOt          d| dt                     t          |                    d                    }|                                }Y nw xY w|| j        vri | j        |<   i | j        |<   || j        |         v rn\|| j        |         |<   	 |
dk    | j        |         |<   n# t          $ r Y nw xY w	 |
dk    | j        d         |<   n# t          $ r Y nw xY w|dz  }|dz  }||k     t          |           |                    dd           |                     d          }|dk    r|                    dd           nd S )Nr  s   refzxref table read errorr  r   Tr   zFXref table not zero-indexed. ID numbers for objects will be corrected.F      
is   0123456789t             r  z\s+(\d+)\s+objzentry z( in Xref table invalid; object not foundr  z' in Xref table invalid but object found   f   s   traileri)r   r0   r   r   r   rc  rE   r   r   r   r   splitr<  r   r  r  r   r  r  r  groupr  r  r  )rY   r   ref
first_timer  rJ   cntr  offset_bgeneration_bentry_type_br  r  r  r5  r   trailer_tags                    rP   _read_standard_xref_tablez#PdfReader._read_standard_xref_table  sW   kk!nn&==6777F###B
c	sK5566C cQhh"%; "`    J'''KKA[6677D'''KKAC**{{2 1g,,KK+++!;;r??D 1g,, 8~--KKA&&&+-1#2#Y__T-B-B*Hl#'2;L),XL8I8IJFF  + + +v{33 '#F$4$4$6$677"KKMMAq)))$kk"ooA	S";";";"B"B"D"DcJJAy&RSRRR$   &+
!#&QSQQQ$   &)__
!"1+4 TY..,.DIj)79D(4$)J///
 17DIj)#.@LPT@T,Z8==$   ;G4;O,U3C88$   qqS **T  '''KKA ++a..Kj((B""""Gc	s8   AG D*LLM 
M('M(,N 
NNr  r  c                    i | _         i | _        i | _        t                      | _        |@|                    |d           |                    d          }|dv r|                    d          }|dk    r|                     |          }n|r+	 |                     |           d S # t          $ r d}Y nw xY w|
                                r	 |                     |          }n_# t          $ rR}t          j        | j        v r#t          d|j         t                      Y d }~d S t#          d|j                   d }~ww xY wt          j        t          j        t          j        t          j        t          j        f}|D ]9}||v r3|| j        vr*|                    |          | j        t/          |          <   :d|v rq|                                }	|                    t3          t4          |d                   dz   d           |                     |           |                    |	d           d|v rt3          t4          |d                   }nd S |                     ||          }|>d S d S )	Nr   r   r     xz!Previous trailer can not be read ztrailer can not be read /XRefStm/Prev)r  r  r  r8   r   r   r   
_read_xref_rebuild_xref_tabler<  isdigit_read_pdf15_xref_streamr   r   r   argsr   r0   r   r   r   SIZEraw_getr>   r   r   rc  _read_xref_other_error)
rY   r   r  r  r;  
xrefstreametrailer_keysrZ   r5  s
             rP   r  z(PdfReader._read_xref_tables_and_trailersL  s    02	:<79'))#KK	1%%%AAG||KKNNDyy OOF33		  K&,,V444E  & & &$%MMM& K
P!%!=!=f!E!EJJ  P P Pw$,..&HHH$   *+Naf+N+NOOOP  "w
BGRUBGK' P PCj((S-D-D8B8J8J38O8OZ__5++AKKS*Z*@ A AA EqIII00888KK1%%%j(( $S*W*= > >IIE 77	JJ	Q #####s0   B' 'B76B7C& &
E00D=&D==Ec                    |                      |           t          |           |                    dd           t          t          t
          t          f         t          ||                     }|                                D ]\  }}|| j	        vr
|| j	        |<   d|v r|
                                }|                    t          t          |d                   dz   d           	 |                     |           n/# t          $ r" t          d|d          dt                     Y nw xY w|                    |d           d|v r
|d         }|S d S )Nr  r   r  r   zXRef object at z, can not be read, some object may be missingr  )r  r   r   r   r	   r   r   rE   r   r   r   rc  r  r<  r   r   )rY   r   new_trailerrZ   r-  r5  r  s          rP   r  zPdfReader._read_xref}  sq   &&v...F###B4S>;vt+D+DEE%++-- 	* 	*JC$,&&$)S!$$AKKS+j"9::Q>BBB,,V4444   kk*&=kkk    
 KK1k!!#G,I4s   (C> >)D*)D*c                    |dk    r-| j         rt          d          t          dt                     d S |                    dd           |                    d          }|                    d          }|dk    r
|d	|z
  z  }|S |                    |d           t          d
          D ]2}|                    d                                          r	||z  }|c S 3d| j	        v rQ| j         sJt          dt                     	 | 
                    |           d S # t          $ r t          d          w xY wt          d          )Nr   z6/Prev=0 in the trailer (try opening with strict=False)zA/Prev=0 in the trailer - assuming there is no previous xref tableir   r     xrefr  
      r   z"Invalid parent xref., rebuild xrefzcan not rebuild xrefz/Could not find xref table at specified location)r   r0   r   r   r   r   findr  r  r   r  r<  )rY   r   r  rr  xref_loclooks         rP   r  z PdfReader._read_xref_other_error  s    >>{ "L   S   4 	Ckk"oo88G$$r>>h&IIq!!!"II 	! 	!D{{1~~%%'' !T!	    !
 dl""4;"?JJJ;((000t ; ; ;"#9:::;LMMMs   D D3c                    	 |                     dd                                |          \  }}t          t          t	          |                     }t          t
          |d                   dk    sJ                      |||           t          t          |	                                                    	|
                    dd|
                    d          g          }t          t          t          t          f         |
                    d                    t                    d	k    sJ  j        r%t                    d	k    rt          d
           dt           dt"          t           t$          t           df         f         f	fd}dt           dt"          t           t$          t           df         f         dt&          f fd}                     |||           |S )Nr  r   r  z/XRefz/Indexr   z/Sizez/Wr  zToo many entry sizes: rJ  rK   .c                     |          dk    r1                     |                    }t          ||                    S | dk    rdS dS )Nr   r   )r   rQ   )rJ  rI   entry_sizesr  s     rP   	get_entryz4PdfReader._read_pdf15_xref_stream.<locals>.get_entry  sT     1~!!$$[^44%aQ888 AvvqqrR   r  r  c                 P    | j                             |g           v p| j        v S rV   )r  r\   r  )r  r  rY   s     rP   used_beforez6PdfReader._read_pdf15_xref_stream.<locals>.used_before  s+    $)--
B777R3$BR;RRrR   )r   r  r   r5   rE   r   r  r   r   r  r\   r	   r   r   r   r0   rc  r   r   bool_read_xref_subsections)
rY   r   ra  r  r   	idx_pairsr  r  r  r  s
   `       @@rP   r  z!PdfReader._read_pdf15_xref_stream  s    	B 33F;;z-VT)B)BCC
CG,--8888"":ujAAAb!4!4!6!67788 NN8a1H1H-IJJ	4S>:>>$+?+?@@;1$$$$; 	G3{++a//EEEFFF	 	sE#s(O';!< 	 	 	 	 	 	 		SS 	SeCsCx4H.I 	Sd 	S 	S 	S 	S 	S 	S
 	##Iy+FFFrR   c                    |                      |dz
  d           |                     d          }|dk    r|                     d          }|dvrdS |                     d          }|dk    rYd}|dv r!|                     d          }|dk    rd	S |dv !||                     d	          z  }|                                d
k    rdS dS )z
        Return an int which indicates an issue. 0 means there is no issue.

        Args:
            stream:
            startxref:

        Returns:
            0 means no issue, other values represent specific issues.
        r   r      js   
 	   r  rR   s   0123456789 	r'  s   objr  )r   r   lower)r   r  r  s      rP   r  zPdfReader._get_xref_issues  s     	IM1%%%{{1~~4<<;;q>>Dz!!1{{1~~7??D***{{1~~3;;1 *** FKKNN"Dzz||v%%qqrR   c                    i | _         |                    dd           |                    d          }t          j        d|          D ]|}t          |                    d                    }t          |                    d                    }|| j         vr
i | j         |<   |                    d          | j         |         |<   }|                    dd           t          j        d|          D ]}|                    |                    d          d           t          t          t          t          f         t          ||                     }t          |                                          D ]\  }}|| j        |<   d S )Nr   r  s(   [\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+objr   r'  s$   [\r\n \t][ \t]*trailer[\r\n \t]*(<<))r  r   r   r  finditerrc  r  r  r   r	   r   rE   r  r   r   )	rY   r   f_r  ra  r  r  rZ   r-  s	            rP   r  zPdfReader._rebuild_xref_table  s]   	Aq[[__I2NN 	6 	6A

OOEQWWQZZJ**(*	*%+,771::DIj!%((AqErJJ 	* 	*AKK

A&&&tCH~{64/H/HIIK";#4#4#6#677 * *
U$)S!!*		* 	*rR   r  r  .r  c                    |                      |          D ]\  }}t          |||z             D ]} |d          }|dk    r |d          } |d          }	*|dk    rF |d          }
 |d          }|| j        vr
i | j        |<    |||          s|
| j        |         |<   v|dk    r1 |d          } |d          }d} |||          s||f| j        |<   | j        rt          d|           ǌd S )Nr   r   r'  zUnknown xref type: )_pairsr  r  r  r   r0   )rY   r  r  r  r  rJ   r  	xref_typenext_free_objectnext_generationbyte_offsetr  
objstr_num	obstr_idxs                 rP   r  z PdfReader._read_xref_subsections  sd     ;;y11 	J 	JKE4UEDL11 J J%IaLL	>>'0y||$&/illOO!^^"+)A,,K!*1J!2202	*-&;sJ77 A5@	*-c2!^^!*1J )	!I!"J&;sJ77 H1;Y0G(-[ J&'HY'H'HIIIJ/J	J 	JrR   c              #   r   K   d}	 ||         ||dz            fV  |dz  }|dz   t          |          k    rd S 3)Nr   Tr   r'  )r   )rY   rj  rJ  s      rP   r  zPdfReader._pairs;  sU      	(E!a%L((((FAA#e**$$		rR   c                 b    | j         st          d          | j                             |          S )a  
        When using an encrypted / secured PDF file with the PDF Standard
        encryption handler, this function will allow the file to be decrypted.
        It checks the given password against the document's user password and
        owner password, and then stores the resulting decryption key if either
        password is correct.

        It does not matter which password was matched.  Both passwords provide
        the correct decryption key that will allow the document to be used with
        this library.

        Args:
            password: The password to match.

        Returns:
            An indicator if the document was decrypted and weather it was the
            owner password or the user password.
        r   )r   r0   r   )rY   r   s     rP   decryptzPdfReader.decryptC  s5    &  	53444&&x000rR   permissions_codec           	         t          ddd           t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j	        d}fd|
                                D             S )z>Take the permissions as an integer, return the allowed access.decode_permissionsuser_access_permissionsz5.0.0)old_namenew_name
removed_in)printmodifycopyannotationsformsaccessabilityassembleprint_high_qualityc                 *    i | ]\  }}||z  d k    S r   r   )r   rZ   flagr'  s      rP   r   z0PdfReader.decode_permissions.<locals>.<dictcomp>n  s:     
 
 
T !D(A-
 
 
rR   )r   r'   PRINTMODIFYEXTRACTADD_OR_MODIFYFILL_FORM_FIELDSEXTRACT_TEXT_AND_GRAPHICSASSEMBLE_DOCPRINT_TO_REPRESENTATIONr   )rY   r'  permissions_mappings    ` rP   r)  zPdfReader.decode_permissions[  s    ").	
 	
 	
 	
 +0+2)10>*;2L-:"7"O	
 	

 
 
 
06688
 
 
 	
rR   c                 F    | j         dS t          | j         j                  S )zWGet the user access permissions for encrypted documents. Returns None if not encrypted.N)r   r'   PrX   s    rP   r*  z!PdfReader.user_access_permissionss  s&     #4$T%5%7888rR   c                 (    t           j        | j        v S )z
        Read-only boolean property showing whether this PDF file is encrypted.

        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
        )r   r   r   rX   s    rP   r   zPdfReader.is_encryptedz  s     zT\))rR   c                 $   d }i }t          t          | j        t          j                           }d|vs|d         sd S t          t
          |d                   }d|v rt          t          |d                   }t          |          }|D ]}|}t          |          }t          |t                    r_t          t          t                   |                                          }|r+t          j        t!          |j                            }	|	||<   |S )N	/AcroFormz/XFA)r   r8   r   r   r   rC   r3   iternextr]   r=   r   r9   rc   zlib
decompressr   _data)
rY   r   r^   r   r   rJ  r   tagr   ess
             rP   xfazPdfReader.xfa  s    %)!#'bg)>??g%%W[-A%4J 455T>>+tF|44FVA ) )GGa00 ) *=!>OOE )!_R__==&(srR   r  c                 R   t          t          | j        t          j                           }d|vst          |d         t                    sdS t          t          |t          d                             }d|vrdS t                      }t          |          |t          d          <   |t          d                   |t          d          <   |                     dt          d | j
        D                       dz   |           t                      }|                    |j                   ||t          d          <   t          t          |d                   D ]O}|                                }d	|v rt          d
|j         dt                      |j        |t          d	          <   P|S )z
        Add a top level form that groups all form fields below it.

        Args:
            name: text string of the "/T" Attribute of the created object

        Returns:
            The created object. ``None`` means no object was created.
        rF  Nr   r   r6  r   c                 $    g | ]\  }}|d k    |S r7  r   )r   grJ  s      rP   r&  z.PdfReader.add_form_topname.<locals>.<listcomp>  s!    AAAv1!q&&&&&rR   r   r   zTop Level Form Field z have a non-expected parent)r   r8   r   r   r   r]   r>   rB   r  maxr   r3   r  r   rc   r   r   )rY   r  r   acroforminterimarrr   r   s           rP   add_form_topnamezPdfReader.add_form_topname  s    'bg)>??g%%ZK "2.
 .
% 4('*[2I2I*JKKH$$4"$$$4T$:$:
4  !'/
90E0E'F
7##$""AA!6AAABBQF	
 	
 	

 mm

7-...*-I&&'k77#344 	D 	DA,,..CC_C,B___   *1)CC
9%%&&rR   c           
         t          t          | j        t          j                           }d|vst          |d         t                    sdS t          t          |t          d                             }d|vrdS t          t          t          t          |t          d                             d                                                   }t          |          |t          d          <   |S )z
        Rename top level form field that all form fields below it.

        Args:
            name: text string of the "/T" field of the created object

        Returns:
            The modified object. ``None`` means no object was modified.
        rF  Nr   r   r   )
r   r8   r   r   r   r]   r>   r3   rc   rB   )rY   r  r   rS  rT  s        rP   rename_form_topnamezPdfReader.rename_form_topname  s     'bg)>??g%%ZK "2.
 .
% 4('*[2I2I*JKKH$$4hz)'<'<=>>qALLNN
 
 %5T$:$:
4  !rR   c                 ^     t           fd                                 D                       S )Nc                 $    i | ]}|j         |fS r   )_get_attachment_list)r   r  rY   s     rP   r   z)PdfReader.attachments.<locals>.<dictcomp>  s3        t0$7  rR   )LazyDict_list_attachmentsrX   s   `rP   attachmentszPdfReader.attachments  sG        2244  
 
 	
rR   c                     t          t          | j        d                   }	 t          t          t          t          t          t          |d                   d                   d                   }n# t          $ r g cY S w xY wd |D             }|S )zv
        Retrieves the list of filenames of file attachments.

        Returns:
            list of filenames
        r   /Names/EmbeddedFilesc                 <    g | ]}t          |t                    |S r   )r]   r   )r   r   s     rP   r&  z/PdfReader._list_attachments.<locals>.<listcomp>  s'    HHH1Z35G5GHQHHHrR   )r   r8   r   r3   r   )rY   r   	filenamesattachments_namess       rP   r]  zPdfReader._list_attachments  s     'g)>??		$)78+<==>NO   II  	 	 	III	HH	HHH     AA0 0A?>A?c                 l    |                      |          |         }t          |t                    r|S |gS rV   )_get_attachmentsr]   r  )rY   r  outs      rP   r[  zPdfReader._get_attachment_list   s8    ##D))$/c4   	JurR   filenamec                    t          t          | j        d                   }	 t          t          t          t          t          t          |d                   d                   d                   }n# t          $ r i cY S w xY wi }t          t          |                    D ]}||         }t          |t                    r|||k    r(|}||dz            	                                }|d         d         
                                }	||v rCt          ||         t                    s||         g||<   ||                             |	           |	||<   |S )a  
        Retrieves all or selected file attachments of the PDF as a dictionary of file names
        and the file data as a bytestring.

        Args:
            filename: If filename is None, then a dictionary of all attachments
                will be returned, where the key is the filename and the value
                is the content. Otherwise, a dictionary with just a single key
                - the filename - and its content will be returned.

        Returns:
            dictionary of filename -> Union[bytestring or List[ByteString]]
            if the filename exists multiple times a List of the different version will be provided
        r   r`  ra  Nr   z/EFrx  )r   r8   r   r3   r   r  r   r]   r   rc   r  r  r  )
rY   ri  r   rc  r^  rJ  r   r  f_dictf_datas
             rP   rg  zPdfReader._get_attachments  sv   " 'g)>??		$)78+<==>NO   II  	 	 	III	<>s9~~&& 	/ 	/A!A!S!! /'AMM"1q5)4466t,5577;&&%k$&7>> @-8->,?D)%,,V4444(.K%re  )FNrl  rt  )FrV   )fr   r   r   r   r   r   rD   r   r   r   r   r  r   r  rW   r
   r	   r   r   r   rT   r   rH   r   rc  r   r   r   r   rC   r   r8   r   r   r   r   r.  r:   rA   r=   r   rA  r   rF   rQ  rP  r3   r^  r?   r:  rf  r7   ri  r@   rH  rX  r9  r  r  rG   r  r   r  r  rc   r   r   r  r  r  r   r  r  r  r  r  r  r  r5   r9   r6   r  staticmethodr  r  r   r  r  r   r&  r)  r'   r*  r   rN  rV  rX  r   r^  r]  r[  rg  r   rR   rP   r   r     s        $ 
H->$? 
 
 
 X
 ,0	25 25k4'(25 25 c5()	25
 
25 25 25 25l /3.2 tXc]*+ tXc]*+ 
c3h	   :  C       X  (#67    X& .h~6 . . . X.- - - - -*1S 1Z 1 1 1 1  .DcN . . . X. &*+/!%	0 0z"0 c3h(0 #	0
 
$sCx.	!0 0 0 0d+0@ +S + + + +4UZ!1124U S#X4U 	4U
 4U 
4U 4U 4U 4UlC*&667CADCORC	C C C C#C # #s #t # # # #J$ $ $cSVh $ $ $ $LD
5)^;<D
	j	D
 D
 D
 D
P )- $D DJ$%D D 
c3h	D D D DL # # # # X# QU' '-.'@H'	' ' ' 'R +.    X"'c:~(M"N	#   6JJ J8C= J J J J
C{ 
CxPS} 
C 
C 
C 
CII lND*FVVW
I 
I I I I@I(8 IXk=R I I I IV AtJ' A A A XA PT#Y P P P XP Xc]    X6 8L1    X6 <@,07;	32 32T+Z7832 $sCx.)32 %^4	32
 
32 32 32 32j4"04	sIs"	#4 4 4 4l; ;# ;(9:M ; ; ; ;@"'^(;"<@	)	@ @ @ @D+ +c3h + + + +8>>&)>	)	> > > >
&)080C	)	   $ : $ $ $  $  $  $ L$
 $t $ $ $ $&.z .d . . . .$*     2k
 kt k k k kZ/K /K-5c]/KKN/K	/K /K /K /Kb     2'N 'N-0'N	#'N 'N 'N 'NR& &	}13FF	G& & & &P       \<** * * * * *& J9 J SE5eCHo)=#>>? J sE#uS#X*>$?@$FG	 J
 
 J  J  J  JDDI (5c?*C    1c5j 1 1l 1 1 1 10
3 
4T	? 
 
 
 
0 92G)H 9 9 9 X9 *d * * * X* Xd38n-    X.(S (X6F-G ( ( ( (T 9I0J    6 
WS$u+%56 
 
 
 X
!49 ! ! ! !, e     )-- - -	c5U+,,	-- - - - - -rR   r   c                   b    e Zd ZdededdfdZdedefdZdee         fdZde	fd	Z
defd
ZdS )r\  r  kwrK   Nc                 (    t          |i || _        d S rV   )dict	_raw_dict)rY   r  ro  s      rP   rW   zLazyDict.__init__7  s    t*r**rR   rZ   c                 R    | j                             |          \  }} ||          S rV   )rr  __getitem__)rY   rZ   funcargs       rP   rt  zLazyDict.__getitem__:  s)    N..s33	ctCyyrR   c                 *    t          | j                  S rV   )rG  rr  rX   s    rP   __iter__zLazyDict.__iter__>  s    DN###rR   c                 *    t          | j                  S rV   )r   rr  rX   s    rP   __len__zLazyDict.__len__A  s    4>"""rR   c                 L    dt          |                                            dS )NzLazyDict(keys=r  )r  r  rX   s    rP   __str__zLazyDict.__str__D  s#    4TYY[[ 1 14444rR   )r   r   r   r   rW   r   rt  r   rx  rc  rz  r|  r   rR   rP   r\  r\  6  s        +c + + + + + +s s    $(3- $ $ $ $# # # # #5 5 5 5 5 5 5rR   r\  )dr  r  rN   rI  r   ior   r   pathlibr   typingr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   _pager   r   _page_labelsr   r  _utilsr   r   r   r   r   r   r   r   r    r!   r"   	constantsr#   rD  r$   r   r%   r&   r'   r(   rU  r)   ra   r*   r   r+   r  r,   r  r-   r   errorsr.   r/   r0   r1   r2   genericr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   r  rF   rG   xmprH   r  rc  rQ   rT   r   r\  r   rR   rP   <module>r     s  < 
			 				         , , , , , , , ,                                2 1 1 1 1 1 1 1 + + + + + + + + > > > > > >                          / . . . . . . . . . . .         
 " ! ! ! ! ! : : : : : : 6 6 6 6 6 6 + + + + + + , , , , , , ( ( ( ( ( (                                                      * - , , , , , , ,      %e %3 %5eCHo1E+F % % % %L% L% L% L% L%* L% L% L%^l l l l l l l l^95 5 5 5 5wsCx  5 5 5 5 5rR   