Untitled

<?php
/**************************************************************************************************************

    NAME
	PdfToText.phpclass

    DESCRIPTION
    	A class for extracting text from Pdf files.
	Usage is very simple : just instantiate a PdfToText object, specifying an input filename, then use the
	Text property to retrieve PDF textual contents :

		$pdf	=  new PdfToText ( 'sample.pdf' ) ;
		echo $pdf -> Text ;		// or : echo ( string ) $pdf ;

	Or :

		$pdf	=  new PdfToText ( ) ;
		// Modify any property here before loading the file ; for example :
		// $pdf -> BlockSeparator = " " ;
		$pdf -> Load ( 'sample.pdf' ) ;
		echo $pdf -> Text ;

    AUTHOR
        Christian Vigh, 04/2016.

    HISTORY
    [Version : 1.6.7]	[Date : 2017/05/31]     [Author : CV]
	. Added CID fonts
	. Changed the way CID font maps are searched and handled

    (...)

    [Version : 1.0]	[Date : 2016/04/16]     [Author : CV]
        Initial version.

 **************************************************************************************************************/


/*==============================================================================================================

    class PdfToTextException et al -
        Implements an exception thrown when an error is encountered while decoding PDF files.

  ==============================================================================================================*/

// PdfToText exception -
//	Base class for all other PdfToText exceptions.
class  PdfToTextException  extends  Exception
   {
	// Static flag, false by default ; nothing inside this class reads or modifies it
	public static  $IsObject	=  false ;
    }


// PdfToTextDecodingException -
//	Thrown when unexpected data is encountered while analyzing PDF contents.
class  PdfToTextDecodingException		extends  PdfToTextException
   {
	// Builds a message of the form "Pdf decoding error (object #id) : message".
	// The "(object #id)" part is left out when $object_id is strictly false, which is the default.
	public function  __construct ( $message, $object_id = false )
	   {
		$location	=  ( $object_id  ===  false ) ?  '' : " (object #$object_id)" ;

		parent::__construct ( "Pdf decoding error" . $location . " : $message" ) ;
	    }
    }


// PdfToTextDecryptionException -
//	Thrown when something unexpected is encountered while processing encrypted data.
class  PdfToTextDecryptionException		extends  PdfToTextException
   {
	// Builds a message of the form "Pdf decryption error (object #id) : message".
	// The "(object #id)" part is left out when $object_id is strictly false, which is the default.
	public function  __construct ( $message, $object_id = false )
	   {
		$location	=  ( $object_id  ===  false ) ?  '' : " (object #$object_id)" ;

		parent::__construct ( "Pdf decryption error" . $location . " : $message" ) ;
	    }
    }


// PdfToTextTimeoutException -
//	Thrown when the PDFOPT_ENFORCE_EXECUTION_TIME or PDFOPT_ENFORCE_GLOBAL_EXECUTION_TIME option is set, and
//	the script took longer than the allowed execution time limit.
class  PdfToTextTimeoutException		extends  PdfToTextException
   {
	// True when the time limit was reached over several invocations of the Load() method,
	// false when it was reached while processing one single PDF file
	public		$GlobalTimeout ;

	// $global is stored as is in the GlobalTimeout property ; $php_setting and $class_setting are the
	// two limits (in seconds) that are reported in the exception message.
	public function  __construct ( $message, $global, $php_setting, $class_setting )
	   {
		$this -> GlobalTimeout	=  $global ;

		// The "for one single file" mention only appears for non-global timeouts
		$scope		=  ( $global ) ?  '' : "for one single file " ;
		$details	=  "(php limit = {$php_setting}s, class limit = {$class_setting}s) : $message" ;

		parent::__construct ( "PdfToText max execution time reached " . $scope . $details ) ;
	    }
    }


// PdfToTextFormException -
//	Thrown if the xml template passed to the GetFormData() method contains an error.
class  PdfToTextFormException		extends  PdfToTextException
   {
	// Prefixes the supplied message with "Pdf form template error : "
	public function  __construct ( $message )
	   {
		parent::__construct ( "Pdf form template error" . " : $message" ) ;
	    }
    }


// PdfToTextCaptureException -
//	Thrown if the xml template passed to the SetCaptures() method contains an error.
class  PdfToTextCaptureException		extends  PdfToTextException
   {
	// Prefixes the supplied message with "Pdf capture template error : "
	public function  __construct ( $message )
	   {
		parent::__construct ( "Pdf capture template error" . " : $message" ) ;
	    }
    }


/*==============================================================================================================

        Custom error reporting functions.

  ==============================================================================================================*/
if  ( function_exists ( 'warning' )  ===  false )
   {
	// Raises $message as a user-level warning (E_USER_WARNING).
	// The function is only declared when no function named warning() exists yet.
	function  warning ( $message )
	   {
		trigger_error ( $message, E_USER_WARNING ) ;
	    }
    }


if  ( function_exists ( 'error' )  ===  false )
   {
	// Reports an error :
	// - Exception objects are thrown as is
	// - Strings are raised as a user-level error (E_USER_ERROR)
	// - Any other kind of value is silently ignored
	// The function is only declared when no function named error() exists yet.
	function  error ( $message )
	   {
		if  ( $message  instanceof  Exception )
			throw $message ;

		if  ( is_string ( $message ) )
			trigger_error ( $message, E_USER_ERROR ) ;
	    }
    }


/*==============================================================================================================

        Backward-compatibility issues.

  ==============================================================================================================*/

// hex2bin -
//	This function appeared only in version 5.4.0
if  ( function_exists ( 'hex2bin' )  ===  false )
   {
	// Fallback used when PHP provides no native hex2bin() function.
	// pack() with the 'H*' format converts every pair of hex digits (high nibble first) to one byte,
	// and completes a trailing single digit with a zero low nibble, which is what converting the
	// string pair by pair produces.
	function  hex2bin  ( $hexstring )
	   {
		return ( pack ( 'H*', $hexstring ) ) ;
	    }
    }


/*==============================================================================================================

    class PdfObjectBase -
        Base class for all PDF objects defined here.

  ==============================================================================================================*/
abstract class  PdfObjectBase		// extends  Object
   {
	// Possible encoding types for streams inside objects ; "unknown" means that the object contains no stream
	const 	PDF_UNKNOWN_ENCODING 		=   0 ;		// No stream decoding type could be identified
	const 	PDF_ASCIIHEX_ENCODING 		=   1 ;		// AsciiHex encoding - not tested
	const 	PDF_ASCII85_ENCODING		=   2 ;		// Ascii85 encoding - not tested
	const 	PDF_FLATE_ENCODING		=   3 ;		// Flate/deflate encoding
	const	PDF_TEXT_ENCODING		=   4 ;		// Stream data appears in clear text - no decoding required
	const	PDF_LZW_ENCODING		=   5 ;		// Not implemented yet
	const	PDF_RLE_ENCODING		=   6 ;		// Runtime length encoding ; not implemented yet
	const	PDF_DCT_ENCODING		=   7 ;		// JPEG images
	const	PDF_CCITT_FAX_ENCODING		=   8 ;		// CCITT Fax encoding - not implemented yet
	const	PDF_JBIG2_ENCODING		=   9 ;		// JBIG2 filter encoding (black/white) - not implemented yet
	const	PDF_JPX_ENCODING		=  10 ;		// JPEG2000 encoding - not implemented yet

	// Regular expression used for recognizing references to a font (this list is far from being exhaustive, as it seems
	// that you can specify almost everything - however, trying to recognize everything would require to develop a complete
	// parser)
	protected static	$FontSpecifiers		=  '
		(/F \d+ (\.\d+)? )						|
		(/R \d+)							|
		(/f-\d+-\d+)							|
		(/[CT]\d+_\d+)							|
		(/TT \d+)							|
		(/OPBaseFont \d+)						|
		(/OPSUFont \d+)							|
		(/[0-9a-zA-Z])							|
		(/F\w+)								|
		(/[A-Za-z][A-Za-z0-9]* ( [\-+] [A-Za-z][A-Za-z0-9]* ))
		' ;

	// Maps alien Unicode characters such as special spaces, letters with ligatures to their ascii string equivalent
	protected static	$UnicodeToSimpleAscii	=  false ;


 	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Performs static initializations such as the Unicode to Ascii table.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( )
	   {
		// The Unicode-to-Ascii table is a static property shared by all instances : it is loaded only once
		if  ( self::$UnicodeToSimpleAscii  !==  false )
			return ;

		$charset_file	=  dirname ( __FILE__ ) . "/Maps/unicode-to-ansi.map" ;

		// The included file is expected to define $unicode_to_ansi in the current scope ; when it does
		// not, an empty table is used instead
		include ( $charset_file ) ;

		if  ( isset ( $unicode_to_ansi ) )
			self::$UnicodeToSimpleAscii	=  $unicode_to_ansi ;
		else
			self::$UnicodeToSimpleAscii	=  array ( ) ;
	    }


 	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        CodePointToUtf8 - Encodes a Unicode codepoint to UTF8.

	    PROTOTYPE
	        $char	=  $this -> CodePointToUtf8 ( $code ) ;

	    DESCRIPTION
	        Encodes a Unicode codepoint to UTF8, trying to handle all possible cases.

	    PARAMETERS
	        $code (integer) -
	                Unicode code point to be translated.

	    RETURN VALUE
	        A string that contains the UTF8 bytes representing the Unicode code point.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  CodePointToUtf8 ( $code )
	   {
		// A null code cannot be translated : return the placeholder, which is used as a sprintf() format
		// receiving the code when it contains a '%' character
		if  ( ! $code )
		   {
			if  ( strpos ( PdfToText::$Utf8Placeholder, '%' )  ===  false )
				return ( PdfToText::$Utf8Placeholder ) ;

			return ( sprintf ( PdfToText::$Utf8Placeholder, $code ) ) ;
		    }

		// $code is consumed as a sequence of 16-bit words, lowest word first. Each translated word is
		// PREPENDED to the result, so that the output keeps the high-to-low order of the words in $code
		// (the previous "$result .= ... . $result" duplicated the characters already translated).
		$result		=  '' ;

		while  ( $code )
		   {
			$word		=  ( $code & 0xFFFF ) ;

			if  ( isset ( self::$UnicodeToSimpleAscii [ $word ] ) )
			   {
				// Characters having a simple Ascii equivalent are replaced by it
				$char	=  self::$UnicodeToSimpleAscii [ $word ] ;
			    }
			else
			   {
				// NOTE(review): the 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2
				$char	=  mb_convert_encoding ( "&#$word;", 'UTF-8', 'HTML-ENTITIES' ) ;
			    }

			$result		=  $char . $result ;

			// Drop the 16 bits that have just been processed. The divisor must be 0x10000 (dividing by
			// 0xFFFF left a spurious extra word for values such as 0xFFFF) ; a division is used rather
			// than ">>" so that the loop also ends on negative values.
			$code		=  ( int ) ( $code / 0x10000 ) ;
		    }

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    DecodeRawName -
		Decodes a string that may contain constructs such as '#xy', where 'xy' are hex digits.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  DecodeRawName ( $str )
	   {
		// Only a '#' followed by exactly two hex digits is an escape sequence and is replaced by the
		// corresponding byte. Everything else is kept unchanged, including a '#' that is not followed
		// by two hex digits and any literal '%' character (the former str_replace()/rawurldecode()
		// combination altered both).
		return
		   (
			preg_replace_callback
			   (
				'/#([0-9A-Fa-f]{2})/',
				function  ( $match )
				   {
					return ( chr ( hexdec ( $match [1] ) ) ) ;
				    },
				$str
			    )
		    ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetEncodingType - Gets an object encoding type.

	    PROTOTYPE
	        $type	=  $this -> GetEncodingType ( $object_id, $object_data ) ;

	    DESCRIPTION
	        When an object is a stream, returns its encoding type.

	    PARAMETERS
		$object_id (integer) -
			PDF object number.

	        $object_data (string) -
	                Object contents.

	    RETURN VALUE
	        Returns one of the following values :

		- PdfToText::PDF_ASCIIHEX_ENCODING :
			Hexadecimal encoding of the binary values.
			Decoding algorithm was taken from the unknown contributor and not tested so far, since I
			couldn't find a PDF file with such an encoding type.

		- PdfToText::PDF_ASCII85_ENCODING :
			Obscure encoding format.
			Decoding algorithm was taken from the unknown contributor and not tested so far, since I
			couldn't find a PDF file with such an encoding type.

		- PdfToText::PDF_FLATE_ENCODING :
			gzip/deflate encoding.

		- PdfToText::PDF_TEXT_ENCODING :
			Stream data is unencoded (ie, it is pure ascii).

		- PdfToText::PDF_UNKNOWN_ENCODING :
			The object data does not specify any encoding at all. It can happen on objects that do not have
			a "stream" part.

		- PdfToText::PDF_DCT_ENCODING :
			a lossy filter based on the JPEG standard.

		The following constants are defined but not yet implemented ; an exception will be thrown if they are
		encountered somewhere in the PDF file :

		- PDF_LZW_ENCODING :
			a filter based on LZW Compression; it can use one of two groups of predictor functions for more
			compact LZW compression : Predictor 2 from the TIFF 6.0 specification and predictors (filters)
			from the PNG specification

		- PDF_RLE_ENCODING :
			a simple compression method for streams with repetitive data using the run-length encoding
			algorithm and the image-specific filters.

		PDF_CCITT_FAX_ENCODING :
			a lossless bi-level (black/white) filter based on the Group 3 or Group 4 CCITT (ITU-T) fax
			compression standard defined in ITU-T T.4 and T.6.

		PDF_JBIG2_ENCODING :
			a lossy or lossless bi-level (black/white) filter based on the JBIG2 standard, introduced in
			PDF 1.4.

		PDF_JPX_ENCODING :
			a lossy or lossless filter based on the JPEG 2000 standard, introduced in PDF 1.5.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetEncodingType ( $object_id, $object_data )
	   {
		// Lowercased filter names (full and abbreviated forms) that map to an encoding constant
		$known		=  array
		   (
			'asciihexdecode'	=>  self::PDF_ASCIIHEX_ENCODING,
			'ahx'			=>  self::PDF_ASCIIHEX_ENCODING,
			'ascii85decode'		=>  self::PDF_ASCII85_ENCODING,
			'a85'			=>  self::PDF_ASCII85_ENCODING,
			'flatedecode'		=>  self::PDF_FLATE_ENCODING,
			'fl'			=>  self::PDF_FLATE_ENCODING,
			'dctdecode'		=>  self::PDF_DCT_ENCODING,
			'dct'			=>  self::PDF_DCT_ENCODING,
			'lzwdecode'		=>  self::PDF_LZW_ENCODING,
			'lzw'			=>  self::PDF_LZW_ENCODING
		    ) ;

		// Lowercased filter names that are recognized, but only reported as "not yet implemented"
		$unimplemented	=  array ( 'ccittfaxdecode', 'ccf', 'runlengthdecode', 'rl', 'jbig2decode', 'jpxdecode' ) ;

		$found		=  preg_match ( '# / (?P<encoding> (ASCIIHexDecode) | (AHx) | (ASCII85Decode) | (A85) | (FlateDecode) | (Fl) | (DCTDecode) | (DCT) | ' .
						                   '(LZWDecode) | (LZW) | (RunLengthDecode) | (RL) | (CCITTFaxDecode) | (CCF) | (JBIG2Decode) | (JPXDecode) ) \b #imsx',
						$object_data, $match ) ;

		// No filter name at all : the data is considered to be clear text
		if  ( ! $found )
			return ( self::PDF_TEXT_ENCODING ) ;

		$name		=  strtolower ( $match [ 'encoding' ] ) ;

		if  ( isset ( $known [ $name ] ) )
			return ( $known [ $name ] ) ;

		// The warning is only issued when the debug level is greater than 1
		if  ( in_array ( $name, $unimplemented, true )  &&  PdfToText::$DEBUG  >  1 )
			warning ( "Encoding type \"{$match [ 'encoding' ]}\" not yet implemented for pdf object #$object_id." ) ;

		return ( self::PDF_UNKNOWN_ENCODING ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetObjectReferences - Gets object references from a specified construct.

	    PROTOTYPE
	        $status		=  $this -> GetObjectReferences ( $object_id, $object_data, $searched_string, &$object_ids ) ;

	    DESCRIPTION
	        Certain parameter specifications are followed by an object reference of the form :
			x 0 R
		but it can also be an array of references :
			[x1 0 R x2 0 R ... xn 0 R]
		Those kind of constructs can occur after parameters such as : /Pages, /Contents, /Kids...
		This method extracts the object references found in such a construct.

	    PARAMETERS
	        $object_id (integer) -
	                Id of the object to be analyzed.

		$object_data (string) -
			Object contents.

		$searched_string (string) -
			String to be searched, that must be followed by an object or an array of object references.
			This parameter can contain constructs used in regular expressions. Note however that the '#'
			character must be escaped, since it is used as a delimiter in the regex that is applied on
			object data.

		$object_ids (array of integers) -
			Returns on output the ids of the pdf object that have been found after the searched string.

	    RETURN VALUE
	        True if the searched string has been found and is followed by an object or array of object references,
		false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetObjectReferences ( $object_id, $object_data, $searched_string, &$object_ids )
	   {
		// The output array is always reset, even when nothing is found
		$object_ids	=  array ( ) ;

		// First form : the searched string is followed by an array of references, "[x1 0 R x2 0 R ...]"
		if  ( preg_match ( "#$searched_string \s* \\[ (?P<objects> [^\]]+ ) \\]#ix", $object_data, $array_match ) )
		   {
			$found	=  preg_match_all ( '/(?P<object> \d+) \s+ \d+ \s+ R/x', $array_match [ 'objects' ], $references ) ;

			// An array that contains no reference at all is reported as a failure
			if  ( ! $found )
				return ( false ) ;

			$object_ids	=  array_map ( 'intval', $references [ 'object' ] ) ;

			return ( true ) ;
		    }

		// Second form : the searched string is followed by one single reference, "x 0 R"
		if  ( preg_match ( "#$searched_string \s+ (?P<object> \d+) \s+ \d+ \s+ R#ix", $object_data, $single_match ) )
		   {
			$object_ids []	=  ( int ) $single_match [ 'object' ] ;

			return ( true ) ;
		    }

		return ( false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetStringParameter - Retrieve a string flag value.

	    PROTOTYPE
	        $result		=  $this -> GetStringParameter ( $parameter, $object_data ) ;

	    DESCRIPTION
	        Retrieves the value of a string parameter ; for example :

			/U (parameter value)

		or :

			/U <hexdigits>

	    PARAMETERS
	        $parameter (string) -
	                Parameter name.

		$object_data (string) -
			Object containing the parameter.

	    RETURN VALUE
	        The parameter value.

	    NOTES
	        description

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetStringParameter ( $parameter, $object_data )
	   {
		// Literal string form : "/Param (value)" ; escape sequences are interpreted by ProcessEscapedString()
		if  ( preg_match ( '#' . $parameter . ' \s* \( \s* (?P<value> [^)]+) \)#ix', $object_data, $match ) )
			return ( $this -> ProcessEscapedString ( $match [ 'value' ] ) ) ;

		// Hexadecimal string form : "/Param <hexdigits>"
		if  ( preg_match ( '#' . $parameter . ' \s* \< \s* (?P<value> [^>]+) \>#ix', $object_data, $match ) )
		   {
			// White space is not significant inside a PDF hexadecimal string : remove it, otherwise the
			// digits would be paired incorrectly
			$hexdigits	=  preg_replace ( '/\s+/', '', $match [ 'value' ] ) ;

			// When the number of digits is odd, the missing final digit is assumed to be 0
			if  ( strlen ( $hexdigits )  %  2 )
				$hexdigits	.=  '0' ;

			$result		=  '' ;

			for  ( $i = 0, $count = strlen ( $hexdigits ) ; $i  <  $count ; $i +=  2 )
				$result		.=  chr ( hexdec ( substr ( $hexdigits, $i, 2 ) ) ) ;

			return ( $result ) ;
		    }

		// Parameter not found, or not followed by a string
		return ( '' ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    GetUTCDate -
	        Reformats an Adobe UTC date to a format that can be understood by the strtotime() function.
		Dates are specified in the following format :
			D:20150521154000Z
			D:20160707182114+02
		which are both recognized by strtotime(). However, another format can be specified :
			D:20160707182114+02'00'
		which is not recognized by strtotime(), so we have to get rid of the '00' part.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetUTCDate ( $date )
	   {
		// Empty values are returned untouched
		if  ( ! $date )
			return ( $date ) ;

		// Remove the leading "D:" (or "d:") prefix. strncasecmp() is safe on strings shorter than two
		// characters, whereas reading $date [1] on a one-character string raised an "Uninitialized
		// string offset" error.
		if  ( strncasecmp ( $date, 'D:', 2 )  ===  0 )
			$date	=  substr ( $date, 2 ) ;

		// Remove everything from the first single quote on, ie the "'00'" part of "+02'00'"
		$quote	=  strpos ( $date, "'" ) ;

		if  ( $quote  !==  false )
			$date	=  substr ( $date, 0, $quote ) ;

		return ( $date ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsCharacterMap -
	        Checks if the specified text contents represent a character map definition or not.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsCharacterMap  ( $decoded_data )
	   {
		// One single case-insensitive regex looks for any of the keywords that reveal a character map,
		// instead of calling strpos() once per keyword
		$keywords	=  '#(begincmap)|(beginbfrange)|(beginbfchar)|(/Differences)#ix' ;
		$found		=  preg_match ( $keywords, $decoded_data ) ;

		// The preg_match() result is returned as is (1 when a keyword is present, 0 otherwise)
		return ( $found ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsFont -
		Checks if the current object contents specify a font declaration.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsFont ( $object_data )
	   {
		// Any object that mentions a base font is considered to be a font declaration
		if  ( stripos ( $object_data, '/BaseFont' )  !==  false )
			return ( true ) ;

		// The "/Type /Font" pattern below also matches the beginning of "/Type /FontDescriptor", so
		// font descriptors have to be ruled out first
		if  ( preg_match ( '#/Type \s* /FontDescriptor#ix', $object_data ) )
			return ( false ) ;

		return ( ( bool ) preg_match ( '#/Type \s* /Font#ix', $object_data ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsFormData -
		Checks if the current object contents specify references to form data.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsFormData ( $object_data )
	   {
		// Looks for an object reference (ending with "R") immediately followed by the "(datasets)" string
		$has_datasets	=  preg_match ( '#\bR \s* \( \s* datasets \s* \)#imsx', $object_data ) ;

		// The preg_match() result is returned as is (1 when found, 0 otherwise)
		return ( $has_datasets ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsFontMap -
		Checks if the code contains things like :
			<</F1 26 0 R/F2 22 0 R/F3 18 0 R>>
		which maps font 1 (when specified with the /Fx instruction) to object 26, 2 to object 22 and 3 to
		object 18, respectively, in the above example.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsFontMap ( $object_data )
	   {
		// NOTE(review): UnescapeHexCharacters() is defined elsewhere in the class ; presumably it decodes
		// "#xx" sequences so that escaped font names can be recognized - to be confirmed
		$unescaped	=  self::UnescapeHexCharacters ( $object_data ) ;

		// A font map is a "<< ... >>" construct that starts with one of the known font specifiers
		$pattern	=  '#<< \s* ( ' . self::$FontSpecifiers . ' ) \s+ .* >>#imsx' ;

		return ( ( bool ) preg_match ( $pattern, $unescaped ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsImage -
		Checks if the code contains things like :
			/Subtype/Image

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsImage ( $object_data )
	   {
		// Case-sensitive search for "/Subtype /Image", with optional white space between the two names
		return ( ( bool ) preg_match ( '#/Subtype \s* /Image#msx', $object_data ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    IsObjectStream -
		Checks if the code contains an object stream declaration, ie things like :
			/Type/ObjStm

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsObjectStream ( $object_data )
	   {
		// Case-insensitive search for "/Type /ObjStm", with optional white space between the two names
		$found	=  preg_match ( '#/Type \s* /ObjStm#isx', $object_data ) ;

		return ( ( $found ) ?  true : false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        IsPageHeaderOrFooter - Checks if the specified stream contents denote a page header or footer.

	    PROTOTYPE
	        $status		=  $this -> IsPageHeaderOrFooter ( $stream_data ) ;

	    DESCRIPTION
	        Checks if the specified decoded stream contents denotes header or footer data.

	    PARAMETERS
	        $stream_data (string) -
	                Decoded stream contents.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsPageHeaderOrFooter ( $stream_data )
	   {
		// Either one of these constructs flags the stream as header or footer data :
		// - a pagination artifact whose subtype is Header or Footer
		// - an "/Attached [" array that contains /Top or /Bottom
		$markers	=  array
		   (
			'#/Type \s* /Pagination \s* /Subtype \s*/((Header)|(Footer))#ix',
			'#/Attached \s* \[ .*? /((Top)|(Bottom)) [^]]#ix'
		    ) ;

		foreach  ( $markers  as  $marker )
		   {
			if  ( preg_match ( $marker, $stream_data ) )
				return ( true ) ;
		    }

		return ( false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        IsText - Check if the specified object contents denote a text stream.

	    PROTOTYPE
	        $status		=  $this -> IsText ( $object_data, $decoded_stream_data ) ;

	    DESCRIPTION
	        Checks if the specified object contents denote a text stream.

	    PARAMETERS
	        $object_data (string) -
	                Object data, ie the contents located between the "obj" and "endobj" keywords.

	        $decoded_stream_data (string) -
	        	The flags specified in the object data are not sufficient to be sure that we have a block of
	        	drawing instructions. We must also check for certain common instructions to be present.

	    RETURN VALUE
	        True if the specified contents MAY be text contents, false otherwise.

	    NOTES
		I do not consider this method as bullet-proof. There may arise some cases where non-text blocks can be
		mistakenly considered as text blocks, so it is subject to evolve in the future.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsText ( $object_data, $decoded_stream_data )
	   {
		// The stream MAY hold text when it contains at least one of the common text or drawing operators
		// (BT, Tf, Td, TJ, Tj, Tm, Do, cm) as a whole word.
		//
		// $object_data is kept in the signature for the callers but is not inspected : the previous
		// implementation tested the /Filter, /Length, /Type, /Subtype and /Length1 flags, then ran this
		// very same operator check in both branches, so the flags never had any influence on the result.
		return ( ( bool ) preg_match ( '/\\b(BT|Tf|Td|TJ|Tj|Tm|Do|cm)\\b/', $decoded_stream_data ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        PregStrReplace - Replace string(s) using regular expression(s)

	    PROTOTYPE
	        $result		=  PdfToText::PregStrReplace ( $pattern, $replacement, $subject, $limit = -1,
						&$match_count = null )

	    DESCRIPTION
	        This function behaves like a mix of str_replace() and preg_replace() ; it allows to search for strings
		using regular expressions, but the replacements are plain-text strings and no reference to a capture
		specified in the regular expression will be interpreted.
		This is useful when processing templates, which can contain constructs such as "\00" or "$", which are
		interpreted by preg_replace() as references to captures.

		The function has the same parameters as preg_replace().

	    RETURN VALUE
	        Returns the substituted text.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  PregStrReplace ( $pattern, $replacement, $subject, $limit = -1, &$match_count = null )
	   {
		// Total number of replacements performed, returned through the last parameter
		$match_count	=  0 ;

		// Make sure that $pattern and $replacement become two lists of the same size
		if  ( is_array ( $pattern ) )
		   {
			$pattern	=  array_values ( $pattern ) ;

			if  ( is_array ( $replacement ) )
			   {
				if  ( count ( $pattern )  !==  count ( $replacement ) )
				   {
					warning ( "The \$replacement parameter should have the same number of element as \$pattern." ) ;
					return ( $subject ) ;
				    }

				$replacement	=  array_values ( $replacement ) ;
			    }
			// One single replacement string is shared by all the patterns ; the start index given to
			// array_fill() must be 0 (it used to receive the replacement string itself)
			else if  ( count ( $pattern ) )
				$replacement	=  array_fill ( 0, count ( $pattern ), $replacement ) ;
			else
				$replacement	=  array ( ) ;
		    }
		else
		   {
			if  ( is_array ( $replacement ) )
			   {
				warning ( "Expected string for the \$replacement parameter." ) ;
				return ( $subject ) ;
			    }

			$pattern	=  array ( $pattern ) ;
			$replacement	=  array ( $replacement ) ;
		    }

		// As for preg_replace(), the limit applies to each pattern ; any value below 1 means "no limit",
		// which preg_replace_callback() expects to be specified as -1
		if  ( $limit  <  1 )
			$limit		=  -1 ;

		// Patterns are applied one after the other, each one working on the output of the previous one
		$current_subject	=  $subject ;

		foreach  ( $pattern  as  $index => $regex )
		   {
			$literal	=  $replacement [ $index ] ;

			// The callback returns the replacement text verbatim, so that constructs such as "$1" or
			// "\0" are never interpreted as references to captures
			$replaced	=  preg_replace_callback
			   (
				$regex,
				function  ( $match )  use  ( $literal )
				   {
					return ( $literal ) ;
				    },
				$current_subject,
				$limit,
				$count
			    ) ;

			// A null result signals a regex error : the subject is then left as it was before this pattern
			if  ( $replaced  !==  null )
			   {
				$current_subject	 =  $replaced ;
				$match_count		+=  $count ;
			    }
		    }

		return ( $current_subject ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        ProcessEscapedCharacter - Interprets a character after a backslash in a string.

	    PROTOTYPE
	        $ch		=  $this -> ProcessEscapedCharacter ( $ch ) ;

	    DESCRIPTION
	        Interprets a character after a backslash in a string and returns the interpreted value.

	    PARAMETERS
	        $ch (char) -
	                Character to be escaped.

	    RETURN VALUE
	        The escaped character.

	    NOTES
		This method does not process octal sequences.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  ProcessEscapedCharacter ( $ch )
	   {
		// Letters that stand for a control character when they follow a backslash
		$control_characters	=  array
		   (
			'n'	=>  "\n",
			'r'	=>  "\r",
			'f'	=>  "\f",
			't'	=>  "\t",
			'b'	=>  "\x08",		// Backspace
			'v'	=>  "\x0B"		// Vertical tab
		    ) ;

		if  ( isset ( $control_characters [ $ch ] ) )
			return ( $control_characters [ $ch ] ) ;

		// Any other escaped character, including parentheses, brackets and the backslash itself,
		// stands for itself
		return ( $ch ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        ProcessEscapedString - Processes a string which can have escaped characters.

	    PROTOTYPE
	        $result		=  $this -> ProcessEscapedString ( $str, $process_octal_escapes = false ) ;

	    DESCRIPTION
	        Processes a string which may contain escape sequences.

	    PARAMETERS
	        $str (string) -
	                String to be processed.

		$process_octal_escapes (boolean) -
			When true, octal escape sequences such as \037 are processed.

	    RETURN VALUE
	        The processed input string.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  ProcessEscapedString ( $str, $process_octal_escapes = false )
	   {
		$length		=  strlen ( $str ) ;
		$output		=  '' ;
		$cursor		=  0 ;			// Start of the text that has not been copied yet
		$zero		=  ord ( '0' ) ;

		for  ( ; ; )
		   {
			$slash		=  strpos ( $str, '\\', $cursor ) ;

			// Stop when there is no more backslash, or when the backslash is the very last character
			// (it is then copied with the remaining text)
			if  ( $slash  ===  false  ||  $slash + 1  >=  $length )
				break ;

			// Copy the text located before the backslash
			$output		.=  substr ( $str, $cursor, $slash - $cursor ) ;
			$next		 =  $str [ $slash + 1 ] ;
			$is_octal	 =  ( $process_octal_escapes  &&  $next  >=  '0'  &&  $next  <=  '7' ) ;

			// Regular escape : one single character follows the backslash
			if  ( ! $is_octal )
			   {
				$output		.=  $this -> ProcessEscapedCharacter ( $next ) ;
				$cursor		 =  $slash + 2 ;

				continue ;
			    }

			// Octal escape : one to three octal digits follow the backslash
			$value		=  0 ;
			$cursor		=  $slash + 1 ;

			for  ( $digits = 0 ;
					$digits  <  3  &&  $cursor  <  $length  &&  $str [ $cursor ]  >=  '0'  &&  $str [ $cursor ]  <=  '7' ;
					$digits ++ )
				$value	=  ( $value * 8 ) + ( ord ( $str [ $cursor ++ ] ) - $zero ) ;

			$output		.=  chr ( $value ) ;
		    }

		// Copy whatever remains after the last processed escape sequence
		return ( $output . substr ( $str, $cursor ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Unescape - Processes escape sequences from the specified string.

	    PROTOTYPE
	        $value	=  $this -> Unescape ( $text ) ;

	    DESCRIPTION
	        Processes escape sequences within the specified text. The recognized escape sequences are like the
		C-language ones : \b (backspace), \f (form feed), \r (carriage return), \n (newline), \t (tab), plus
		octal sequences of one to three digits (\ddd), which are replaced with the character having that code.
		All other characters prefixed by "\" are returned as is.

	    PARAMETERS
	        $text (string) -
	                Text to be unescaped.

	    RETURN VALUE
	        Returns the unescaped value of $text.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  Unescape ( $text )
	   {
		// Single-letter escapes and the character each one stands for.
		// The backspace is spelled "\x08" because PHP, unlike C, has no "\b" escape in double-quoted strings :
		// "\b" would be a literal backslash followed by the letter "b".
		static	$simple_escapes		=  array
		   (
			'b'	=>  "\x08",
			't'	=>  "\t",
			'f'	=>  "\f",
			'r'	=>  "\r",
			'n'	=>  "\n"
		    ) ;

		$length 	=  strlen ( $text ) ;
		$result 	=  '' ;
		$ord0		=  ord ( '0' ) ;

		for  ( $i = 0 ; $i  <  $length ; $i ++ )
		   {
			$ch 	=  $text [$i] ;

			// Regular character, or lone backslash at the very end of the string : copy as is
			if  ( $ch  !==  '\\'  ||  ! isset ( $text [$i+1] ) )
			   {
				$result 	.=  $ch ;
				continue ;
			    }

			// $nch is the character that follows the backslash
			$nch 	=  $text [++$i] ;

			if  ( isset ( $simple_escapes [ $nch ] ) )
				$result 	.=  $simple_escapes [ $nch ] ;
			// Octal escape notation : one to three octal digits
			else if  ( $nch  >=  '0'  &&  $nch  <=  '7' )
			   {
				$ord		=  ord ( $nch ) - $ord0 ;
				$digits		=  1 ;

				// $i always designates the last consumed character, so that the $i++ of the for() loop
				// moves on to the first character located after the escape sequence
				while  ( $digits  <  3  &&  isset ( $text [$i+1] )  &&  $text [$i+1]  >=  '0'  &&  $text [$i+1]  <=  '7' )
				   {
					$ord	=  ( $ord * 8 ) + ord ( $text [++$i] ) - $ord0 ;
					$digits ++ ;
				    }

				// Only the low-order byte is kept for values above 255 (eg, \777), which is what chr() does anyway
				$result 	.=  chr ( $ord & 0xFF ) ;
			    }
			// Any other escaped character stands for itself
			else
				$result 	.=  $nch ;
		    }

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        UnescapeHexCharacters - Unescapes characters in the #xy notation.

	    PROTOTYPE
	        $result		=  $this -> UnescapeHexCharacters ( $data ) ;

	    DESCRIPTION
		Some specifications contain hex characters specified as #xy. For the moment, I have met such a construct in
		font aliases such as :
			/C2#5F0 25 0 R
		where "#5F" stands for "_", giving :
			/C2_0 25 0 R
		Hope that such constructs do not happen in other places...

	    PARAMETERS
	        $data (string) -
	                String to be unescaped.

	    RETURN VALUE
	        The input string with all the hex character representations replaced with their ascii equivalent.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  UnescapeHexCharacters ( $data )
	   {
		// Data containing the "stream" keyword is returned untouched
		// NOTE(review): presumably to avoid altering raw stream contents - confirm against callers
		if  ( strpos ( $data, 'stream' )  !==  false )
			return ( $data ) ;

		// Nothing to do if no "#xy" sequence is present (preg_match_all() returns 0, or false on error)
		if  ( ! preg_match_all ( '/(?P<hex> \# [0-9a-f] [0-9a-f])/ix', $data, $matches ) )
			return ( $data ) ;

		// Associate each distinct "#xy" sequence with the character it stands for
		$map	=  array ( ) ;

		foreach  ( $matches [ 'hex' ]  as  $hex )
			$map [ $hex ]	=  chr ( hexdec ( substr ( $hex, 1 ) ) ) ;

		// strtr() performs all the substitutions in a single left-to-right pass and never rescans text it has
		// already replaced, so a decoded "#" cannot combine with the following characters to form a new "#xy"
		// sequence (eg, "#235F" gives "#5F", not "_")
		return ( strtr ( $data, $map ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    ValidatePhpName -
		Checks that the specified name (declared in the XML template) is a valid PHP name.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function ValidatePhpName ( $name )
	   {
		// Surrounding whitespace is not significant
		$name		=  trim ( $name ) ;

		// A valid name starts with a letter or an underscore, followed by any number of letters, digits or underscores
		$is_valid	=  preg_match ( '/^ [a-z_][a-z0-9_]* $/ix', $name ) ;

		if  ( $is_valid )
			return ( $name ) ;

		// Invalid name : report it through the error() function, defined elsewhere in this file
		error ( new PdfToTextFormException ( "Invalid PHP name \"$name\"." ) ) ;

		return ( $name ) ;
	    }
	}


/*==============================================================================================================

    PdfToText class -
	A class for extracting text from Pdf files.

 ==============================================================================================================*/
class  PdfToText 	extends PdfObjectBase
   {
	// Current version of the class
	const		VERSION					=  "1.6.7" ;

	// Pdf processing options
	const		PDFOPT_NONE				=  0x00000000 ;		// No extra option
	const		PDFOPT_REPEAT_SEPARATOR			=  0x00000001 ;		// Repeats the Separator property if the offset between two text blocks (in array notation)
											// is greater than $this -> MinSpaceWidth
	const		PDFOPT_GET_IMAGE_DATA			=  0x00000002 ;		// Retrieve raw image data in the $this -> ImageData array
	const		PDFOPT_DECODE_IMAGE_DATA		=  0x00000004 ;		// Creates a jpeg resource for each image
	const		PDFOPT_IGNORE_TEXT_LEADING		=  0x00000008 ;		// Ignore text leading values
	const		PDFOPT_NO_HYPHENATED_WORDS		=  0x00000010 ;		// Join hyphenated words that are split on two lines
	const		PDFOPT_AUTOSAVE_IMAGES			=  0x00000020 ;		// Autosave images ; the ImageFileTemplate property will need to be defined
	const		PDFOPT_ENFORCE_EXECUTION_TIME		=  0x00000040 ;		// Enforces the max_execution_time PHP setting when processing a file. A PdfTexterTimeoutException
											// will be thrown if processing of a single file reaches (time_limit - 1 second) by default
											// The MaxExecutionTime property can be set to modify this default value.
	const		PDFOPT_ENFORCE_GLOBAL_EXECUTION_TIME	=  0x00000080 ;		// Same as PDFOPT_ENFORCE_EXECUTION_TIME, but for all calls to the Load() method of the PdfToText class
											// The MaxGlobalExecutionTime static property can be set to modify the default time limit
	const		PDFOPT_IGNORE_HEADERS_AND_FOOTERS	=  0x00000300 ;		// Ignore headers and footers

	const		PDFOPT_RAW_LAYOUT			=  0x00000000 ;		// Layout rendering : raw (default)
	const		PDFOPT_BASIC_LAYOUT			=  0x00000400 ;		// Layout rendering : basic

	const		PDFOPT_LAYOUT_MASK			=  0x00000C00 ;		// Mask to isolate the targeted layout

	const		PDFOPT_ENHANCED_STATISTICS		=  0x00001000 ;		// Compute statistics on PDF language instructions
	const		PDFOPT_DEBUG_SHOW_COORDINATES		=  0x00002000 ;		// Include text coordinates ; implies the PDFOPT_BASIC_LAYOUT option
											// This option can be useful if you want to use capture areas and get information about
											// their coordinates
	const		PDFOPT_CAPTURE				=  0x00004000 ;		// Indicates that the caller wants to capture some text and use the SetCaptures() method
											// It currently enables the PDFOPT_BASIC_LAYOUT option
	const		PDFOPT_LOOSE_X_CAPTURE			=  0x00008000 ;		// Includes in captures text fragments whose dimensions may exceed the captured area dimensions
	const		PDFOPT_LOOSE_Y_CAPTURE			=  0x00010000 ;		// (currently not used)

	// When boolean true, outputs debug information about fonts, character maps and drawing contents.
	// When integer > 1, outputs additional information about other objects.
	public static 		$DEBUG 			=  false ;

	// Current filename
	public 			$Filename 			=  false ;
	// Extracted text
	public			$Text				=  '' ;
	// Document pages (array of strings)
	public			$Pages				=  array ( ) ;
	// Document images (array of PdfImage objects)
	public			$Images				=  array ( ) ;
	protected		$ImageCount			=  0 ;
	// Raw data for document images
	public			$ImageData			=  array ( ) ;
	// ImageAutoSaveFileTemplate :
	//	Template for the file names to be generated when extracting images, if the PDFOPT_AUTOSAVE_IMAGES has been specified.
	//	Can contain any path, plus the following printf()-like modifiers :
	//	. "%p" : Path of the original PDF file.
	//	. "%f" : Filename part of the original PDF file.
	//	. "%d" : A sequential number, starting from 1, used when generating filenames. The format can contain a width specifier,
	//		 such as "%3d", which will generate 3-digit sequential numbers left-filled with zeroes.
	//	. "%s" : Image suffix, which is automatically chosen based on the underlying image type.
	public			$ImageAutoSaveFileTemplate	=   "%p/%f.%d.%s" ;
	// Auto-save image file format
	public			$ImageAutoSaveFormat		=  IMG_JPEG ;
	// Auto-saved image file names
	public			$AutoSavedImageFiles		=  array ( ) ;
	// Text chunk separator (used to separate blocks of text specified as an array notation)
	public			$BlockSeparator			=  '' ;
	// Separator used to separate text groups where the offset value is less than -1000 thousands of character units
	// (eg : [(1)-1822(2)] will add a separator between the characters "1" and "2")
	// Note that such values are expressed in thousands of text units and subtracted from the current position. A
	// negative value means adding more space between the two text units it separates.
	public			$Separator			=  ' ' ;
	// Separator to be used between pages in the $Text property
	public			$PageSeparator			=  "\n" ;
	// Minimum value (in 1/1000 of text units) that separates two text chunks that can be considered as a real space
	public			$MinSpaceWidth			=  200 ;
	// Pdf options
	public			$Options			=  self::PDFOPT_NONE ;
	// Maximum number of pages to extract from the PDF. A zero value means "extract everything"
	// If this number is negative, then the pages to be extract start from the last page. For example, a value of -2
	// extracts the last two pages
	public			$MaxSelectedPages		=  false ;
	// Maximum number of images to be extracted. A value of zero means "extract everything". A non-zero value gives
	// the number of images to extract.
	public			$MaxExtractedImages		=  false ;
	// Location of the CID tables directory
	public static		$CIDTablesDirectory ;
	// Location of the Font metrics directory, for the Adobe standard 14 fonts
	public static		$FontMetricsDirectory ;
	// Standard Adobe font names, and their corresponding file in $FontMetricsDirectory
	public static		$AdobeStandardFontMetrics	=  array
	   (
		'courier'		=>  'courier.fm',
		'courier-bold'		=>  'courierb.fm',
		'courier-oblique'	=>  'courieri.fm',
		'courier-boldoblique'	=>  'courierbi.fm',
		'helvetica'		=>  'helvetica.fm',
		'helvetica-bold'	=>  'helveticab.fm',
		'helvetica-oblique'	=>  'helveticai.fm',
		'helvetica-boldoblique'	=>  'helveticabi.fm',
		'symbol'		=>  'symbol.fm',
		'times-roman'		=>  'times.fm',
		'times-bold'		=>  'timesb.fm',
		'times-bolditalic'	=>  'timesbi.fm',
		'times-italic'		=>  'timesi.fm',
		'zapfdingbats'		=>  'zapfdingbats.fm'
	    ) ;
	// Author information
	public			$Author				=  '' ;
	public			$CreatorApplication		=  '' ;
	public			$ProducerApplication		=  '' ;
	public			$CreationDate			=  '' ;
	public			$ModificationDate		=  '' ;
	public			$Title				=  '' ;
	public			$Subject			=  '' ;
	public			$Keywords			=  '' ;
	protected		$GotAuthorInformation		=  false ;
	// Unique and arbitrary file identifier, as specified in the PDF file
	// Well, in fact, there are two IDs, but the PDF specification does not mention the goal of the second one
	public			$ID				=  '' ;
	public			$ID2				=  '' ;
	// End of line string
	public			$EOL				=  PHP_EOL ;
	// String to be used when no Unicode translation is possible
	public static		$Utf8Placeholder		=  '' ;
	// Information about memory consumption implied by the file currently being loaded
	public			$MemoryUsage,
				$MemoryPeakUsage ;
	// Offset of the document start (%PDF-x.y)
	public			$DocumentStartOffset ;
	// Debug statistics
	public		$Statistics			=  array ( ) ;
	// Max execution time settings. A positive value means "don't exceed that number of seconds".
	// A negative value means "Don't exceed PHP setting max_execution_time - that number of seconds". If the result
	// is negative, then the default will be "max_execution_time - 1".
	// For those limits to be enforced, you need to specify either the PDFOPT_ENFORCE_EXECUTION_TIME or
	// PDFOPT_ENFORCE_GLOBAL_EXECUTION_TIME options, or both
	public			$MaxExecutionTime		=  -1 ;
	public static		$MaxGlobalExecutionTime		=  -1 ;
	// This property is expressed in percents ; it gives the extra percentage to add to the values computed by
	// the PdfTexterFont::GetStringWidth() method.
	// This is basically used when computing text positions and string lengths with the PDFOPT_BASIC_LAYOUT option :
	// the computed string length is shorter than its actual length (because of extra spacing determined by character
	// kerning in the font data). To determine whether two consecutive blocks of text should be separated by a space,
	// we empirically add this extra percentage to the computed string length. The default is -5%.
	public			$ExtraTextWidth			=  -5 ;

	// Marker stuff. The unprocessed marker list is a sequential array of markers, which will later be dispatched into
	// indexed arrays during their first reference
	protected		$UnprocessedMarkerList		=  array ( 'font' => array ( ) ) ;
	protected		$TextWithFontMarkers		=  array ( ) ;

	// Internal variables used when the PDFOPT_ENFORCE_* options are specified
	protected static	$PhpMaxExecutionTime ;
	protected static	$GlobalExecutionStartTime ;
	protected static	$AllowedGlobalExecutionTime ;
	protected		$ExecutionStartTime ;
	protected		$AllowedExecutionTime ;

	// Font mappings
	protected 		$FontTable			=  false ;
	// Extra Adobe standard font mappings (for character names of the form "/axxx" for example)
	protected		$AdobeExtraMappings		=  array ( ) ;
	// Page map object
	protected		$PageMap ;
	// Page locations (start and end offsets)
	protected		$PageLocations ;
	// Encryption data
	public			$IsEncrypted			=  false ;
	protected		$EncryptionData			=  false ;
	// A flag coming from the constructor options, telling if enhanced statistics are enabled
	protected		$EnhancedStatistics ;

	// Document text fragments, with their absolute (x,y) position, approximate width and height
	protected		$DocumentFragments ;

	// Form data
	protected		$FormData ;
	protected		$FormDataObjectNumbers ;
	protected		$FormDataDefinitions ;
	// Declared under the name that __load() actually assigns ; without this declaration, that assignment creates
	// a dynamic property, which is deprecated as of PHP 8.2
	protected		$FormDataObjects ;
	// Misspelled former declaration of $FormDataObjects, kept only in case a derived class references it
	protected		$FormaDataObjects ;

	// Capture data
	public			$CaptureDefinitions ;
	protected		$CaptureObject ;

	// Indicates whether global static initializations have been made
	// This is mainly used for variables such as $Utf8PlaceHolder, which is initialized to a different value
	private static		$StaticInitialized		=  false ;

	// Drawing instructions that are to be ignored and removed from a text stream before processing, for performance
	// reasons (it is faster to call preg_replace() once to remove them than calling the __next_instruction() and
	// __next_token() methods to process an input stream containing such useless instructions)
	// This is an array of regular expressions where the following constructs are replaced at runtime during static
	// initialization :
	// %n - Will be replaced with a regex matching a decimal number.
	private static		$IgnoredInstructionTemplatesLayout	=  array
	   (
		'%n{6} ( (c) ) \s+',
		'%n{4} ( (re) | (y) | (v) | (k) | (K) ) \s+',
		'%n{3} ( (scn) | (SCN) | (r) | (rg) | (RG) | (sc) | (SC) ) \s+',
		'%n{2} ( (m) | (l) ) \s+',
		'%n ( (w) | (M) | (g) | (G) | (J) | (j) | (d) | (i) | (sc) | (SC) | (Tc) | (Tw) | (scn) | (Tr) | (Tz) | (Ts) ) \s+',
		'\b ( (BDC) | (EMC) ) \s+',
		'\/( (Cs \d+) | (CS \d+) | (G[Ss] \d+) | (Fm \d+) | (Im \d+) | (PlacedGraphic) ) \s+ \w+ \s*',
		'\/( (Span) | (Artifact) | (Figure) | (P) ) \s* << .*? >> [ \t\r\n>]*',
		'\/ ( (PlacedGraphic) | (Artifact) ) \s+',
		'\d+ \s+ ( (scn) | (SCN) )',
		'\/MC \d+ \s+',
		 '^ \s* [fhS] \r? \n',
		 '^W \s+ n \r? \n',
		 '(f | W) \* \s+',
		 '^[fhnS] \s+',
		 '-?0 (\. \d+)? \s+ T[cw]',
		 '\bBI \s+ .*? \bID \s+ .*? \bEI',
		 '\/ \w+ \s+ ( (cs) | (CS) | (ri) | (gs) )',
		 // Hazardous replaces ?
		 '( [Ww] \s+ ){3,}',
		 ' \[\] \s+ [Shs] \s+'
	    ) ;
	// Additional instructions to be stripped when no particular page layout has been requested
	private static		$IgnoredInstructionTemplatesNoLayout	=  array
	   (
		'%n{6} ( (cm) ) \s+',
//		'\b ( (BT) | (ET) ) \s+',
		 '^ \s* [Qq] \r? \n',
		 '^ \s* (\b [a-zA-Z] \s+)+',
		 '\s* (\b [a-zA-Z] \s+)+$',
		 '^[qQ] \s+',
		 '^q \s+ [hfS] \n',
		 '( [Qfhnq] \s+ ){2,}'
	    ) ;
	// Replacement regular expressions for %something constructs specified in the $IgnoredInstructions array
	private static		$ReplacementConstructs		=  array
	    (
		'%n'	=>  '( [+\-]? ( ( [0-9]+ ( \. [0-9]* )? ) | ( \. [0-9]+ ) ) \s+ )'
	     ) ;
	// The final regexes that are built during static initialization by the __build_ignored_instructions() method
	private static		$IgnoredInstructionsNoLayout	=  array ( ) ;
	private static		$IgnoredInstructionsLayout	=  array ( ) ;
	private			$IgnoredInstructions		=  array ( ) ;

	// Map id buffer - for avoiding unnecessary calls to GetFontByMapId
	private			$MapIdBuffer			=  array ( ) ;

	// Same for MapCharacter()
	private			$CharacterMapBuffer		=  array ( ) ;

	// Font objects buffer - used by __assemble_text_fragments()
	private			$FontObjectsBuffer		=  array ( ) ;

	// Regex used for removing hyphens - we have to take care of different line endings : "\n" for Unix, "\r\n"
	// for Windows, and "\r" for pure Mac files.
	// Note that we replace an hyphen followed by an end-of-line then by non-space characters with the non-space
	// characters, so the word gets joined on the same line. Spaces after the end of the word (on the next line)
	// are removed, in order for the next word to appear at the beginning of the second line.
	private static		$RemoveHyphensRegex		=  '#
									(
										  -
										  [ \t]* ( (\r\n) | \n | \r )+ [ \t\r\n]*
									 )
									([^ \t\r\n]+)
									\s*
								    #msx' ;

	// A small list of Unicode character ranges that are related to languages written from right to left
	// For performance reasons, everything is mapped to a range here, even if it includes codepoints that do not map to anything
	// (this class is not a Unicode codepoint validator, but a Pdf text extractor...)
	// The UTF-16 version is given as comments ; only the UTF-8 translation is used here
	// To be completed !
	private static		$RtlCharacters			=  array
	   (
		// This range represents the following languages :
		// - Hebrew			(0590..05FF)
		// - Arabic			(0600..06FF)
		// - Syriac			(0700..074F)
		// - Supplement for Arabic	(0750..077F)
		// - Thaana			(0780..07BF)
		// - N'ko			(07C0..07FF)
		// - Samaritan			(0800..083F)
		// - Mandaic			(0840..085F)
		//	array ( 0x00590, 0x0085F ),
		// Hebrew supplement (I suppose ?) + other characters
		//	array ( 0x0FB1D, 0x0FEFC ),
		// Mende kikakui
		//	array ( 0x1E800, 0x1E8DF ),
		// Adlam
		//	array ( 0x1E900, 0x1E95F ),
		// Others
		//	 array ( 0x10800, 0x10C48 ),
		//	 array ( 0x1EE00, 0x1EEBB )
		"\xD6"		=>  array ( array ( "\x90", "\xBF" ) ),
		"\xD7"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xD8"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xD9"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDA"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDB"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDC"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDD"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDE"		=>  array ( array ( "\x80", "\xBF" ) ),
		"\xDF"		=>  array ( array ( "\x80", "\xBF" ) )
		/*
		"\xE0"		=>  array
		   (
			array ( "\xA0\x80", "\xA0\xBF" ),
			array ( "\xA1\x80", "\xA1\x9F" )
		    ),
		"\xEF"		=>  array
		   (
			array ( "\xAC\x9D", "\xAC\xBF" ),
			array ( "\xAD\x80", "\xAD\xBF" ),
			array ( "\xAE\x80", "\xAE\xBF" ),
			array ( "\xAF\x80", "\xAF\xBF" ),
			array ( "\xB0\x80", "\xB0\xBF" ),
			array ( "\xB1\x80", "\xB1\xBF" ),
			array ( "\xB2\x80", "\xB2\xBF" ),
			array ( "\xB3\x80", "\xB3\xBF" ),
			array ( "\xB4\x80", "\xB4\xBF" ),
			array ( "\xB5\x80", "\xB5\xBF" ),
			array ( "\xB6\x80", "\xB6\xBF" ),
			array ( "\xB7\x80", "\xB7\xBF" ),
			array ( "\xB8\x80", "\xB8\xBF" ),
			array ( "\xB9\x80", "\xB9\xBF" ),
			array ( "\xBA\x80", "\xBA\xBF" ),
			array ( "\xBB\x80", "\xBB\xBC" )
		    )
		    */
	    ) ;

	// UTF-8 prefixes for RTL characters as keys, and number of characters that must follow the prefix as values
	// Every prefix listed in the $RtlCharacters array must also be present here, since the constructor builds the
	// $RtlCharacterPrefixes string from the keys of this array
	private static		$RtlCharacterPrefixLengths	=  array
	   (
		"\xD6"		=>  1,
		"\xD7"		=>  1,
		"\xD8"		=>  1,
		"\xD9"		=>  1,
		"\xDA"		=>  1,
		"\xDB"		=>  1,
		"\xDC"		=>  1,
		"\xDD"		=>  1,		// U+0740..U+077F (end of Syriac, Arabic supplement)
		"\xDE"		=>  1,
		"\xDF"		=>  1
		/*
		"\xE0"		=>  2,
		"\xEF"		=>  2
		*/
	    ) ;

	// A string that contains all the RTL character prefixes above
	private static		$RtlCharacterPrefixes ;

	// As usual, caching a little bit the results of the IsRtlCharacter() method is welcome. Each item will have the value true if the
	// character is RTL, or false if LTR.
	private			$RtlCharacterBuffer		=  array ( ) ;

	// A subset of a character classification array that avoids too many calls to the ctype_* functions or too many
	// character comparisons.
	// This array is used only for heavily used parts of the code.
	// The constructor combines these flags with a bitwise OR to classify each of the 256 possible byte values in
	// the $CharacterClasses array ; only the ASCII letters and digits receive a non-zero value.
	const	CTYPE_ALPHA		=  0x01 ;		// Letter
	const	CTYPE_DIGIT		=  0x02 ;		// Digit
	const	CTYPE_XDIGIT		=  0x04 ;		// Hex digit (0..9, A..F, a..f)
	const	CTYPE_ALNUM		=  0x08 ;		// Letter or digit
	const	CTYPE_LOWER		=  0x10 ;		// Lower-case letter
	const	CTYPE_UPPER		=  0x20 ;		// Upper-case letter

	private static		$CharacterClasses		=  false ;

	// Stuff specific to the current PHP version
	private static		$HasMemoryGetUsage ;
	private static		$HasMemoryGetPeakUsage ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR
	        $pdf	=  new PdfToText ( $filename = null, $options = PDFOPT_NONE, $user_password = false, $owner_password = false ) ;

	    DESCRIPTION
	        Builds a PdfToText object and optionally loads the specified file's contents.

	    PARAMETERS
	        $filename (string) -
	                Optional PDF filename whose text contents are to be extracted.

		$options (integer) -
			A combination of PDFOPT_* flags. This can be any of the following :

			- PDFOPT_REPEAT_SEPARATOR :
				Text constructs specified as an array are separated by an offset which is expressed as
				thousands of text units ; for example :

					[(1)-2000(2)]

				will be rendered as the text "1  2" ("1" and "2" being separated by two spaces) if the
				"Separator" property is set to a space (the default) and this flag is specified.
				When not specified, the text will be rendered as "1 2".

			- PDFOPT_NONE :
				None of the above options will apply.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $filename = null, $options = self::PDFOPT_NONE, $user_password = false, $owner_password = false )
	   {
		// Parameters :
		// - $filename       : file to be loaded right away ; nothing is loaded when this value evaluates to false
		// - $options        : combination of PDFOPT_* flags, stored in the Options property
		// - $user_password,
		//   $owner_password : passed as is to the Load() method, together with $filename

		// We need the mbstring PHP extension here...
		if  ( ! function_exists ( 'mb_convert_encoding' ) )
			error ( "You must enable the mbstring PHP extension to use this class." ) ;

		// Perform static initializations if needed (this happens only for the first instantiated object)
		if  ( ! self::$StaticInitialized )
		   {
			if  ( self::$DEBUG )
			   {
				// In debug mode, initialize the utf8 placeholder only if it still set to its default value, the empty string
				if  ( self::$Utf8Placeholder  ==  '' )
					self::$Utf8Placeholder	=  '[Unknown character : 0x%08X]' ;
			    }

			// Build the list of regular expressions from the list of ignored instruction templates
			self::__build_ignored_instructions (  ) ;

			// Check if some functions are supported or not
			self::$HasMemoryGetUsage	=  function_exists ( 'memory_get_usage' ) ;
			self::$HasMemoryGetPeakUsage	=  function_exists ( 'memory_get_peak_usage' ) ;

			// Location of the directory containing CID fonts
			self::$CIDTablesDirectory	=  dirname ( __FILE__ ) . DIRECTORY_SEPARATOR . 'CIDTables' ;
			self::$FontMetricsDirectory	=  dirname ( __FILE__ ) . DIRECTORY_SEPARATOR . 'FontMetrics' ;

			// The string that contains all the Rtl character prefixes in UTF-8 - An optimization used by the __rtl_process() method
			self::$RtlCharacterPrefixes	=  implode ( '', array_keys ( self::$RtlCharacterPrefixLengths ) ) ;

			// Build the character classes (used only for testing letters and digits)
			if  ( self::$CharacterClasses  ===  false )
			   {
				// The table is built in a local array then assigned as a whole : writing elements directly into
				// the static property, whose initial value is false, relies on the automatic conversion of
				// false to an array, which is deprecated as of PHP 8.1
				$classes	=  array ( ) ;

				for  ( $ord = 0 ; $ord  <  256 ; $ord ++ )
				   {
					$ch	=  chr ( $ord ) ;

					if  ( $ch  >=  '0'  &&  $ch  <=  '9' )
						$classes [ $ch ]	=  self::CTYPE_DIGIT | self::CTYPE_XDIGIT | self::CTYPE_ALNUM ;
					else if  ( $ch  >=  'A'  &&  $ch  <=  'Z' )
					   {
						$classes [ $ch ]	=  self::CTYPE_ALPHA | self::CTYPE_UPPER | self::CTYPE_ALNUM ;

						if  ( $ch  <=  'F' )
							$classes [ $ch ]	|=  self::CTYPE_XDIGIT ;
					    }
					else if  ( $ch  >=  'a'  &&  $ch  <=  'z' )
					   {
						$classes [ $ch ]	=  self::CTYPE_ALPHA | self::CTYPE_LOWER | self::CTYPE_ALNUM ;

						if  ( $ch  <=  'f' )
							$classes [ $ch ]	|=  self::CTYPE_XDIGIT ;
					    }
					else
						$classes [ $ch ]	=  0 ;
				    }

				self::$CharacterClasses		=  $classes ;
			    }

			// Global execution time limit
			self::$PhpMaxExecutionTime	=  ( int ) ini_get ( 'max_execution_time' ) ;

			if  ( ! self::$PhpMaxExecutionTime )					// Paranoia : default max script execution time to 120 seconds
				self::$PhpMaxExecutionTime	=  120 ;

			self::$GlobalExecutionStartTime		=  microtime ( true ) ;		// Set the start of the first execution

			// A positive $MaxGlobalExecutionTime is an absolute number of seconds ; a zero or negative one is
			// added to (ie, subtracted from) the PHP max_execution_time setting
			if  ( self::$MaxGlobalExecutionTime  >  0 )
				self::$AllowedGlobalExecutionTime	=  self::$MaxGlobalExecutionTime ;
			else
				self::$AllowedGlobalExecutionTime	=  self::$PhpMaxExecutionTime + self::$MaxGlobalExecutionTime ;

			// Adjust in case of inconsistent values
			if  ( self::$AllowedGlobalExecutionTime  <  0  ||  self::$AllowedGlobalExecutionTime  >  self::$PhpMaxExecutionTime )
				self::$AllowedGlobalExecutionTime	=  self::$PhpMaxExecutionTime - 1 ;

			self::$StaticInitialized	=  true ;
		    }

		parent::__construct ( ) ;

		$this -> Options		=  $options ;

		// Load the file right away if one has been specified
		if  ( $filename )
			$this -> Load ( $filename, $user_password, $owner_password ) ;
	    }


	public function  __tostring ( )
	   {
		// Converting the object to a string yields the current value of the Text property
		$text	=  $this -> Text ;

		return ( $text ) ;
	    }


	/**************************************************************************************************************
	 **************************************************************************************************************
	 **************************************************************************************************************
	 ******                                                                                                  ******
	 ******                                                                                                  ******
	 ******                                          PUBLIC METHODS                                          ******
	 ******                                                                                                  ******
	 ******                                                                                                  ******
	 **************************************************************************************************************
	 **************************************************************************************************************
	 **************************************************************************************************************/

	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Load		- Loads text contents from a PDF file.
		LoadFromString	- Loads PDF contents from a string.

	    PROTOTYPE
	        $text	=  $pdf -> Load ( $filename, $user_password = false, $owner_password = false ) ;
	        $text	=  $pdf -> LoadFromString ( $contents, $user_password = false, $owner_password = false ) ;

	    DESCRIPTION
	        The Load() method extracts text contents from the specified PDF file. Once processed, text contents will
		be available through the "Text" property.
		The LoadFromString() method performs the same operation on PDF contents already loaded into memory.

	    PARAMETERS
	        $filename (string) -
	                Optional PDF filename whose text contents are to be extracted.

		$contents (string) -
			String containing PDF contents.

		$user_password (string) -
			User password used for decrypting PDF contents.

		$owner_password (string) -
			Owner password.

	 *-------------------------------------------------------------------------------------------------------------*/
	private		$__memory_peak_usage_start,
			$__memory_usage_start ;

	public function  Load  ( $filename, $user_password = false, $owner_password = false )
	   {
		// Memory usage snapshots taken before any processing (0 when the corresponding function is not available)
		$this -> __memory_usage_start		=  ( self::$HasMemoryGetUsage     ) ?  memory_get_usage      ( true ) : 0 ;
		$this -> __memory_peak_usage_start	=  ( self::$HasMemoryGetPeakUsage ) ?  memory_get_peak_usage ( true ) : 0 ;

		// Check if the file exists, but only if the file is on a local filesystem
		// (names starting with a "scheme://" prefix are not checked here)
		if  ( ! preg_match ( '#^ [^:]+ ://#ix', $filename )  && ! file_exists ( $filename ) )
			error ( new  PdfToTextDecodingException ( "File \"$filename\" does not exist." ) ) ;

		// Load its contents.
		// No second argument is passed : this argument is the $use_include_path boolean, not a flags value. The
		// FILE_BINARY constant formerly specified here is zero, so it was read as false (the default value) ;
		// it never had any effect and its use raises a deprecation notice as of PHP 8.1.
		// Warnings are silenced because a failure is reported through error() right below.
		$contents 	=  @file_get_contents ( $filename ) ;

		if  ( $contents  ===  false )
			error ( new  PdfToTextDecodingException ( "Unable to open \"$filename\"." ) ) ;

		// The return value is the one of the __load() method
		return ( $this -> __load ( $filename, $contents, $user_password, $owner_password ) ) ;
	    }


	public function  LoadFromString ( $contents, $user_password = false, $owner_password = false )
	   {
		// Memory usage snapshots taken before any processing ; they remain at zero when the corresponding
		// function is not available
		$usage_start		=  0 ;
		$peak_usage_start	=  0 ;

		if  ( self::$HasMemoryGetUsage )
			$usage_start		=  memory_get_usage ( true ) ;

		if  ( self::$HasMemoryGetPeakUsage )
			$peak_usage_start	=  memory_get_peak_usage ( true ) ;

		$this -> __memory_usage_start		=  $usage_start ;
		$this -> __memory_peak_usage_start	=  $peak_usage_start ;

		// Contents already in memory have no associated filename, hence the empty string
		$result		=  $this -> __load ( '', $contents, $user_password, $owner_password ) ;

		return ( $result ) ;
	    }


	private function  __load ( $filename, $contents, $user_password = false, $owner_password = false )
	   {
		// Search for the start of the document ("%PDF-x.y")
		$start_offset	=  strpos ( $contents, '%PDF' ) ;

		if  ( $start_offset  ===  false )		// Not a pdf document !
			error ( new PdfToTextDecodingException ( "File \"$filename\" is not a valid PDF file." ) ) ;
		else						// May be a PDF document
			$this -> DocumentStartOffset	=  $start_offset ;

		// Check that this is a PDF file with a valid version number
		if  ( ! preg_match ( '/ %PDF- (?P<version> \d+ (\. \d+)*) /ix', $contents, $match, 0, $start_offset ) )
			error ( new PdfToTextDecodingException ( "File \"$filename\" is not a valid PDF file." ) ) ;

		$this -> PdfVersion 		=  $match [ 'version' ] ;

		// Initializations
		$this -> Text 				=  '' ;
		$this -> FontTable 			=  new PdfTexterFontTable ( ) ;
		$this -> Filename 			=  realpath ( $filename ) ;
		$this -> Pages				=  array ( ) ;
		$this -> Images				=  array ( ) ;
		$this -> ImageData			=  array ( ) ;
		$this -> ImageCount			=  0 ;
		$this -> AutoSavedImageFiles		=  array ( ) ;
		$this -> PageMap			=  new PdfTexterPageMap ( ) ;
		$this -> PageLocations			=  array ( ) ;
		$this -> Author				=  '' ;
		$this -> CreatorApplication		=  '' ;
		$this -> ProducerApplication		=  '' ;
		$this -> CreationDate			=  '' ;
		$this -> ModificationDate		=  '' ;
		$this -> Title				=  '' ;
		$this -> Subject			=  '' ;
		$this -> Keywords			=  '' ;
		$this -> GotAuthorInformation		=  false ;
		$this -> ID				=  '' ;
		$this -> ID2				=  '' ;
		$this -> EncryptionData			=  false ;
		$this -> EnhancedStatistics		=  ( ( $this -> Options  &  self::PDFOPT_ENHANCED_STATISTICS )  !=  0 ) ;

		// Also reset cached information that may come from previous runs
		$this -> MapIdBuffer			=  array ( ) ;
		$this -> RtlCharacterBuffer		=  array ( ) ;
		$this -> CharacterMapBuffer		=  array ( ) ;
		$this -> FontObjectsBuffer		=  array ( ) ;
		$this -> FormData			=  array ( ) ;
		$this -> FormDataObjectNumbers		=  false ;
		$this -> FomDataDefinitions		=  array ( ) ;
		$this -> FormDataObjects		=  array ( ) ;
		$this -> CaptureDefinitions		=  false ;
		$this -> CaptureObject			=  false ;
		$this -> DocumentFragments		=  array ( ) ;

		// Enable the PDFOPT_BASIC_LAYOUT option if the PDFOPT_CAPTURE flag is specified
		if  ( $this -> Options & self::PDFOPT_CAPTURE )
			$this -> Options	|=  self::PDFOPT_BASIC_LAYOUT ;

		// Enable the PDFOPT_BASIC_LAYOUT_OPTION is PDFOPT_DEBUG_SHOW_COORDINATES is specified
		if  ( $this -> Options & self::PDFOPT_DEBUG_SHOW_COORDINATES )
			$this -> Options	|=  self::PDFOPT_BASIC_LAYOUT ;

		// Page layout options needs more instructions to be retained - select the appropriate list of useless instructions
		if  ( $this -> Options  &  self::PDFOPT_BASIC_LAYOUT )
			$this -> IgnoredInstructions	=  self::$IgnoredInstructionsLayout ;
		else
			$this -> IgnoredInstructions	=  self::$IgnoredInstructionsNoLayout ;


		// Debug statistics
		$this -> Statistics			=  array
		   (
			'TextSize'			=>  0,				// Total size of drawing instructions ("text" objects)
			'OptimizedTextSize'		=>  0,				// Optimized text size, with useless instructions removed
			'Distributions'			=>  array			// Statistics about handled instructions distribution - Works only with the page layout option in debug mode
			   (
				'operand'	=>  0,
				'Tm'		=>  0,
				'Td'		=>  0,
				'TD'		=>  0,
				"'"		=>  0,
				'TJ'		=>  0,
				'Tj'		=>  0,
				'Tf'		=>  0,
				'TL'		=>  0,
				'T*'		=>  0,
				'('		=>  0,
				'<'		=>  0,
				'['		=>  0,
				'cm'		=>  0,
				'BT'		=>  0,
				'template'	=>  0,
				'ignored'	=>  0,
				'space'		=>  0
			    )
		    ) ;

		// Per-instance execution time limit
		$this -> ExecutionStartTime		=  microtime ( true ) ;

		if  ( $this -> MaxExecutionTime  >  0 )
			$this -> AllowedExecutionTime		=  $this -> MaxExecutionTime ;
		else
			$this -> AllowedExecutionTime		=  self::$PhpMaxExecutionTime + $this -> MaxExecutionTime ;

		// Adjust in case of inconsistent values
		if  ( $this -> AllowedExecutionTime  <  0  ||  $this -> AllowedExecutionTime  >  self::$PhpMaxExecutionTime )
			$this -> AllowedExecutionTime		=  self::$PhpMaxExecutionTime - 1 ;

		// Systematically set the DECODE_IMAGE_DATA flag if the AUTOSAVE_IMAGES flag has been specified
		if  ( $this -> Options  &  self::PDFOPT_AUTOSAVE_IMAGES )
			$this -> Options	|=  self::PDFOPT_DECODE_IMAGE_DATA ;

		// Systematically set the GET_IMAGE_DATA flag if DECODE_IMAGE_DATA is specified (debug mode only)
		if  ( self::$DEBUG  &&  $this -> Options  &  self::PDFOPT_DECODE_IMAGE_DATA )
			$this -> Options	|=  self::PDFOPT_GET_IMAGE_DATA ;

		// Since page layout options take 2 bits, but not all of the 4 possible values are allowed, make sure that an invalid
		// value will default to PDFOPT_RAW_LAYOUT value
		$layout_option		=  $this -> Options & self::PDFOPT_LAYOUT_MASK ;

		if  ( ! $layout_option  ===  self::PDFOPT_RAW_LAYOUT  &&  $layout_option  !==  self::PDFOPT_BASIC_LAYOUT )
		   {
			$layout_option		=  self::PDFOPT_RAW_LAYOUT ;
			$this -> Options	=  ( $this -> Options & ~self::PDFOPT_LAYOUT_MASK ) | self::PDFOPT_RAW_LAYOUT ;
		    }

		// Author information needs to be processed after, because it may reference objects that occur later in the PDF stream
		$author_information_object_id		=  false ;

		// Extract pdf objects that are enclosed by the "obj" and "endobj" keywords
		$pdf_objects		=  array ( ) ;
		$contents_offset	=  $this -> DocumentStartOffset ;
		$contents_length	=  strlen ( $contents ) ;


		while  ( $contents_offset  <  $contents_length  &&
				preg_match ( '/(?P<re> (?P<object_id> \d+) \s+ \d+ \s+ obj (?P<object> .*?) endobj )/imsx', $contents, $match, PREG_OFFSET_CAPTURE, $contents_offset ) )
		   {
			$object_number		=  $match [ 'object_id' ] [0] ;
			$object_data		=  $match [ 'object' ] [0] ;

			// Handle the special case of object streams (compound objects)
			// They are not added in the $pdf_objects array, because they could be mistakenly processed as relevant information,
			// such as font definitions, etc.
			// Instead, only the objects they are embedding are stored in this array.
			if  ( $this -> IsObjectStream ( $object_data ) )
			   {
				// Ignore ill-formed object streams
				if  ( ( $object_stream_matches = $this -> DecodeObjectStream ( $object_number, $object_data ) )  !==  false )
				   {
					// Add this list of objects to the list of known objects
					for  ( $j = 0, $object_stream_count = count ( $object_stream_matches [ 'object_id' ] ) ; $j  <  $object_stream_count ; $j ++ )
						$pdf_objects [ $object_stream_matches [ 'object_id' ] [$j] ]	=  $object_stream_matches [ 'object' ] [$j] ;
				    }
			    }
			// Normal (non-compound) object
			else
				$pdf_objects [ $object_number ]	=  $object_data ;

			// Update current offset through PDF contents
			$contents_offset	=  $match [ 're' ] [1] + strlen ( $match [ 're' ] [0] ) ;
		    }

		// We put a particular attention in treating errors returned by preg_match_all() here, since we need to be really sure why stopped
		// to find further PDF objects in the supplied contents
		$preg_error	=  preg_last_error ( ) ;

		switch  ( $preg_error )
		   {
			case  PREG_NO_ERROR :
				break ;

			case  PREG_INTERNAL_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : the preg_match_all() function encountered an internal error." ) ) ;

			case  PREG_BACKTRACK_LIMIT_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : backtrack limit reached (you may have to modify the pcre.backtrack_limit " .
						"setting of your PHP.ini file, which is currently set to " . ini_get ( 'pcre.backtrack_limit' ) . ")." ) ) ;

			case  PREG_JIT_STACKLIMIT_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : JIT stack limit reached (you may disable this feature by setting the pcre.jit " .
						"setting of your PHP.ini file to 0)." ) ) ;

			case  PREG_RECURSION_LIMIT_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : recursion limit reached (you may have to modify the pcre.recursion_limit " .
						"setting of your PHP.ini file, which is currently set to " . ini_get ( 'pcre.recursion_limit' ) . ")." ) ) ;

			case  PREG_BAD_UTF8_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : bad UTF8 character encountered." ) ) ;

			case PREG_BAD_UTF8_OFFSET_ERROR :
				error ( new PdfToTextDecodingException ( "PDF object extraction : the specified offset does not start at the beginning of a valid UTF8 codepoint." ) ) ;

			default :
				error ( new PdfToTextDecodingException ( "PDF object extraction : unkown PREG error #$preg_error" ) ) ;
		    }


		// Extract trailer information, which may contain the ID of an object specifying encryption flags
		$this -> GetTrailerInformation ( $contents, $pdf_objects ) ;
		unset ( $contents ) ;

		// Character maps encountered so far
		$cmaps			=  array ( ) ;

		// An array that will store object ids as keys and text contents as values
		$text			=  array ( ) ;

		// Loop through the objects
		foreach  ( $pdf_objects  as  $object_number => $object_data )
		   {
			// Some additional objects may be uncovered after processing (in an object containing compacted objects for example)
			// so add them to the list if necessary
			if  ( ! isset ( $pdf_objects [ $object_number ] ) )
				$pdf_objects [ $object_number ]		=  $object_data ;

			// Try to catch information related to page mapping - but don't discard the object since it can contain additional information
			$this -> PageMap -> Peek ( $object_number, $object_data, $pdf_objects ) ;

			// Check if the object contais authoring information - it can appear encoded or unencoded
			if  ( ! $this -> GotAuthorInformation )
				$author_information_object_id	=  $this -> PeekAuthorInformation ( $object_number, $object_data ) ;

			// Also catch the object encoding type
			$type 		=  $this -> GetEncodingType ( $object_number, $object_data ) ;
			$stream_match	=  null ;

			if  ( strpos ( $object_data, 'stream' )  ===  false  ||
					! preg_match ( '#[^/] stream \s+ (?P<stream> .*?) endstream#imsx', $object_data, $stream_match ) )
			   {
				// Some font definitions are in clear text in an object, some are encoded in a stream within the object
				// We process here the unencoded ones
				if  ( $this -> IsFont ( $object_data ) )
				   {
					$this -> FontTable -> Add ( $object_number, $object_data, $pdf_objects, $this -> AdobeExtraMappings ) ;
					continue ;
				    }
				// Some character maps may also be in clear text
				else if  ( $this -> IsCharacterMap ( $object_data ) )
				    {
					$cmap	=  PdfTexterCharacterMap::CreateInstance ( $object_number, $object_data, $this -> AdobeExtraMappings ) ;

					if  ( $cmap )
						$cmaps [] 	=  $cmap ;

					continue ;
				    }
				// Check if there is an association between font number and object number
				else if  ( $this -> IsFontMap ( $object_data ) )
				   {
					$this -> FontTable -> AddFontMap ( $object_number, $object_data ) ;
				    }
				// Retrieve form data if present
				else if  ( $this -> IsFormData ( $object_data ) )
				   {
					$this -> RetrieveFormData ( $object_number, $object_data, $pdf_objects ) ;
				    }
				// Ignore other objects that do not contain an encoded stream
				else
				   {
					if  ( self::$DEBUG  >  1 )
						echo "\n----------------------------------- UNSTREAMED #$object_number\n$object_data" ;

					continue ;
				    }
			    }
			// Extract image data, if any
			else if  ( $this -> IsImage ( $object_data ) )
			   {
				$this -> AddImage ( $object_number, $stream_match [ 'stream' ], $type, $object_data ) ;
				continue ;
			    }
			// Check if there is an association between font number and object number
			else if  ( $this -> IsFontMap ( $object_data ) )
			   {
				$this -> FontTable -> AddFontMap ( $object_number, $object_data ) ;

				if  ( ! $stream_match )
					continue ;
			    }

			// Check if the stream contains data (yes, I have found a sample that had streams of length 0...)
			// In other words : ignore empty streams
			if  ( stripos ( $object_data, '/Length 0' )  !==  false )
				continue ;

			// Isolate stream data and try to find its encoding type
			if  ( isset  ( $stream_match [ 'stream' ] ) )
				$stream_data 		=  ltrim ( $stream_match [ 'stream' ], "\r\n" ) ;
			else
				continue ;

			// Ignore this stream if the object does not contain an encoding type (/FLATEDECODE, /ASCIIHEX or /ASCII85)
			if  ( $type  ==  self::PDF_UNKNOWN_ENCODING )
			   {
				if  ( self::$DEBUG  >  1 )
					echo "\n----------------------------------- UNENCODED #$object_number :\n$object_data" ;

				continue ;
			    }

			// Decode the encoded stream
			$decoded_stream_data 	=  $this -> DecodeData ( $object_number, $stream_data, $type, $object_data ) ;

			// Second chance to peek author information, this time on a decoded stream data
			if  ( ! $this -> GotAuthorInformation )
				$author_information_object_id	=  $this -> PeekAuthorInformation ( $object_number, $decoded_stream_data ) ;

			// Check for character maps
			if  ( $this -> IsCharacterMap ( $decoded_stream_data ) )
			   {
				$cmap	=  PdfTexterCharacterMap::CreateInstance ( $object_number, $decoded_stream_data, $this -> AdobeExtraMappings ) ;

				if  ( $cmap )
					$cmaps [] 	=  $cmap ;
			   }
			// Font definitions
			else if  ( $this -> IsFont ( $decoded_stream_data ) )
			   {
				$this -> FontTable -> Add ( $object_number, $decoded_stream_data, $pdf_objects, $this -> AdobeExtraMappings ) ;
			    }
			// Retrieve form data if present
			else if  ( $this -> IsFormData ( $object_data ) )
			   {
				$this -> RetrieveFormData ( $object_number, $decoded_stream_data, $pdf_objects ) ;
			    }
			// Plain text (well, in fact PDF drawing instructions)
			else if  ( $this -> IsText ( $object_data, $decoded_stream_data ) )
			   {
				$text_data	=  false ;

				// Check if we need to ignore page headers and footers
				if  ( $this -> Options  &  self::PDFOPT_IGNORE_HEADERS_AND_FOOTERS )
				   {
					if  ( ! $this -> IsPageHeaderOrFooter ( $decoded_stream_data ) )
					   {
						$text [ $object_number ]	=
						$text_data			=  $decoded_stream_data ;
					    }
					// However, they may be mixed with actual text contents so we need to separate them...
					else
					   {
						$this -> ExtractTextData ( $object_number, $decoded_stream_data, $remainder, $header, $footer ) ;

						// We still need to check again that the extracted text portion contains something useful
						if  ( $this -> IsText ( $object_data, $remainder ) )
						   {
							$text [ $object_number ]	=
							$text_data			=  $remainder ;
						    }
					    }
				    }
				else
				   {
					$text [ $object_number ]	=
					$text_data			=  $decoded_stream_data ;
				    }


				// The current object may be a text object that have been defined as an XObject in some other object
				// In this case, we have to keep it since it may be referenced by a /TPLx construct from within
				// another text object
				if  ( $text_data )
					$this -> PageMap -> AddTemplateObject ( $object_number, $text_data ) ;
			    }
			// This may be here the opportunity to look into the $FormData property and replace object ids with their corresponding data
			else
			   {
				$found		=  false ;

				foreach  ( $this -> FormData  as  &$form_entry )
				   {
					if  ( is_integer ( $form_entry [ 'values' ] )  &&  $object_number  ==  $form_entry [ 'values' ] )
					   {
						$form_entry [ 'values' ]	=  $decoded_stream_data ;
						$found				=  true ;
					    }
					else if  ( is_integer ( $form_entry [ 'form' ] )  &&  $object_number  ==  $form_entry [ 'form' ] )
					   {
						$form_entry [ 'form' ]	=  $decoded_stream_data ;
						$found				=  true ;
					    }
				    }

				if  ( ! $found  &&  self::$DEBUG  >  1 )
					echo "\n----------------------------------- UNRECOGNIZED #$object_number :\n$decoded_stream_data\n" ;
			    }
		    }

		// Form data object numbers
		$this -> FormDataObjectNumbers	=  array_keys ( $this -> FormData ) ;

		// Associate character maps with declared fonts
		foreach  ( $cmaps  as  $cmap )
			$this -> FontTable -> AddCharacterMap ( $cmap ) ;

		// Current font defaults to -1, which means : take the first available font as the current one.
		// Sometimes it may happen that text drawing instructions do not set a font at all (PdfPro for example)
		$current_font		=  -1 ;

		// Build the page catalog
		$this -> Pages	=  array ( ) ;
		$this -> PageMap -> MapObjects ( $text ) ;

		// Add font mappings local to each page
		$mapped_fonts	=  $this -> PageMap -> GetMappedFonts ( ) ;
		$this -> FontTable -> AddPageFontMap ( $mapped_fonts ) ;

		// Extract text from the collected text elements
		foreach ( $this -> PageMap -> Pages as  $page_number => $page_objects )
		   {
			// Checks if this page is selected
			if  ( ! $this -> IsPageSelected ( $page_number ) )
				continue ;

			$this -> Pages [ $page_number ]		=  '' ;

			if  ( $layout_option  ===  self::PDFOPT_RAW_LAYOUT )
			   {
				foreach  ( $page_objects  as  $page_object )
				   {
					if  ( isset ( $text [ $page_object ] ) )
					   {
						$new_text				 =  $this -> PageMap -> ProcessTemplateReferences ( $page_number, $text [ $page_object ] ) ;
						$object_text				 =  $this -> ExtractText ( $page_number, $page_object, $new_text, $current_font ) ;
						$this -> Pages [ $page_number ]		.=  $object_text ;
					    }
					else if  ( self::$DEBUG  >  1 )
						echo "\n----------------------------------- MISSING OBJECT #$page_object for page #$page_number\n" ;
				    }
			     }
			// New style (basic) layout rendering
			else if  ( $layout_option  ===  self::PDFOPT_BASIC_LAYOUT )
			   {
				$page_fragments		=  array ( ) ;

				foreach  ( $page_objects  as  $page_object )
				   {
					if  ( isset ( $text [ $page_object ] ) )
					   {
						$new_text				 =  $this -> PageMap -> ProcessTemplateReferences ( $page_number, $text [ $page_object ] ) ;
						$this -> ExtractTextWithLayout ( $page_fragments, $page_number, $page_object, $new_text, $current_font ) ;
					    }
					else if  ( self::$DEBUG  >  1 )
						echo "\n----------------------------------- MISSING OBJECT #$page_object for page #$page_number\n" ;
				    }

				$this -> Pages [ $page_number ]			=  $this -> __assemble_text_fragments ( $page_number, $page_fragments, $page_width, $page_height ) ;

				$this -> DocumentFragments [ $page_number ]	=  array
				   (
					'fragments'		=>  $page_fragments,
					'page-width'		=>  $page_width,
					'page_height'		=>  $page_height
				    ) ;
			    }
		    }

		// Retrieve author information
		if  ( $this -> GotAuthorInformation )
			$this -> RetrieveAuthorInformation ( $author_information_object_id, $pdf_objects ) ;

		// Build the page locations (ie, starting and ending offsets)
		$offset			=  0 ;
		$page_separator		=  utf8_encode ( $this -> PageSeparator ) ;
		$page_separator_length	=  strlen ( $page_separator ) ;

		foreach  ( $this -> Pages  as  $page_number => &$page )
		   {
			// If hyphenated words are unwanted, then remove them
			if  ( $this -> Options &  self::PDFOPT_NO_HYPHENATED_WORDS )
				$page	=  preg_replace ( self::$RemoveHyphensRegex, '$4$2', $page ) ;

			$length					 =  strlen ( $page ) ;
			$this -> PageLocations [ $page_number ]	 =  array ( 'start' => $offset, 'end' => $offset + $length - 1 ) ;
			$offset					+=  $length + $page_separator_length ;
		    }

		// And finally, the Text property
		$this -> Text	=  implode ( $page_separator, $this -> Pages ) ;

		// Free memory
		$this -> MapIdBuffer			=  array ( ) ;
		$this -> RtlCharacterBuffer		=  array ( ) ;
		$this -> CharacterMapBuffer		=  array ( ) ;

		// Compute memory occupied for this file
		$memory_usage_end		=  ( self::$HasMemoryGetUsage     ) ?  memory_get_usage      ( true ) : 0 ;
		$memory_peak_usage_end		=  ( self::$HasMemoryGetPeakUsage ) ?  memory_get_peak_usage ( true ) : 0 ;

		$this -> MemoryUsage		=  $memory_usage_end      - $this -> __memory_usage_start ;
		$this -> MemoryPeakUsage	=  $memory_peak_usage_end - $this -> __memory_peak_usage_start ;

		// Adjust the "Distributions" statistics
		if  ( $this -> Options  &  self::PDFOPT_ENHANCED_STATISTICS )
		   {
			$instruction_count		=  0 ;
			$statistics			=  array ( ) ;

			// Count the total number of instructions
			foreach  ( $this -> Statistics [ 'Distributions' ]  as  $count )
				$instruction_count  +=  $count ;

			// Now transform the Distributions entries into an associative array containing the instruction counts
			// ('count') and their relative percentage
			foreach  ( $this -> Statistics [ 'Distributions' ]  as  $name => $count )
			   {
				if  ( $instruction_count )
					$percent	=  round ( ( 100.0 / $instruction_count ) * $count, 2 ) ;
				else
					$percent	=  0 ;

				$statistics [ $name ]	=  array
				   (
					'instruction'		=>  $name,
					'count'			=>  $count,
					'percent'		=>  $percent
				    ) ;
			    }

			// Set the new 'Distributions' array and sort it by instruction count in reverse order
			$this -> Statistics [ 'Distributions' ]		=  $statistics ;
			uksort ( $this -> Statistics [ 'Distributions' ], array ( $this, '__sort_distributions' ) ) ;
		    }

		// All done, return
		return ( $this -> Text ) ;
	    }


	public function  __sort_distributions ( $a, $b )
	   {
		// Key comparison callback for the 'Distributions' statistics : $a and $b are instruction names,
		// and the entry having the highest 'count' value must come first (descending order).
		$distributions	=  $this -> Statistics [ 'Distributions' ] ;
		$count_a	=  $distributions [$a] [ 'count' ] ;
		$count_b	=  $distributions [$b] [ 'count' ] ;

		// A positive result means that $b has been seen more often than $a
		return ( $count_b - $count_a ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        AddAdobeExtraMappings - Adds extra mappings for standard Adobe fonts.

	    PROTOTYPE
	        $pdf -> AddAdobeExtraMappings ( $mappings ) ;

	    DESCRIPTION
	        Adobe supports 4 predefined fonts (standard, Mac, WinAnsi and PDF). All the characters in these fonts
		are identified by a character name, a little bit like HTML entities ; for example, 'one' will be the
		character '1', 'acircumflex' will be 'â', etc.
		There are thousands of character names defined by Adobe (see https://mupdf.com/docs/browse/source/pdf/pdf-glyphlist.h.html).
		Some of them are not in this list ; this is the case for example of the 'ax' character names, where 'x'
		is a decimal number. When such a character is specified in a /Differences array, then there is somewhere
		a CharProc[] array giving an object id for each of those characters.
		The referenced object(s) in turn contain drawing instructions to draw the glyph. At no point you could
		guess what is the corresponding Unicode character for this glyph, since the information is not contained
		in the PDF file.
		The AddAdobeExtraMappings() method allows you to specify such correspondences. Specify an array as the
		$mappings parameter, whose keys are the Adobe character name (for example, "a127") and values the
		corresponding Unicode values (see the description of the $mappings parameter for more information).

	    PARAMETERS
	        $mappings (associative array) -
	                Associative array whose keys are Adobe character names. The array values can take several forms :
			- A character
			- An integer value
			- An array of up to four character or integer values.
			Internally, every specified value is converted to an array of four integer values, one for
			each of the standard Adobe character sets (Standard, Mac, WinAnsi and PDF). The following
			rules apply :
			- If the input value is a single character, the output array corresponding to the Adobe character
			  name will be a set of 4 elements corresponding to the ordinal value of the supplied
			  character.
			- If the input value is an integer, the output array will be a set of 4 identical values
			- If the input value is an array :
			  . Arrays with fewer than 4 elements will be padded, using the last array item for padding
			  . Arrays with more than 4 elements will be silently truncated
			  . Each array value can either be a character or a numeric value.

	    NOTES
	        In this current implementation, the method applies the mappings to ALL Adobe default fonts. That is,
		you cannot have one mapping for one Adobe font referenced in the PDF file, then a second mapping for
		a second Adobe font, etc.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  AddAdobeExtraMappings ( $mappings )
	   {
		// Loop through each mapping ; every entry is normalized to an array of exactly 4 integer codes
		foreach  ( $mappings  as  $key => $value )
		   {
			// Character value : we retain its ordinal value as the 4 values of the output array
			// (ord() only looks at the first byte of the supplied string)
			if  ( is_string ( $value ) )
			   {
				$ord		=  ord ( $value ) ;
				$items		=  array ( $ord, $ord, $ord, $ord ) ;
			    }
			// Numeric value : the output array will contain 4 times the supplied value
			else if  ( is_numeric ( $value ) )
			   {
				$value		=  ( int ) $value ;
				$items		=  array ( $value, $value, $value, $value ) ;
			    }
			// Array value : make sure we will have an output array of 4 values
			else if  ( is_array ( $value ) )
			   {
				$items		=  array ( ) ;

				// Reindex the supplied array : associative or sparse arrays would otherwise be read through
				// undefined numeric offsets, and silently produce zero values
				$codes		=  array_values ( $value ) ;

				// Collect the supplied values, converting characters to their ordinal values if necessary
				// (items after the fourth one are ignored)
				for  ( $i = 0, $count = count ( $codes ) ;  $i  <  $count  &&  $i  <  4 ; $i ++ )
				   {
					$code	=  $codes [$i] ;

					if  ( is_string ( $code ) )
						$items []	=  ord ( $code ) ;
					else
						$items []	=  ( int ) $code ;
				    }

				// Ensure that we have 4 values ; fill the missing ones with the last seen value if necessary
				$count		=  count ( $items ) ;

				if  ( ! $count )
					error ( new PdfToTextException ( "Adobe extra mapping \"$key\" has no values." ) ) ;

				$last_value		=  $items [ $count - 1 ] ;

				for  ( $i = $count ; $i  <  4 ; $i ++ )
					$items []	=  $last_value ;
			    }
			else
			   {
				// An object without a __toString() method cannot be interpolated in a string : doing so would
				// raise a fatal error that hides the real problem, so show its class name instead
				if  ( is_object ( $value )  &&  ! method_exists ( $value, '__toString' ) )
					$shown_value	=  get_class ( $value ) . ' object' ;
				else
					$shown_value	=  ( string ) $value ;

				error ( new PdfToTextException ( "Invalid value \"$shown_value\" for Adobe extra mapping \"$key\"." ) ) ;
			    }

			// Add this current mapping to the Adobe extra mappings array
			$this -> AdobeExtraMappings [ $key ]	=  $items ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetPageFromOffset - Returns a page number from a text offset.

	    PROTOTYPE
	        $page		=  $pdf -> GetPageFromOffset ( $offset ) ;

	    DESCRIPTION
	        Given a byte offset in the Text property, returns its page number in the pdf document.

	    PARAMETERS
	        $offset (integer) -
	                Offset, in the Text property, whose page number is to be retrieved.

	    RETURN VALUE
	        Returns a page number in the pdf document, or false if the specified offset does not exist.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetPageFromOffset ( $offset )
	   {
		// A false offset (typically the result of a failed search) cannot belong to any page
		if  ( $offset  !==  false )
		   {
			// Each PageLocations entry holds the inclusive 'start' and 'end' offsets of a page
			foreach  ( $this -> PageLocations  as  $page_number => $bounds )
			   {
				if  ( $offset  >=  $bounds [ 'start' ] )
				   {
					if  ( $offset  <=  $bounds [ 'end' ] )
						return ( $page_number ) ;
				    }
			    }
		    }

		// Either no offset was supplied, or it falls outside of every page
		return ( false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        text_strpos, text_stripos - Search for an occurrence of a string.

	    PROTOTYPE
	        $result		=  $pdf -> text_strpos  ( $search, $start = 0 ) ;
	        $result		=  $pdf -> text_stripos ( $search, $start = 0 ) ;

	    DESCRIPTION
	        These methods behave as the strpos/stripos PHP functions, except that :
		- They operate on the text contents of the pdf file (Text property)
		- They return an array containing the page number and text offset. $result [0] will be set to the page
		  number of the searched text, and $result [1] to its offset in the Text property

	    PARAMETERS
	        $search (string) -
	                String to be searched.

		$start (integer) -
			Start offset in the pdf text contents.

	    RETURN VALUE
	        Returns an array of two values containing the page number and text offset if the searched string has
		been found, or false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  text_strpos ( $search, $start = 0 )
	   {
		// mb_strpos() returns a CHARACTER offset within the UTF-8 text
		$offset		=  mb_strpos ( $this -> Text, $search, $start, 'UTF-8' ) ;

		if  ( $offset  ===  false )
			return ( false ) ;

		// The page boundaries searched by GetPageFromOffset() are computed with strlen(), ie in bytes : convert the
		// character offset to a byte offset first, otherwise the wrong page is reported as soon as multibyte
		// characters precede the match
		$byte_offset	=  strlen ( mb_substr ( $this -> Text, 0, $offset, 'UTF-8' ) ) ;

		// The returned offset remains the character offset, as before
		return ( array ( $this -> GetPageFromOffset ( $byte_offset ), $offset ) ) ;
	    }


	public function  text_stripos ( $search, $start = 0 )
	   {
		// mb_stripos() returns a CHARACTER offset within the UTF-8 text
		$offset		=  mb_stripos ( $this -> Text, $search, $start, 'UTF-8' ) ;

		if  ( $offset  ===  false )
			return ( false ) ;

		// The page boundaries searched by GetPageFromOffset() are computed with strlen(), ie in bytes : convert the
		// character offset to a byte offset first, otherwise the wrong page is reported as soon as multibyte
		// characters precede the match
		$byte_offset	=  strlen ( mb_substr ( $this -> Text, 0, $offset, 'UTF-8' ) ) ;

		// The returned offset remains the character offset, as before
		return ( array ( $this -> GetPageFromOffset ( $byte_offset ), $offset ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        document_strpos, document_stripos - Search for all occurrences of a string.

	    PROTOTYPE
	        $result		=  $pdf -> document_strpos  ( $search, $group_by_page = false ) ;
	        $result		=  $pdf -> document_stripos ( $search, $group_by_page = false ) ;

	    DESCRIPTION
		Searches for ALL occurrences of a given string in the pdf document. The value of the $group_by_page
		parameter determines how the results are returned :
		- When true, the returned value will be an associative array whose keys will be page numbers and values
		  arrays of offset of the found string within the page
		- When false, the returned value will be an array of arrays containing two entries : the page number
		  and the text offset.

		For example, if a pdf document contains the string "here" at character offset 100 and 200 in page 1, and
		position 157 in page 3, the returned value will be :
		- When $group_by_page is false :
			[ [ 1, 100 ], [ 1, 200 ], [ 3, 157 ] ]
		- When $group_by_page is true :
			[ 1 => [ 100, 200 ], 3 => [ 157 ] ]

	    PARAMETERS
	        $search (string) -
	                String to be searched.

		$group_by_page (boolean) -
			Indicates whether the found offsets should be grouped by page number or not.

	    RETURN VALUE
	        Returns an array of page numbers/character offsets (see Description above), which is empty if the
		specified string does not appear in the document, or false if the search string is empty.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  document_strpos ( $text, $group_by_page = false )
	   {
		// An empty search string cannot be located
		if  ( ! strlen ( $text ) )
			return ( false ) ;

		// mb_strpos() works with CHARACTER offsets, so the search position must be advanced by the length of the
		// searched string in characters, not in bytes ; otherwise occurrences following a multibyte match are
		// skipped, or the next start offset may fall beyond the end of the text.
		// max() ensures that the loop always moves forward.
		$length		=  max ( 1, mb_strlen ( $text, 'UTF-8' ) ) ;
		$result		=  array ( ) ;
		$index		=  0 ;

		while ( ( $index =  mb_strpos ( $this -> Text, $text, $index, 'UTF-8' ) )  !==  false )
		   {
			// The page boundaries searched by GetPageFromOffset() are computed with strlen(), ie in bytes :
			// convert the character offset to a byte offset before looking up the page
			$byte_index	=  strlen ( mb_substr ( $this -> Text, 0, $index, 'UTF-8' ) ) ;
			$page		=  $this -> GetPageFromOffset ( $byte_index ) ;

			// Reported offsets remain character offsets, as before
			if  ( $group_by_page )
				$result [ $page ] []	=  $index ;
			else
				$result []		=  array ( $page, $index ) ;

			$index	+=  $length ;
		    }

		return ( $result ) ;
	    }


	public function  document_stripos ( $text, $group_by_page = false )
	   {
		// An empty search string cannot be located
		if  ( ! strlen ( $text ) )
			return ( false ) ;

		// mb_stripos() works with CHARACTER offsets, so the search position must be advanced by the length of the
		// searched string in characters, not in bytes ; otherwise occurrences following a multibyte match are
		// skipped, or the next start offset may fall beyond the end of the text.
		// max() ensures that the loop always moves forward.
		$length		=  max ( 1, mb_strlen ( $text, 'UTF-8' ) ) ;
		$result		=  array ( ) ;
		$index		=  0 ;

		while ( ( $index =  mb_stripos ( $this -> Text, $text, $index, 'UTF-8' ) )  !==  false )
		   {
			// The page boundaries searched by GetPageFromOffset() are computed with strlen(), ie in bytes :
			// convert the character offset to a byte offset before looking up the page
			$byte_index	=  strlen ( mb_substr ( $this -> Text, 0, $index, 'UTF-8' ) ) ;
			$page		=  $this -> GetPageFromOffset ( $byte_index ) ;

			// Reported offsets remain character offsets, as before
			if  ( $group_by_page )
				$result [ $page ] []	=  $index ;
			else
				$result []		=  array ( $page, $index ) ;

			$index	+=  $length ;
		    }

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        text_match, document_match - Search string using regular expressions.

	    PROTOTYPE
	        $status		=  $pdf -> text_match ( $pattern, &$match = null, $flags = 0, $offset = 0 ) ;
	        $status		=  $pdf -> document_match ( $pattern, &$match = null, $flags = 0, $offset = 0 ) ;

	    DESCRIPTION
	        text_match() calls the preg_match() PHP function on the pdf text contents, to locate the first occurrence
		of text that matches the specified regular expression.
		document_match() calls the preg_match_all() function to locate all occurrences that match the specified
		regular expression.
		Note that both methods add the PREG_OFFSET_CAPTURE flag when calling preg_match/preg_match_all so you
		should be aware that all captured results are an array containing the following entries :
		- Item [0] is the captured string
		- Item [1] is its text offset
		- The text_match() and document_match() methods add an extra array item (index 2), which contains the
		  page number where the matched text resides

	    PARAMETERS
	        $pattern (string) -
	                Regular expression to be searched.

		$match (any) -
			Output captures. See preg_match/preg_match_all.

		$flags (integer) -
			PCRE flags. See preg_match/preg_match_all.

		$offset (integer) -
			Start offset. See preg_match/preg_match_all.

	    RETURN VALUE
	        Returns the number of matched occurrences, or false if the specified regular expression is invalid.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  text_match ( $pattern, &$match = null, $flags = 0, $offset = 0 )
	   {
		// Offsets are always captured, since they are needed to locate the page of each capture
		$captures	=  null ;
		$status		=  preg_match ( $pattern, $this -> Text, $captures, $flags | PREG_OFFSET_CAPTURE, $offset ) ;

		// No match, or invalid regular expression : the caller's $match variable is left untouched
		if  ( ! $status )
			return ( $status ) ;

		// Append the page number (index 2) to every [ text, offset ] capture
		foreach  ( array_keys ( $captures )  as  $key )
			$captures [ $key ] [2]	=  $this -> GetPageFromOffset ( $captures [ $key ] [1] ) ;

		$match	=  $captures ;

		return ( $status ) ;
	    }


	public function  document_match ( $pattern, &$matches = null, $flags = 0, $offset = 0 )
	   {
		// Offsets are always captured, since they are needed to locate the page of each capture
		$captures	=  null ;
		$status		=  preg_match_all ( $pattern, $this -> Text, $captures, $flags | PREG_OFFSET_CAPTURE, $offset ) ;

		// No match, or invalid regular expression : the caller's $matches variable is left untouched
		if  ( ! $status )
			return ( $status ) ;

		// preg_match_all() results have two nesting levels ; the page number (index 2) is appended to every
		// innermost [ text, offset ] capture
		foreach  ( array_keys ( $captures )  as  $outer_key )
		   {
			foreach  ( array_keys ( $captures [ $outer_key ] )  as  $inner_key )
			   {
				$capture_offset					=  $captures [ $outer_key ] [ $inner_key ] [1] ;
				$captures [ $outer_key ] [ $inner_key ] [2]	=  $this -> GetPageFromOffset ( $capture_offset ) ;
			    }
		    }

		$matches	=  $captures ;

		return ( $status ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    HasFormData -
		Returns true if the PDF file contains form data, false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  HasFormData ( )
	   {
		// The file holds form data as soon as the FormData array has at least one entry
		$form_count	=  count ( $this -> FormData ) ;

		return ( $form_count  !==  0 ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    GetFormCount -
		Returns the number of top-level forms contained in the PDF file.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetFormCount ( )
	   {
		// One entry in the FormData array per top-level form
		$forms	=  $this -> FormData ;

		return ( count ( $forms ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetFormData - Returns form data, if any

	    PROTOTYPE
	        $object		=  $pdf -> GetFormData ( $template = null, $form_index = 0 ) ;

	    DESCRIPTION
	        Retrieves form data if present.

	    PARAMETERS
	        $template (string) -
	                An XML file describing form data using human-readable names for field values.
			If not specified, the inline form definitions will be used, together with the field names
			specified in the PDF file.

		$form_index (integer) -
			Form index in the PDF file. So far, I really don't know if a PDF file can have multiple forms.

	    RETURN VALUE
	        An object derived from the PdfToTextFormData class.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetFormData ( $template = null, $form_index = 0 )
	   {
		// Form data objects are built once, then cached by form index. The cache does not take $template
		// into account : the object built by the first call for a given index is the one returned afterwards.
		if  ( isset ( $this -> FormDataObjects [ $form_index ] ) )
			return ( $this -> FormDataObjects [ $form_index ] ) ;

		// Valid indexes range from 0 to count - 1 (an index equal to the count used to slip through)
		if  ( $form_index  <  0  ||  $form_index  >=  count ( $this -> FormDataObjectNumbers ) )
			error ( new PdfToTextFormException ( "Invalid form index #$form_index." ) ) ;

		// FormDataObjectNumbers maps a form index to the key of its entry in the FormData array
		$form_data	=  $this -> FormData [ $this -> FormDataObjectNumbers [ $form_index ] ] ;

		// Field definitions come either from an external XML template, or (null) from the form
		// definitions embedded in the PDF file
		$xml_data	=  null ;

		if  ( $template )
		   {
			if  ( ! file_exists ( $template ) )
				error ( new PdfToTextFormException ( "Form data template file \"$template\" not found." ) ) ;

			$xml_data	=  file_get_contents ( $template ) ;
		    }

		$definitions	=  new PdfToTextFormDefinitions ( $xml_data, $form_data [ 'form' ] ) ;
		$object		=  $definitions [ $form_index ]	-> GetFormDataFromPdfObject ( $form_data [ 'values' ] ) ;

		// The object cache must be keyed by form index, since this is how it is looked up at the top of
		// this method ; appending would return the wrong form when indexes are requested out of order.
		// NOTE(review): FormDataDefinitions is still appended to, as its consumers are not visible here.
		$this -> FormDataDefinitions []			=  $definitions ;
		$this -> FormDataObjects [ $form_index ]	=  $object ;

		return ( $object ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        MarkTextLike - Marks output text.

	    PROTOTYPE
	        $pdf -> MarkTextLike ( $regex, $marker_start, $marker_end ) ;

	    DESCRIPTION
	        Sometimes it may be convenient, when you want to extract only a portion of text, to say : "I want to
		extract text between this title and this title". The MarkTextLike() method provides some support for
		such a task. Imagine you have documents that have the same structure, all starting with an "Introduction"
		title :

			Introduction
				...
				some text
				...
			Some other title
				...

		By calling the MarkTextLike() method such as in the example below :

			$pdf -> MarkTextLike ( '/\bIntroduction\b/', '<M>', '</M>' ) ;

		then you will get as output :

			<M>Introduction</M>
				...
				some text
				...
			<M>Some other title</M>

		Adding such markers in the output will allow you to easily extract the text between the chapters
		"Introduction" and "Some other title", using a regular expression.

		The font name used for the first string matched by the specified regular expression will be searched
		later to add markers around all the text portions using this font.


	    PARAMETERS
	        $regex (string) -
	                A regular expression to match the text to be matched. Subsequent portions of text using the
			same font will be surrounded by the marker start/end strings.

		$marker_start, $marker_end (string) -
			Markers to surround the string when a match is found.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  MarkTextLike ( $regex, $marker_start, $marker_end )
	   {
		// Build the marker definition, then queue it in the 'font' list of unprocessed markers
		$marker			=  array ( ) ;
		$marker [ 'regex' ]	=  $regex ;
		$marker [ 'start' ]	=  $marker_start ;
		$marker [ 'end'   ]	=  $marker_end ;

		$this -> UnprocessedMarkerList [ 'font' ] []	=  $marker ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        SetCaptures, SetCapturesFromString - Defines document parts to be captured.

	    PROTOTYPE
	        $pdf -> SetCaptures ( $xml_file ) ;
		$pdf -> SetCapturesFromString ( $xml_data ) ;

	    DESCRIPTION
	        Defines document parts to be captured.
		SetCaptures() takes the definitions for the areas to be captured from an XML file, while
		SetCapturesFromString() takes them from a string representing xml capture definitions.

	    NOTES
	        - See file README.md for an explanation on the format of the XML capture definition file.
		- The SetCaptures() methods must be called before the Load() method.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetCaptures ( $xml_file )
	   {
		// Report a missing definition file before trying to read it
		$missing	=  ! file_exists ( $xml_file ) ;

		if  ( $missing )
			error ( new PdfToTextException ( "File \"$xml_file\" does not exist." ) ) ;

		// The real work is delegated to the string-based version
		$this -> SetCapturesFromString ( file_get_contents ( $xml_file ) ) ;
	    }


	public function  SetCapturesFromString ( $xml_data )
	   {
		// Capturing areas requires the PDFOPT_BASIC_LAYOUT option, so force it
		$this -> Options		=  $this -> Options  |  self::PDFOPT_BASIC_LAYOUT ;

		// Capture definitions are built from the supplied XML data
		$definitions			=  new PdfToTextCaptureDefinitions ( $xml_data ) ;
		$this -> CaptureDefinitions	=  $definitions ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetCaptures - Returns captured data.

	    PROTOTYPE
	        $object		=  $pdf -> GetCaptures ( $full = false ) ;

	    PARAMETERS
		$full (boolean) -
			When true, the whole captures, together with their definitions, are returned. When false,
			only a basic object containing the capture names and their values is returned.

	    DESCRIPTION
	        Returns the object that contains captured data.

	    RETURN VALUE
	        An object of type PdfToTextCaptures, or false if an error occurred.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetCaptures ( $full = false )
	   {
		// The capture object is built on first request, then reused
		if  ( ! $this -> CaptureObject )
		   {
			// No capture definitions were supplied through SetCaptures() or SetCapturesFromString() :
			// report the error as documented instead of calling a method on a non-object
			if  ( ! $this -> CaptureDefinitions )
				return ( false ) ;

			$this -> CaptureDefinitions -> SetPageCount ( count ( $this -> Pages ) ) ;
			$this -> CaptureObject	=  $this -> CaptureDefinitions -> GetCapturedObject ( $this -> DocumentFragments ) ;

			// Nothing usable could be built from the document fragments
			if  ( ! $this -> CaptureObject )
				return ( false ) ;
		    }

		// Either the full object (captures + definitions), or its simplified name/value form
		if  ( $full )
			return ( $this -> CaptureObject ) ;

		return ( $this -> CaptureObject -> ToCaptures ( ) ) ;
	    }


	/**************************************************************************************************************
	 **************************************************************************************************************
	 **************************************************************************************************************
	 ******                                                                                                  ******
	 ******                                                                                                  ******
	 ******                                         INTERNAL METHODS                                         ******
	 ******                                                                                                  ******
	 ******                                                                                                  ******
	 **************************************************************************************************************
	 **************************************************************************************************************
	 **************************************************************************************************************/

	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        AddImage - Adds an image from the PDF stream to the current object.

	    PROTOTYPE
	        $this -> AddImage ( $object_id, $stream_data, $type, $object_data ) ;

	    DESCRIPTION
	        Adds an image from the PDF stream to the current object.
		If the PDFOPT_GET_IMAGE_DATA flag is enabled, image data will be added to the ImageData property.
		If the PDFOPT_DECODE_IMAGE_DATA flag is enabled, a jpeg resource will be created and added into the
		Images array property.

	    PARAMETERS
	        $object_id (integer) -
	                Pdf object id.

		$stream_data (string) -
			Contents of the unprocessed stream data containing the image.

		$type (integer) -
			One of the PdfToText::PDF_*_ENCODING constants.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  AddImage ( $object_id, $stream_data, $type, $object_data )
	   {
		// Raw stream data is only kept in debug mode, when PDFOPT_GET_IMAGE_DATA is set, and only for DCT (jpeg) streams
		if  ( self::$DEBUG  &&  ( $this -> Options  &  self::PDFOPT_GET_IMAGE_DATA )  &&  $type  ==  self::PDF_DCT_ENCODING )
			$this -> ImageData	=  array ( 'type' => 'jpeg', 'data' => $stream_data ) ;

		// Nothing more to do when image decoding has not been requested
		if  ( ! ( $this -> Options  &  self::PDFOPT_DECODE_IMAGE_DATA ) )
			return ;

		// Honor the limit on the number of extracted images, when one has been set
		if  ( $this -> MaxExtractedImages  &&  ! ( $this -> ImageCount  <  $this -> MaxExtractedImages ) )
			return ;

		$autosave	=  $this -> Options  &  self::PDFOPT_AUTOSAVE_IMAGES ;
		$image		=  $this -> DecodeImage ( $object_id, $stream_data, $type, $object_data, $autosave ) ;

		// Encoding not supported by DecodeImage()
		if  ( $image  ===  false )
			return ;

		$this -> ImageCount ++ ;

		// Without the PDFOPT_AUTOSAVE_IMAGES flag, the decoded image is simply kept in memory
		if  ( ! ( $this -> Options  &  self::PDFOPT_AUTOSAVE_IMAGES ) )
		   {
			$this -> Images []	=  $image ;
			return ;
		    }

		// With the flag set : generate an output filename, save the image to that file, release the image
		// and remember the name of the file that has been written
		$output_filename	=  $this -> __get_output_image_filename ( ) ;

		$image -> SaveAs ( $output_filename, $this -> ImageAutoSaveFormat ) ;
		unset ( $image ) ;

		$this -> AutoSavedImageFiles []	=  $output_filename ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        DecodeData - Decodes stream data.

	    PROTOTYPE
	        $data	=  $this -> DecodeData ( $object_id, $stream_data, $type, $object_data ) ;

	    DESCRIPTION
	        Decodes stream data (binary data located between the "stream" and "endstream" directives) according to the
		specified encoding type, given in the surrounding object parameters.

	    PARAMETERS
		$object_id (integer) -
			Id of the object containing the data.

	        $stream_data (string) -
	                Contents of the binary stream.

		$type (integer) -
			One of the PDF_*_ENCODING constants, as returned by the GetEncodingType() method.

	    RETURN VALUE
	        Returns the decoded stream data.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  DecodeData ( $object_id, $stream_data, $type, $object_data )
	   {
		switch  ( $type )
		   {
			case	self::PDF_FLATE_ENCODING :
				// Objects in password-protected files SHOULD be encrypted, but plain ones may be met as well :
				// inflating is therefore always tried first, and decryption only comes as a fallback
				$inflated	=  @gzuncompress ( $stream_data ) ;

				if  ( $inflated  !==  false )
					return ( $inflated ) ;

				// Not an encrypted file : the data is simply invalid
				if  ( ! $this -> IsEncrypted )
				   {
					if  ( self::$DEBUG  >  1 )
						warning ( new PdfToTextDecodingException ( "Invalid gzip data.", $object_id ) ) ;

					return ( $inflated ) ;
				    }

				$decrypted	=  $this -> EncryptionData -> Decrypt ( $object_id, $stream_data ) ;

				if  ( $decrypted  ===  false  &&  self::$DEBUG  >  1 )
					warning ( new PdfToTextDecodingException ( "Unable to decrypt object contents.", $object_id ) ) ;

				return ( $decrypted ) ;

			case	self::PDF_LZW_ENCODING :
				return ( $this -> __decode_lzw ( $stream_data ) ) ;

			case	self::PDF_ASCIIHEX_ENCODING :
				return ( $this -> __decode_ascii_hex ( $stream_data ) ) ;

			case	self::PDF_ASCII85_ENCODING :
				$decoded	=  $this -> __decode_ascii_85 ( $stream_data ) ;

				if  ( $decoded  ===  false )
					return ( $decoded ) ;

				// The decoded data may itself be deflated, even if the object flags should normally say so
				// with an additional /FlateDecode entry : blindly try to inflate it
				$inflated	=  @gzuncompress ( $decoded ) ;

				if  ( $inflated  !==  false )
					return ( $inflated ) ;

				return ( $decoded ) ;

			case	self::PDF_TEXT_ENCODING :
				// Plain text : nothing to decode
				return ( $stream_data ) ;
		    }

		// Any other encoding type yields an empty string
		return ( '' ) ;
	    }


	// __decode_lzw -
	//	Decoding function for LZW compressed data. This function is largely inspired by the TCPDF one but has been rewritten
	//	for a performance gain of 30-35%.
	//	$data is the raw contents of the stream ; the return value is the decoded string.
	//	Codes are read most significant bit first, starting with 9 bits per code. Code 256 clears the table and
	//	code 257 marks the end of data ; decoding also stops when the input is exhausted.
	private function   __decode_lzw ( $data )
	   {
		// The initial dictionary contains 256 entries where each index is equal to its character representation
		static	$InitialDictionary	=  null ;

		// Dictionary lengths - when we reach one of the values specified as the key, we have to set the bit length to the corresponding value
		static  $DictionaryLengths	=  array
		   (
			511		=>  10,
			1023		=>  11,
			2047		=>  12
		    ) ;

		if  ( $InitialDictionary  ===  null )
			$InitialDictionary	=  array_map ( 'chr', range ( 0, 255 ) ) ;

		// Decoded string to be returned
		$result		=  '' ;

		// Convert string to binary string
		$bit_string	=  '' ;
		$data_length	=  strlen ( $data ) ;

		for  ( $i = 0 ; $i  <  $data_length ; $i ++ )
			$bit_string	.=  sprintf ( '%08b', ord ( $data[$i] ) ) ;

		$data_length	*=  8 ;

		// Initialize dictionary ; entries 256 and 257 are reserved for the clear-table and end-of-data markers
		$bit_length		=  9 ;
		$dictionary_index	=  258 ;
		$dictionary		=  $InitialDictionary ;

		// Previous value
		$previous_index		=  0 ;

		// Start index in bit string
		$start_index		=  0 ;

		// Until we encounter the EOD marker (257), read $bit_length bits
		while  ( ( $start_index  <  $data_length )  &&  ( ( $index = bindec ( substr ( $bit_string, $start_index, $bit_length ) ) )  !==  257 ) )
		   {
			// Move to next bit position
			$start_index	+=  $bit_length ;

			if  ( $index  !==  256  &&  $previous_index  !==  256 )
			    {
				// Index exists in the dictionary : output it, and remember it as the previous one
				if  ( $index  <  $dictionary_index )
				   {
					$result			.=  $dictionary [ $index ] ;
					$dictionary_value	 =  $dictionary [ $previous_index ] . $dictionary [ $index ] [0] ;
					$previous_index		 =  $index ;
				    }
				// Index does not exist yet : it can only designate the entry that is about to be created, which
				// is the previous string followed by its own first character
				else
				   {
					$dictionary_value	 =  $dictionary [ $previous_index ] . $dictionary [ $previous_index ] [0] ;
					$result			.=  $dictionary_value ;

					// This new entry is the string that has just been output, so it must become the previous
					// one ; otherwise all the subsequent dictionary entries would be built from a stale prefix
					$previous_index		 =  $dictionary_index ;
				    }

				// Update dictionary
				$dictionary [ $dictionary_index ++ ]	=  $dictionary_value ;

				// Change bit length whenever we reach an index limit
				if  ( isset ( $DictionaryLengths [ $dictionary_index ] ) )
					$bit_length	=  $DictionaryLengths [ $dictionary_index ] ;
			    }
			// Clear table marker
			else if  ( $index  ===  256)
			   {
				// Reset dictionary and bit length
				$bit_length		=  9 ;
				$dictionary_index	=  258 ;
				$previous_index		=  256 ;
				$dictionary		=  $InitialDictionary ;
			     }
			// First entry after a clear table marker ($previous_index  ===  256) : output it, no dictionary update
			else
			   {
				$result		.=  $dictionary [ $index ] ;
				$previous_index  =  $index ;
			    }
		    }

		// All done, return
		return ( $result ) ;
	   }


	// __decode_ascii_hex -
	//	Decoder for /AsciiHexDecode streams.
	//	Each pair of hexadecimal digits gives one output byte. Whitespace is skipped, as well as comments
	//	(from '%' to the end of line). Decoding stops at the '>' end-of-data marker ; if the number of digits
	//	is odd, the last one is taken as the high-order half of a final byte.
	//	Returns the decoded string, or an empty string if an invalid character is met or if the '>' marker
	//	is missing.
	private function __decode_ascii_hex ( $input )
	    {
		// Position of a digit in this string is its value ; the search is case-insensitive
		static	$hex_digits	=  '0123456789ABCDEF' ;

		$output		=  '' ;
		$length		=  strlen ( $input ) ;
		$high_nibble	=  -1 ;			// Pending high-order digit, or -1 if none
		$in_comment	=  false ;
		$found_eod	=  false ;

		for  ( $i = 0 ; $i  <  $length ; $i ++ )
		   {
			$c	=  $input [ $i ] ;

			// The end-of-data marker stops everything, even within a comment
			if  ( $c  ===  '>' )
			   {
				$found_eod	=  true ;
				break ;
			    }

			// A comment runs until the end of the line
			if  ( $in_comment )
			   {
				if  ( $c  ===  "\r"  ||  $c  ===  "\n" )
					$in_comment	=  false ;

				continue ;
			    }

			// Whitespace is ignored (double-quoted strings are required here for escapes to be interpreted)
			if  ( $c  ===  "\0"  ||  $c  ===  "\t"  ||  $c  ===  "\r"  ||  $c  ===  "\f"  ||  $c  ===  "\n"  ||  $c  ===  ' ' )
				continue ;

			if  ( $c  ===  '%' )
			   {
				$in_comment	=  true ;
				continue ;
			    }

			// Anything else must be a hexadecimal digit
			$code	=  stripos ( $hex_digits, $c ) ;

			if  ( $code  ===  false )
				return ( '' ) ;

			if  ( $high_nibble  <  0 )
				$high_nibble	 =  $code ;
			else
			   {
				$output		.=  chr ( ( $high_nibble << 4 ) | $code ) ;
				$high_nibble	 =  -1 ;
			    }
		    }

		// Data without the end-of-data marker is rejected
		if  ( ! $found_eod )
			return ( '' ) ;

		// Odd number of digits : the missing last digit is assumed to be zero
		if  ( $high_nibble  >=  0 )
			$output		.=  chr ( $high_nibble << 4 ) ;

		return ( $output ) ;
	    }


	// __decode_ascii_85 -
	//	Decoder for /Ascii85Decode streams.
	//	Each group of 5 characters in the range '!'..'u' gives 4 output bytes ; a 'z' standing between two groups
	//	gives 4 null bytes. Whitespace is skipped, and decoding stops at the first '~' character.
	//	Returns the decoded string, or false if the input is empty or contains an unexpected character.
	private function  __decode_ascii_85 ( $data )
	   {
		// Ordinal value of the first character used in Ascii85 encoding
		static	$first_ord	=  33 ;
		// A 'z' in the input data means "sequence of 4 nuls"
		static	$z_exception	=  "\0\0\0\0" ;
		// Powers of 85, from 4 to 0
		static	$exp85		=  array ( 52200625, 614125, 7225, 85, 1 ) ;
		// Characters that can be silently skipped
		static	$whitespace	=  "\0\t\n\f\r " ;

		// Ignore empty data
		if  ( $data  ===  '' )
			return ( false ) ;

		$data_length	=  strlen ( $data ) ;
		$ords		=  array ( ) ;
		$ord_count	=  0 ;
		$result		=  '' ;

		// Paranoia : Ascii85 data may start with '<~' (but it always ends with '~>'). Anyway, we must start past this construct if present
		// (strncmp() is used so that one-character data does not cause an out-of-range string access)
		if  ( strncmp ( $data, '<~', 2 )  ===  0 )
			$start	=  2 ;
		else
			$start	=  0 ;

		// Loop through input characters
		for  ( $i = $start ; $i  <  $data_length  &&  $data [$i]  !=  '~' ; $i ++ )
		   {
			$ch	=  $data [$i] ;

			// Most common case : current character is in the range of the Ascii85 encoding ('!'..'u')
			if  ( $ch  >=  '!'  &&  $ch  <=  'u' )
				$ords [ $ord_count ++ ]		=  ord ( $ch ) - $first_ord ;
			// 'z' is replaced with a sequence of null bytes, but only between two groups
			else if  ( $ch  ==  'z'  &&  ! $ord_count )
				$result		.=  $z_exception ;
			// Spaces are ignored
			else if  ( strpos ( $whitespace, $ch )  !==  false )
				continue ;
			// Other characters (including a 'z' in the middle of a group) : corrupted data...
			else
				return ( false ) ;

			// We have collected 5 characters in base 85 : convert their 32-bits value to 4 bytes
			if  ( $ord_count  ==  5 )
			   {
				$ord_count	=  0 ;

				for  ( $sum = 0, $j = 0  ; $j  <  5  ; $j ++ )
					$sum 	=  ( $sum * 85 ) + $ords [ $j ] ;

				for ( $j = 3  ; $j  >=  0  ; $j -- )
					$result 	.=  chr ( ( $sum >> ( $j * 8 ) )  &  0xFF ) ;
			    }
		    }

		// A last processing for the potential remaining characters : an incomplete group of n characters gives
		// n - 1 bytes. Adding 1 to the last digit makes up for the missing low-order digits.
		if  ( $ord_count )
		   {
			for  ( $i = 0, $sum = 0  ; $i  <  $ord_count  ; $i++ )
				$sum 	+= ( $ords [ $i ] + ( $i == $ord_count - 1 ) ) * $exp85 [$i] ;

			for  ( $i = 0  ; $i  <  $ord_count - 1  ; $i++ )
				$result 	.=  chr ( ( $sum >> ( ( 3 - $i ) * 8 ) )  &  0xFF ) ;
		    }

		// All done, return
		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        DecodeImage - Returns decoded image contents.

	    PROTOTYPE
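	        $image	=  $this -> DecodeImage ( $object_id, $stream_data, $type, $object_data, $autosave ) ;

	    DESCRIPTION
	        Builds an image object from the supplied stream data, according to the stream encoding type.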

	    PARAMETERS
	        $object_id (integer) -
	                Pdf object number.

		$stream_data (string) -
			Object data.

		$type (integer) -
			One of the PdfToText::PDF_*_ENCODING constants.

		$autosave (boolean) -
			When autosave is selected, images will not be decoded into memory unless they have a format
			different from JPEG. This is intended to save memory.

	    RETURN VALUE
	        Returns an object of type PdfIMage, or false if the image encoding type is not currently supported.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  DecodeImage ( $object_id, $stream_data, $type, $object_data, $autosave )
	   {
		// Normal JPEG image
		if  ( $type  ==  self::PDF_DCT_ENCODING )
			return ( new PdfJpegImage ( $stream_data, $autosave ) ) ;

		// CCITT fax image
		if  ( $type  ==  self::PDF_CCITT_FAX_ENCODING )
			return ( new PdfFaxImage ( $stream_data ) ) ;

		// Image data in an inflated stream : PdfInlinedImage decides whether it is able to handle it or not
		if  ( $type  ==  self::PDF_FLATE_ENCODING )
		   {
			$inlined	=  PdfInlinedImage::CreateInstance ( $stream_data, $object_data, $autosave ) ;

			return ( ( $inlined ) ?  $inlined : false ) ;
		    }

		// Any other encoding is not supported
		return ( false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        DecodeObjectStream - Decodes an object stream.

	    PROTOTYPE
	        $array	=  $this -> DecodeObjectStream ( $object_id, $object_data ) ;

	    DESCRIPTION
	        Decodes an object stream. An object stream is yet another PDF object type that contains itself several
		objects not defined using the "x y obj ... endobj" syntax.
		As far as I understood, object streams data is contained within stream/endstream delimiters, and is
		gzipped.
		Object streams start with a set of object id/offset pairs separated by a space ; catenated object data
		immediately follows the last space ; for example :

			1167 0 1168 114 <</DA(/Helv 0 Tf 0 g )/DR<</Encoding<</PDFDocEncoding 1096 0 R>>/Font<</Helv 1094 0 R/ZaDb 1095 0 R>>>>/Fields[]>>[/ICCBased 1156 0 R]

		The above example specifies two objects :
			. Object #1167, which starts at offset 0 and ends before the second object, at offset #113 in
			  the data. The contents are :
				<</DA(/Helv 0 Tf 0 g )/DR<</Encoding<</PDFDocEncoding 1096 0 R>>/Font<</Helv 1094 0 R/ZaDb 1095 0 R>>>>/Fields[]>>
			. Object #1168, which starts at offset #114 and continues until the end of the object stream.
			  It contains the following data :
				[/ICCBased 1156 0 R]

	    PARAMETERS
	        $object_id (integer) -
	                Pdf object number.

		$object_data (string) -
			Object data.

	    RETURN VALUE
	        Returns false if any error occurred (mainly for syntax reasons).
		Otherwise, returns an associative array containing the following elements :
		- object_id :
			Array of all the object ids contained in the object stream.
		- object :
			Array of corresponding object data.

		The reason for this format is that it is identical to the array returned by the preg_match() function
		used in the Load() method for finding objects in a PDF file (ie, a regex that matches "x y obj/endobj"
		constructs).

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  DecodeObjectStream ( $object_id, $object_data )
	   {
		// Extract the encoded data for this object, located between the stream/endstream keywords
		if  ( preg_match ( '#[^/] stream ( (\r? \n) | \r ) (?P<stream> .*?) endstream#imsx', $object_data, $stream_match ) )
		    {
			$stream_data	=  $stream_match [ 'stream' ] ;
			$type 		=  $this -> GetEncodingType ( $object_id, $object_data ) ;
			$decoded_data	=  $this -> DecodeData ( $object_id, $stream_data, $type, $object_data ) ;

			if  ( self::$DEBUG  >  1 )
				echo "\n----------------------------------- OBJSTREAM #$object_id\n$decoded_data" ;
		      }
		// Stay prepared to find one day a sample declared as an object stream but not having gzipped data delimited by stream/endstream tags
		else
		   {
			if  ( self::$DEBUG  >  1 )
				error ( new PdfToTextDecodingException ( "Found object stream without gzipped data", $object_id ) ) ;

			return ( false ) ;
		    }

		// Object streams data start with a series of object id/offset pairs. The offset is absolute to the first character
		// after the last space of these series.
		// The series is a digit followed by any mix of digits and spaces ; it is matched with a character class rather
		// than with a repeated group, which matches exactly the same text without stressing the PCRE stack.
		// NOTE(review): the series is located by pattern only ; an object stream whose first object starts with a digit
		// would need the /N and /First entries of the stream dictionary to be split correctly.
		if  ( ! preg_match ( '/^ \s* (?P<series> \d [\d\s]* )/x', $decoded_data, $series_match ) )
		   {
			if  ( self::$DEBUG  >  1 )
				error ( new PdfToTextDecodingException ( "Object stream does not start with integer object id/offset pairs.", $object_id ) ) ;

			return ( false ) ;
		    }

		// Extract the series of object id/offset pairs and the stream object data.
		// The object data starts after the WHOLE match : using the 'series' group alone would shift every offset
		// by the number of spaces that may precede the first object id.
		$series		=  explode ( ' ', rtrim ( preg_replace ( '/\s+/', ' ', $series_match [ 'series' ] ) ) ) ;
		$data		=  substr ( $decoded_data, strlen ( $series_match [0] ) ) ;

		// $series should contain an even number of values
		if  ( count ( $series ) % 2 )
		   {
			if  ( self::$DEBUG )
				warning ( new PdfToTextDecodingException ( "Object stream should start with an even number of integer values.", $object_id ) ) ;

			array_pop ( $series ) ;
		    }

		// Extract every individual object
		$objects	=  array ( 'object_id' => array ( ), 'object' => array ( ) ) ;

		for  ( $i = 0, $count = count ( $series ) ; $i  <  $count ; $i += 2 )
		   {
			$inner_object_id	=  ( integer ) $series [$i] ;
			$offset			=  ( integer ) $series [$i+1] ;

			// If there is a "next" object, its offset (two items further) tells where the current one ends
			if  ( isset ( $series [ $i + 3 ] ) )
				$object_contents	=  substr ( $data, $offset, ( integer ) $series [ $i + 3 ] - $offset ) ;
			// Otherwise, extract everything until the end
			else
				$object_contents	=  substr ( $data, $offset ) ;

			$objects [ 'object_id'] []	=  $inner_object_id ;
			$objects [ 'object'   ] []	=  $object_contents ;
		    }

		return ( $objects ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        ExtractTextData - Extracts text, header & footer information from a text object.

	    PROTOTYPE
	        $this -> ExtractTextData ( $object_id, $stream_contents, &$text, &$header, &$footer ) ;

	    DESCRIPTION
	        Extracts text, header & footer information from a text object. The extracted text contents will be
		stripped from any header/footer information.

	    PARAMETERS
	        $text (string) -
	                Variable that will receive text contents.

		$header, $footer (string) -
			Variables that will receive header and footer information.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  ExtractTextData ( $object_id, $stream_contents, &$text, &$header, &$footer )
	   {
		// A header or footer is normally introduced by a dictionary that holds a [/Top] or [/Bottom] entry, such as :
		//	<< /Type /Pagination ... [/Bottom] ... >>
		// and is followed by text-drawing instructions enclosed between the BDC and EMC operators.
		// The dictionary contents are matched with "[^>]*?" rather than ".*?" : the latter could start at an
		// unrelated "<<" construct located kilobytes before, and swallow regular page contents up to the footer.
		// The counterpart is that a header/footer dictionary that contains a nested "<< ... >>" construct
		// will not be recognized.
		static		$pagination_re		=  '#
								(?P<contents>
									<< [^>]*? \[ \s* / (?P<location> (Bottom) | (Top) ) \s* \] [^>]*? >> \s*
									BDC .*? EMC
								 )
							    #imsx' ;

		$text		=  '' ;
		$header		=  '' ;
		$footer		=  '' ;

		// No header or footer : the whole stream is regular text
		if  ( ! preg_match_all ( $pagination_re, $stream_contents, $found, PREG_SET_ORDER ) )
		   {
			$text	=  $stream_contents ;
			return ;
		    }

		// When several headers or footers are found, the last one of each kind wins
		foreach  ( $found  as  $item )
		   {
			if  ( strcasecmp ( $item [ 'location' ], 'Bottom' )  ===  0 )
				$footer		=  $item [ 'contents' ] ;
			else
				$header		=  $item [ 'contents' ] ;
		    }

		// The text is what remains once every header and footer has been removed
		$text	=  preg_replace ( $pagination_re, '', $stream_contents ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
		ExtractText - extracts text from a pdf stream.

	    PROTOTYPE
		$text 	=  $this -> ExtractText ( $page_number, $object_id, $data, &$current_font ) ;

	    DESCRIPTION
	        Extracts text from decoded stream contents.

	    PARAMETERS
		$page_number (integer) -
			Page number that contains the text to be extracted.

	    	$object_id (integer) -
	    		Object id of this text block. Only used for the debug output.

	    	$data (string) -
	    		Stream contents.

		$current_font (integer) -
			Id of the current font, which should be found in the $this->FontTable property, if everything
			went ok.
			This parameter is required, since text blocks may not specify a new font resource id and reuse
			the one that was set before.
			It is passed by reference : it is updated each time a font resource instruction is met, and
			reset to -1 when an "ET" instruction is met.

	    RETURN VALUE
		Returns the decoded text, after it has been processed by the __rtl_process() method.

	    NOTES
		The PDF language can be seen as a stack-driven language  ; for example, the instruction defining a text
		matrix ( "Tm" ) expects 6 floating-point values from the stack :

			0 0 0 0 x y Tm

		It can also specify specific operators, such as /Rx, which sets font number "x" to be the current font,
		or even "<< >>" constructs that we can ignore during our process of extracting textual data.
		Actually, we only want to handle a very small subset of the Adobe drawing language ; These are :
		- "Tm" instructions, that specify, among others, the x and y coordinates of the next text to be output
		- "/R" instructions, that specify which font is to be used for the next text output. This is useful
		  only if the font has an associated character map.
		- "/F", same as "/R", but use a font map id instead of a direct object id.
		- Text, specified either using a single notation ( "(sometext)" ) or the array notation
		  ( "[(...)d1(...)d2...(...)]" ), which allows for specifying inter-character spacing.
		 - "Tf" instructions, that specify the font size. This is to be able to compute approximately the
		   number of empty lines between two successive Y coordinates in "Tm" instructions
		 - "TL" instructions, that define the text leading to be used by "T*"

		This is why I chose to decompose the process of text extraction into three steps :
		- The first one, the lowest-level step, is a tokenizer that extracts individual elements, such as "Tm",
		  "TJ", "/Rx" or "510.77". This is handled by the __next_token() method.
		- The second one, __next_instruction(), collects tokens. It pushes every floating-point value onto the
		  stack, until an instruction is met.
		- The third one, ExtractText(), processes data returned by __next_instruction(), and actually performs
		  the (restricted) parsing of text drawing instructions.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  ExtractText ( $page_number, $object_id, $data, &$current_font )
	   {
		$new_data	=  $this -> __strip_useless_instructions ( $data ) ;

		if  ( self::$DEBUG )
		   {
			echo "\n----------------------------------- TEXT #$object_id (size = " . strlen ( $data ) . " bytes, new size = " . strlen ( $new_data ) . " bytes)\n" ;
			echo $data ;
			echo "\n----------------------------------- OPTIMIZED TEXT #$object_id (size = " . strlen ( $data ) . " bytes, new size = " . strlen ( $new_data ) . " bytes)\n" ;
			echo $new_data ;
		    }

		$data		=  $new_data ;

		// Index into the specified block of text-drawing instructions
		$data_index 			=  0 ;

		$data_length 			=  strlen ( $data ) ;		// Data length
		$result 			=  '' ;				// Resulting string

		// Y-coordinate of the last seen "Tm" instruction
		$last_goto_y 			=  0 ;
		$last_goto_x			=  0 ;

		// Y-coordinate of the last seen "Td" or "TD" relative positioning instruction
		$last_relative_goto_y		=  0 ;

		// When true, the current text should be output on the same line as the preceding one
		$use_same_line 			=  false ;

		// Instruction preceding the current one
		$last_instruction 		=  true ;

		// Current font size
		$current_font_size 		=  0 ;

		// Active template
		$current_template		=  '' ;

		// Various pre-computed variables
		$separator_length		=  strlen ( $this -> Separator ) ;

		// Current font map width, in bytes, plus a flag saying whether the current font is mapped or not
		// (both variables are filled by reference)
		$this -> FontTable -> GetFontAttributes ( $page_number, $current_template, $current_font, $current_font_map_width, $current_font_mapped ) ;

		// Extra newlines to add before the current text
		$extra_newlines 		=  0 ;

		// Text leading used by T*
		$text_leading 			=  0 ;

		// Set to true if a separator needs to be inserted
		$needs_separator		=  false ;

		// A flag to tell if we should "forget" the last instruction
		$discard_last_instruction	=  false ;

		// A flag that tells whether the Separator and BlockSeparator properties are identical
		$same_separators		=  ( $this -> Separator  ==  $this -> BlockSeparator ) ;

		// Instruction count (used for handling execution timeouts)
		$instruction_count		=  0 ;

		// Loop through instructions
		while  ( ( $instruction =  $this -> __next_instruction ( $page_number, $data, $data_length, $data_index, $current_template ) )  !==  false )
		   {
			// Text produced by the current instruction ; appended to $result once fully built
			$fragment	=  '' ;

			$instruction_count ++ ;

			// Timeout handling - don't test for every instruction processed
			if  ( ! ( $instruction_count % 100 ) )
			   {
				// Global timeout handling
				if  ( $this -> Options  &  self::PDFOPT_ENFORCE_GLOBAL_EXECUTION_TIME )
				   {
					$now	=  microtime ( true ) ;

					if  ( $now - self::$GlobalExecutionStartTime  >  self::$MaxGlobalExecutionTime )
						error ( new PdfToTextTimeoutException ( "file {$this -> Filename}", true, self::$PhpMaxExecutionTime, self::$MaxGlobalExecutionTime ) ) ;
				    }

				// Per-instance timeout handling
				if  ( $this -> Options  &  self::PDFOPT_ENFORCE_EXECUTION_TIME )
				   {
					$now	=  microtime ( true ) ;

					if  ( $now - $this -> ExecutionStartTime  >  $this -> MaxExecutionTime )
						error ( new PdfToTextTimeoutException ( "file {$this -> Filename}", false, self::$PhpMaxExecutionTime, $this -> MaxExecutionTime ) ) ;
				    }
			    }

			// Character position after the current instruction
			$data_index 	=  $instruction [ 'next' ] ;

			// Process current instruction
			switch  ( $instruction [ 'instruction' ] )
			   {
				// Raw text (enclosed by parentheses) or array text (enclosed within square brackets)
				// is returned as a single instruction
				case 	'text' :
					// Empty arrays of text may be encountered - ignore them
					if  ( ! count ( $instruction [ 'values' ] ) )
						break ;

					// Check if we have to insert a newline
					if ( ! $use_same_line )
					   {
						$fragment 		.=  $this -> EOL ;
						$needs_separator	 =  false ;
					    }
					// Roughly simulate spacing between lines by inserting newline characters
					else if  ( $extra_newlines  > 0 )
					   {
						$fragment 		.=  str_repeat ( $this -> EOL, $extra_newlines ) ;
						$extra_newlines		 =  0 ;
						$needs_separator	 =  false ;
					    }
					else
						$needs_separator	=  true ;

					// Add a separator if necessary
					if  ( $needs_separator )
					   {
						// If the Separator and BlockSeparator properties are the same (and not empty), only add a block separator if
						// the current result does not end with it
						// NOTE(review): the test is performed on $fragment, which is still empty when we get here, and not on
						// $result - to be confirmed whether this is intended.
						if  ( $same_separators )
						   {
							if  ( $this -> Separator  !=  ''  &&  substr ( $fragment, - $separator_length )  !=  $this -> BlockSeparator )
								$fragment		.=  $this -> BlockSeparator ;
						    }
						else
							$fragment		.=  $this -> BlockSeparator ;
					    }

					$needs_separator	=  true ;
					$value_index		=  0 ;

					// Fonts having character maps will require some special processing
					if  ( $current_font_mapped )
					   {
						// Loop through each text value
						foreach  ( $instruction [ 'values' ]  as  $text )
						   {
							$is_hex 	=  ( $text [0]  ==  '<' ) ;
							$length 	=  strlen ( $text ) - 1 ;	// Index of the closing delimiter
							$handled	=  false ;

							// Characters are encoded within angle brackets ( "<>" ).
							// Note that several characters can be specified within the same angle brackets, so we have to take
							// into account the width we detected in the begincodespancerange construct
							if  ( $is_hex )
							   {
								for  ( $i = 1 ; $i  <  $length ; $i += $current_font_map_width )
								   {
									$value		 =  substr ( $text, $i, $current_font_map_width ) ;
									$ch 		 =  hexdec ( $value ) ;

									// Use the per-font translation cache whenever possible
									if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ) )
										$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ;
									// No current font (-1 is the value set after an "ET" instruction) : take the code as is
									else if  ( $current_font  ==  -1 )
									   {
										$newchar	=  chr ( $ch ) ;
									    }
									else
									   {
										$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ch ) ;
										$this -> CharacterMapBuffer [ $current_font ] [ $ch ]		=  $newchar ;
									    }

									$fragment		.=  $newchar ;
								    }

								$handled	 =  true ;
							    }
							// Yes ! double-byte codes can also be specified as plain text within parentheses !
							// However, we have to be really careful here ; the sequence :
							//	(Be)
							// can mean the string "Be" or the Unicode character 0x4265 ('B' = 0x42, 'e' = 0x65)
							// Characters are taken two by two here, each pair forming one 16-bit code that is handed to
							// the font table for translation.
							else if  ( $current_font_map_width  ==  4  )
							   {
								$temp_result		=  '' ;

								for  ( $i = 1 ; $i  <  $length ; $i ++ )
								   {
									// Each character in the pair may be a backslash, which escapes the next character so we must skip it
									// This code needs to be reviewed ; the same code is duplicated to handle escaped characters in octal notation
									if  ( $text [$i]  !=  '\\' )
										$ch1	=  $text [$i] ;
									else
									   {
										$i ++ ;

										if  ( $text [$i]  <  '0'  ||  $text [$i]  >  '7' )
											$ch1	=  $this -> ProcessEscapedCharacter ( $text [$i] ) ;
										else
										   {
											// Octal notation : up to 3 octal digits
											$oct		=  '' ;
											$digit_count	=  0 ;

											while  ( $i  <  $length  &&  $text [$i]  >=  '0'  &&  $text [$i]  <=  '7'  &&  $digit_count  <  3 )
											   {
												$oct	.=  $text [$i ++] ;
												$digit_count ++ ;
											    }

											$ch1	=  chr ( octdec ( $oct ) ) ;
											$i -- ;		// Step back onto the last digit consumed
										    }
									    }

									// Second character of the pair
									$i ++ ;

									if  ( $text [$i]  != '\\' )
										$ch2	=  $text [$i] ;
									else
									   {
										$i ++ ;

										if  ( $text [$i]  <  '0'  ||  $text [$i]  >  '7' )
											$ch2	=  $this -> ProcessEscapedCharacter ( $text [$i] ) ;
										else
										   {
											$oct		=  '' ;
											$digit_count	=  0 ;

											while  ( $i  <  $length  &&  $text [$i]  >=  '0'  &&  $text [$i]  <=  '7'  &&  $digit_count  <  3 )
											   {
												$oct	.=  $text [$i ++] ;
												$digit_count ++ ;
											    }

											$ch2	=  chr ( octdec ( $oct ) ) ;
											$i -- ;
										    }
									    }

									// Build the 2-bytes character code
									$ch		=  ( ord ( $ch1 )  <<  8 )  |  ord ( $ch2 ) ;

									if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ) )
										$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ;
									else
									   {
										$newchar	=  $this -> FontTable -> MapCharacter ( $current_font, $ch, true ) ;
										$this -> CharacterMapBuffer [ $current_font ] [ $ch ]		=  $newchar ;
									    }

									// Yes !!! for characters encoded with two bytes, we can find the following construct :
									//	0x00 "\" "(" 0x00 "C" 0x00 "a" 0x00 "r" 0x00 "\" ")"
									// which must be expanded as : (Car)
									// We have here the escape sequences "\(" and "\)", but the backslash is encoded on two bytes
									// (although the MSB is nul), while the escaped character is encoded on 1 byte. waiting
									// for the next quirk to happen...
									if  ( $newchar  ==  '\\'  &&  isset ( $text [ $i + 2 ] ) )
									   {
										$newchar		=  $this -> ProcessEscapedCharacter ( $text [ $i + 2 ] ) ;
										$i ++ ;		// this time we processed 3 bytes, not 2
									    }

									$temp_result		.=  $newchar ;
								    }

								$fragment		.=  $temp_result ;
								$handled	 =  true ;
							    }

							// Character strings within parentheses.
							// For every text value, use the character map table for substitutions
							if  ( ! $handled )
							   {
								for  ( $i = 1 ; $i  <  $length ; $i ++ )
								   {
									$ch 		=  $text [$i] ;

									// Set to true to optimize calls to MapCharacters
									// Currently does not work with [email protected]/infoma.pdf (a few characters differ)
									$use_map_buffer	=  false ;

									// ... but don't forget to handle escape sequences "\n" and "\r" for characters
									// 10 and 13
									if  ( $ch  ==  '\\' )
									   {
										$ch 	=  $text [++$i] ;

										// Escaped character
										if  ( $ch  <  '0'  ||  $ch  >  '7' )
											$ch		=  $this -> ProcessEscapedCharacter ( $ch ) ;
										// However, an octal form can also be specified ; in this case we have to take into account
										// the character width for the current font (if the character width is 4 hex digits, then we
										// will encounter constructs such as "\000\077").
										// The method used here is dirty : we build a regex to match octal character representations on a substring
										// of the text
										else
										   {
											$width		=  $current_font_map_width / 2 ;	// Convert to byte count
											$subtext	=  substr ( $text, $i - 1 ) ;
											$regex		=  "#^ (\\\\ [0-7]{3}){1,$width} #imsx" ;

											$status		=  preg_match ( $regex, $subtext, $octal_matches ) ;

											if  ( $status )
											   {
												$octal_values	=  explode ( '\\', substr ( $octal_matches [0], 1 ) ) ;
												$ord		=  0 ;

												foreach  ( $octal_values  as  $octal_value )
													$ord	=  ( $ord  <<  8 ) + octdec ( $octal_value ) ;

												$ch	 =  chr ( $ord ) ;
												// $i already points after the first backslash, and the for loop will add 1
												$i	+=  strlen ( $octal_matches [0] ) - 2 ;
											    }
										    }

										$use_map_buffer		=  false ;
									    }

									// Add substituted character to the output result
									$ord		 =  ord ( $ch ) ;

									if  ( ! $use_map_buffer )
										$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
									else
									   {
										if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ) )
											$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ;
										else
										   {
											$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
											$this -> CharacterMapBuffer [ $current_font ] [ $ord ]	=  $newchar ;
										    }
									    }

									$fragment		.=  $newchar ;
								    }
							    }

							// Handle offsets between blocks of characters
							if  ( isset ( $instruction [ 'offsets' ] [ $value_index ] )  &&
									- ( $instruction [ 'offsets' ] [ $value_index ] )  >  $this -> MinSpaceWidth )
								$fragment		.=  $this -> __get_character_padding ( $instruction [ 'offsets' ] [ $value_index ] ) ;

							$value_index ++ ;
						    }
					    }
					// For fonts having no associated character map, we simply encode the string in UTF8
					// after the C-like escape sequences have been processed
					// Note that <xxxx> constructs can be encountered here, so we have to process them as well
					else
					   {
						foreach  ( $instruction [ 'values' ]  as  $text )
						   {
							$is_hex 	=  ( $text [0]  ==  '<' ) ;
							$length 	=  strlen ( $text ) - 1 ;

							// Some text within parentheses may have a backslash followed by a newline, to indicate some continuation line.
							// Example :
							//	(this is a sentence \
							//	 continued on the next line)
							// Funny isn't it ? so remove such constructs because we don't care
							$text		=  str_replace ( array ( "\\\r\n", "\\\r", "\\\n" ), '', $text ) ;

							// Characters are encoded within angle brackets ( "<>" )
							if  ( $is_hex )
							   {
								for  ( $i = 1 ; $i  <  $length ; $i += 2 )
								   {
									$ch 	=  hexdec ( substr ( $text, $i, 2 ) ) ;

									$fragment .=  $this -> CodePointToUtf8 ( $ch ) ;
								    }
							    }
							// Characters are plain text
							else
							   {
								$text	=  self::Unescape ( $text ) ;

								// The length is recomputed here, since unescaping may have changed it
								for  ( $i = 1, $length = strlen ( $text ) - 1 ; $i  <  $length ; $i ++ )
								   {
									$ch	=  $text [$i] ;
									$ord	=  ord ( $ch ) ;

									if  ( $ord  <  127 )
										$newchar	=  $ch ;
									else
									   {
										if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ) )
											$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ;
										else
										   {
											$newchar	=  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
											$this -> CharacterMapBuffer [ $current_font ] [ $ord ]	=  $newchar ;
										    }
									    }

									$fragment		.=  $newchar ;
								    }
							    }

							// Handle offsets between blocks of characters
							if  ( isset ( $instruction [ 'offsets' ] [ $value_index ] )  &&
									abs ( $instruction [ 'offsets' ] [ $value_index ] )  >  $this -> MinSpaceWidth )
								$fragment		.=  $this -> __get_character_padding ( $instruction [ 'offsets' ] [ $value_index ] ) ;

							$value_index ++ ;
						    }
					    }

					// Process the markers which do not have an associated font yet - this will be done by matching
					// the current text fragment against one of the regular expressions defined.
					// If a match occurs, then all the subsequent text fragment using the same font will be put markers.
					// The list is walked through its actual keys : a matched entry is removed with unset(), which leaves
					// holes in the numbering, so an index-based loop bounded by a count would both hit undefined offsets
					// and never reach the entries having the highest indexes.
					foreach  ( $this -> UnprocessedMarkerList [ 'font' ]  as  $marker_key => $marker )
					   {
						if  ( preg_match ( $marker [ 'regex' ], trim ( $fragment ) ) )
						   {
							$this -> TextWithFontMarkers [ $current_font ]	=  array
							   (
								'font'		=>  $current_font,
								'height'	=>  $current_font_size,
								'regex'		=>  $marker   [ 'regex' ],
								'start'		=>  $marker   [ 'start' ],
								'end'		=>  $marker   [ 'end'   ]
							    ) ;

							// This marker now has a font : it does not need to be processed anymore
							unset ( $this -> UnprocessedMarkerList [ 'font' ] [ $marker_key ] ) ;

							break ;
						    }
					    }

					// Check if we need to add markers around this text fragment
					if  ( isset ( $this -> TextWithFontMarkers [ $current_font ] )  &&
							$this -> TextWithFontMarkers [ $current_font ] [ 'height' ]  ==  $current_font_size )
					    {
						$fragment	=  $this -> TextWithFontMarkers [ $current_font ] [ 'start' ] .
								   $fragment .
								   $this -> TextWithFontMarkers [ $current_font ] [ 'end' ] ;
					     }

					$result		.=  $fragment ;

					break ;

				// An "nl" instruction means TJ, Tj, T* or "'"
				case 	'nl' :
					if  ( ! $instruction [ 'conditional' ] )
					   {
						if  ( $instruction [ 'leading' ]  &&  $text_leading  &&  $current_font_size )
						   {
							$count 	=  ( integer ) ( ( $text_leading - $current_font_size ) / $current_font_size ) ;

							// Always output at least one newline. The computed value is zero when the leading is
							// less than twice the font size, and negative when the leading or the font size is
							// negative ; a negative count must never reach str_repeat().
							if  ( $count  <  1 )
								$count 	=  1 ;
						    }
						else
							$count 	=  1 ;

						// NOTE(review): PHP_EOL is used here whereas the 'text' case uses $this -> EOL - to be confirmed
						// whether this is intended.
						$extra			 =  str_repeat ( PHP_EOL, $count ) ;
						$result 		.=  $extra ;
						$needs_separator	 =  false ;
						$last_goto_y 		-=  ( $count * $text_leading ) ;	// Approximation on y-coord change
						$last_relative_goto_y	 =  0 ;
					    }

					break ;

				// "Tm", "Td" or "TD" : Output text on the same line, if the "y" coordinates are equal
				case 	'goto' :
					// Some text is positioned using 'Tm' instructions ; however they can be immediately followed by 'Td' instructions
					// which give a relative positioning ; so consider that the last instruction wins
					if  ( $instruction [ 'relative' ] )
					   {
						// Try to put a separator if the x coordinate is non-zero
						//if  ( $instruction [ 'x' ] - $last_goto_x  >=  $current_font_size )
						//	$result		.=  $this -> Separator ;

						$discard_last_instruction	=  true ;
						$extra_newlines			=  0 ;
						$use_same_line			=  ( ( $last_relative_goto_y - abs ( $instruction  [ 'y' ] ) )  <=  $current_font_size ) ;
						$last_relative_goto_y		=  abs ( $instruction [ 'y' ] ) ;
						$last_goto_x			=  $instruction [ 'x' ] ;

						// Moving down by more than the font size : the text goes to a new line
						if  ( - $instruction [ 'y' ]  >  $current_font_size )
						   {
							$use_same_line		=  false ;

							if  ( $last_relative_goto_y )
								$extra_newlines		=  ( integer ) ( $current_font_size / $last_relative_goto_y ) ;
							else
								$extra_newlines		=  0 ;
						    }
						// No vertical move at all : stay on the same line
						else if  ( ! $instruction [ 'y' ] )
						   {
							$use_same_line		=  true ;
							$extra_newlines		=  0 ;
						    }

						break ;
					    }
					else
						$last_relative_goto_y	=  0 ;

					$y	=  $last_goto_y + $last_relative_goto_y ;

					if  ( $instruction [ 'y' ]  ==  $y  ||  abs ( $instruction [ 'y' ] - $y )  <  $current_font_size )
					   {
						$use_same_line 		=  true ;
						$extra_newlines 	=  0 ;
					    }
					else
					   {
						// Compute the number of newlines we have to insert between the current and the next lines
						if  ( $current_font_size )
							$extra_newlines =  ( integer ) ( ( $y - $instruction [ 'y' ] - $current_font_size ) / $current_font_size ) ;

						$use_same_line 		=  ( $last_goto_y  ==  0 ) ;
					    }

					$last_goto_y 		=  $instruction [ 'y' ] ;
					break ;

				// Set font size
				case 	'fontsize' :
					$current_font_size 	=  $instruction [ 'size' ] ;
					break ;

				// "/Rx" : sets the current font
				case 	'resource' :
					$current_font 		=  $instruction [ 'resource' ] ;

					$this -> FontTable -> GetFontAttributes ( $page_number, $current_template, $current_font, $current_font_map_width, $current_font_mapped ) ;
					break ;

				// "/TPLx" : references a template, which can contain additional font aliases
				case	'template' :
					if  ( $this -> PageMap -> IsValidXObjectName ( $instruction [ 'token' ] ) )
						$current_template	=  $instruction [ 'token' ] ;

					break ;

				// 'TL' : text leading to be used for the next "T*" in the flow
				case	'leading' :
					if  ( ! ( $this -> Options & self::PDFOPT_IGNORE_TEXT_LEADING ) )
						$text_leading 		=  $instruction [ 'size' ] ;

					break ;


				// 'ET' : we have to reset a few things here
				case	'ET' :
					$current_font			=  -1 ;
					$current_font_map_width		=  2 ;
					break ;
			    }

			// Remember last instruction - this will help us into determining whether we should put the next text
			// on the current or following line
			if  ( ! $discard_last_instruction )
				$last_instruction 	=  $instruction ;

			$discard_last_instruction	=  false ;
		    }

		return ( $this -> __rtl_process ( $result ) ) ;
	    }


	// __next_instruction -
	//	Retrieves the next instruction from the drawing text block.
	//
	//	Tokens are read with __next_token(), starting at offset $index of $data ($data_length being the length
	//	of $data). Numeric tokens are pushed onto an operand stack until an operator of interest is met.
	//	$page_number and $current_template are used to resolve font specifiers.
	//
	//	Returns false when the end of input is reached, or an associative array holding at least the following
	//	entries :
	//	- 'instruction' : one of 'goto', 'nl', 'fontsize', 'leading', 'text', 'resource', 'template', 'BT' or 'ET'
	//	- 'next'        : offset, in $data, where the caller should resume the analysis
	//	- 'token'       : the token that caused the instruction to be returned
	//	Additional entries depend on the instruction : 'x', 'y' and 'relative' for 'goto' ; 'conditional' and
	//	'leading' for 'nl' ; 'size' for 'fontsize' and 'leading' ; 'values' (and 'offsets' for the array
	//	notation) for 'text' ; 'resource' for 'resource'.
	private function  __next_instruction ( $page_number, $data, $data_length, $index, $current_template )
	   {
		// Text instruction put on hold by the previous call (see the "(" and "<" cases below).
		// Note that this is a static variable : the pending instruction is not tied to a particular object or stream.
		static 	$last_instruction 	=  false ;

		// Text followed by the "'" operator is returned in two steps : a newline instruction first, then, on
		// the next call, the text instruction that was put on hold
		if  ( $last_instruction )
		   {
			$result 		=  $last_instruction ;
			$last_instruction	=  false ;

			return ( $result ) ;
		    }

		// Whether we should compute enhanced statistics
		$enhanced_statistics		=  $this -> EnhancedStatistics ;

		// Holds the floating-point values encountered so far
		$number_stack 	=  array ( ) ;

		// Loop through the stream of tokens
		while  ( ( $part = $this -> __next_token ( $page_number, $data, $data_length, $index ) )  !==  false )
		   {
			$token 		=  $part [0] ;
			$next_index 	=  $part [1] ;

			// Floating-point number : push it onto the stack
			if  ( ( $token [0]  >=  '0'  &&  $token [0]  <=  '9' )  ||  $token [0]  ==  '-'  ||  $token [0]  ==  '+'  ||  $token [0]  ==  '.' )
			   {
				$number_stack []	=  $token ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'operand' ] ++ ;
			    }
			// 'Tm', 'Td' or 'TD' instructions : return a "goto" instruction with the x and y coordinates.
			// In all cases, the coordinates are the last two operands that precede the operator ; they are taken from
			// the top of the stack, so that operands left over by instructions that do not clear the stack cannot be
			// mistaken for coordinates. Missing operands default to zero.
			else if  ( $token  ==  'Tm'  ||  $token  ==  'Td'  ||  $token  ==  'TD' )
			   {
				$operand_count	=  count ( $number_stack ) ;
				$has_operands	=  ( $operand_count  >=  2 ) ;
				$x 		=  ( $has_operands ) ?  $number_stack [ $operand_count - 2 ] : 0 ;
				$y 		=  ( $has_operands ) ?  $number_stack [ $operand_count - 1 ] : 0 ;
				$relative	=  ( $token  !=  'Tm' ) ;	// Only 'Tm' gives absolute coordinates

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ $token ] ++ ;

				return ( array ( 'instruction' => 'goto', 'next' => $next_index, 'x' => $x, 'y' => $y, 'relative' => $relative, 'token' => $token ) ) ;
			    }
			// Output text "'" instruction, with conditional newline
			else if  ( $token [0]  ==  "'" )
			   {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ "'" ] ++ ;

				return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => true, 'leading' => false, 'token' => $token ) ) ;
			    }
			// Same as above
			else if  ( $token  ==  'TJ'  ||  $token  ==  'Tj' )
			   {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ $token ] ++ ;

				return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => true, 'leading' => false, 'token' => $token ) ) ;
			    }
			// Set font size (the size is the operand that immediately precedes 'Tf')
			else if  ( $token  ==  'Tf' )
			    {
				$operand_count	=  count ( $number_stack ) ;
				$size		=  ( $operand_count ) ?  $number_stack [ $operand_count - 1 ] : 0 ;

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'Tf' ] ++ ;

				return ( array ( 'instruction' => 'fontsize', 'next' => $next_index, 'size' => $size, 'token' => $token ) ) ;
			     }
			// Text leading (spacing used by T*)
			else if  ( $token  ==  'TL' )
			   {
				$operand_count	=  count ( $number_stack ) ;
				$size		=  ( $operand_count ) ?  $number_stack [ $operand_count - 1 ] : 0 ;

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'TL' ] ++ ;

				return ( array ( 'instruction' => 'leading', 'next' => $next_index, 'size' => $size, 'token' => $token ) ) ;
			    }
			// Position to next line
			else if  ( $token  ==  'T*' )
			    {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'T*' ] ++ ;

				return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => true, 'leading' => true, 'token' => $token ) ) ;
			     }
			// Draw object ("Do"). To prevent different text shapes to appear on the same line, we return a "newline" instruction
			// here. Note that the shape position is not taken into account here, and shapes will be processed in the order they
			// appear in the pdf file (which is likely to be different from their position on a graphic screen).
			else if  ( $token  ==  'Do' )
			   {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'ignored' ] ++ ;

				return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => false, 'leading' => false, 'token' => $token ) ) ;
			    }
			// Raw text output
			else if  ( $token [0]  ==  '(' )
			   {
				// Look ahead to see whether the text is followed by the "'" operator
				$next_part 	=  $this -> __next_token ( $page_number, $data, $data_length, $next_index ) ;
				$instruction	=  array ( 'instruction' => 'text', 'next' => $next_index, 'values' => array ( $token ), 'token' => $token ) ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '(' ] ++ ;

				// The lookahead returns false when the text is the last token of the stream
				if  ( $next_part  !==  false  &&  $next_part [0]  ==  "'" )
				   {
					// Return a newline first ; the text itself will be returned by the next call
					$last_instruction  	=  $instruction ;
					return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => false, 'leading' => true, 'token' => $token ) ) ;
				    }
				else
					return ( $instruction ) ;
			    }
			// Hex digits within angle brackets
			else if  ( $token [0]  ==  '<'  )
			   {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '<' ] ++ ;

				// Only consider this token as text if the opening angle bracket is followed by an alphanumeric character ;
				// anything else is skipped, and the analysis continues with the next token
				if  ( isset ( $token [1] )  &&  ( self::$CharacterClasses [ $token [1] ] & self::CTYPE_ALNUM ) )
				   {
					$next_part 	=  $this -> __next_token ( $page_number, $data, $data_length, $next_index ) ;
					$instruction	=  array ( 'instruction' => 'text', 'next' => $next_index, 'values' => array ( $token ), 'token' => $token ) ;

					// Same processing as for raw text when the "'" operator follows
					if  ( $next_part  !==  false  &&  $next_part [0]  ==  "'" )
					   {
						$last_instruction  	=  $instruction ;
						return ( array ( 'instruction' => 'nl', 'next' => $next_index, 'conditional' => false, 'leading' => true, 'token' => $token ) ) ;
					    }
					else
						return ( $instruction ) ;
				    }
			    }
			// Text specified as an array of individual raw text elements, and individual interspaces between characters
			else if  ( $token [0]  ==  '[' )
			   {
				$values 	=  $this -> __extract_chars_from_array ( $token ) ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '[' ] ++ ;
				$instruction 	=  array ( 'instruction' => 'text', 'next' => $next_index, 'values' => $values [0], 'offsets' => $values [1], 'token' => $token ) ;

				return ( $instruction ) ;
			    }
			// Token starts with a slash : maybe a font specification
			else if  ( preg_match ( '#^ ( ' . self::$FontSpecifiers . ' ) #ix', $token ) )
			   {
				// Font ids are cached per page, template and font specifier
				$key	=  "$page_number:$current_template:$token" ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'operand' ] ++ ;

				if  ( isset ( $this -> MapIdBuffer [ $key ] ) )
					$id	=   $this -> MapIdBuffer [ $key ] ;
				else
				   {
					$id 	=  $this -> FontTable -> GetFontByMapId ( $page_number, $current_template, $token ) ;

					$this -> MapIdBuffer [ $key ]	=  $id ;
				    }

				return ( array ( 'instruction' => 'resource', 'next' => $next_index, 'resource' => $id, 'token' => $token ) ) ;
			    }
			// Template reference, such as /TPL1. Each reference has initially been replaced by !PDFTOTEXT_TEMPLATE_TPLx during substitution
			// by ProcessTemplateReferences(), because templates not only specify text to be replaced, but also font aliases
			// -and this is the place where we catch font aliases in this case
			else if  ( preg_match ( '/ !PDFTOTEXT_TEMPLATE_ (?P<template> \w+) /ix', $token, $match ) )
			   {
				$current_template	=  '/' . $match [ 'template' ] ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'template' ] ++ ;

				return ( array ( 'instruction' => 'template', 'next' => $next_index, 'token' => $current_template ) ) ;
			    }
			// Others, only counted for statistics
			else if  ( $token  ===  'cm' )
			   {
				// The operands that precede 'cm' belong to it : forget them, as for any other ignored instruction
				$number_stack 	=  array ( ) ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'cm' ] ++ ;
			    }
			else if  ( $token  ===  'BT' )
			   {
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'BT' ] ++ ;

				return ( array ( 'instruction' => 'BT', 'next' => $next_index, 'token' => $token ) ) ;
			    }
			else if  ( $token  ==  'ET' )	// Nothing special to count here
			   {
				return ( array ( 'instruction' => 'ET', 'next' => $next_index, 'token' => $token ) ) ;
			    }
			// Other instructions : we're not that much interested in them, so clear the number stack and consider
			// that the current parameters, floating-point values, have been processed
			else
			   {
				$number_stack 	=  array ( ) ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'ignored' ] ++ ;
			    }

			// No instruction to return yet : continue with the token that follows
			$index 		=  $next_index ;
		    }

		// End of input
		return ( false ) ;
	    }


	// __next_token :
	//	Retrieves the next token from the drawing instructions stream.
	//
	//	Whitespace found at $index is skipped first ; the first non-space character then selects the kind
	//	of token to be extracted :
	//	- "[" : an array, collected up to the first "]" found outside parentheses (cr/lf characters are dropped)
	//	- "(" : a string, collected up to its first unescaped closing parenthesis
	//	- "<" : either a "<< ... >>" construct or a "<...>" series of hex digits (inner whitespace is removed)
	//	- "'" : the tick instruction
	//	- any other character : a number, a keyword, or the character itself
	//
	//	Returns false at the end of input or when a "<" construct is not terminated ; otherwise, returns
	//	an array containing the token and the index of the first character after it.
	//	The $page_number parameter is not used here.
	private function  __next_token ( $page_number, $data, $data_length, $index )
	   {
		// Skip spaces
		$count		=  0 ;

		while  ( $index  <  $data_length  &&  ( $data [ $index ]  ==  ' '  ||  $data [ $index ]  ==  "\t"  ||  $data [ $index ]  ==  "\r"  ||  $data [ $index ]  ==  "\n" ) )
		   {
			$index ++ ;
			$count ++ ;
		    }

		$enhanced_statistics	=  $this -> EnhancedStatistics ;
		$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'space' ]	+=  $count ;

		// End of input
		if  ( $index  >=  $data_length )
			return ( false ) ;

		// The current character will tell us what to do
		$ch 	=  $data [ $index ] ;

		switch ( $ch )
		   {
			// Opening square bracket : we have to find the closing one, taking care of escape sequences
			// that can also specify a square bracket, such as "\]"
		   	case 	"[" :
		   		$pos 		=  $index + 1 ;
		   		$parent 	=  0 ;		// Nesting level of parentheses
		   		$result		=  $ch ;

		   		while  ( $pos  <  $data_length )
		   		   {
		   			$nch 	=  $data [ $pos ++ ] ;

		   			switch  ( $nch )
		   			   {
		   			   	case 	'(' :
		   			   		$parent ++ ;
		   			   		$result 	.=  $nch ;
		   			   		break ;

		   			   	case 	')' :
		   			   		$parent -- ;
		   			   		$result 	.=  $nch ;
		   			   		break ;

						// Escape sequence : copy it verbatim. A backslash that ends the data has nothing
						// to escape, so do not try to read past the end of the stream
		   			   	case 	'\\' :
		   					$result 	.=  $nch ;

							if  ( $pos  <  $data_length )
		   						$result 	.=  $data [ $pos ++ ] ;

		   					break ;

						// A closing bracket only terminates the array when it is not located inside a string
		   			   	case 	']' :
		   					$result 	.=  ']' ;

		   					if  ( ! $parent  )
		   						break  2 ;
		   					else
		   						break ;

						// Line breaks are not part of the token
						case	"\n" :
						case	"\r" :
							break ;

						// Everything else, including the angle brackets of hex strings, is copied as is
		   			   	default :
		   			   		$result 	.=  $nch ;
		   			    }
		   		    }

		   		return ( array ( $result, $pos ) ) ;

			// Parenthesis : Again, we have to find the closing parenthesis, taking care of escape sequences
			// such as "\)"
		   	case 	"(" :
		   		$pos 		=  $index + 1 ;
		   		$result		=  $ch ;

		   		while  ( $pos  <  $data_length )
		   		   {
		   			$nch 	=  $data [ $pos ++ ] ;

		   			if  ( $nch  ==  '\\' )
					   {
						// A backslash that ends the data has nothing to escape : keep it and stop here
						if  ( $pos  >=  $data_length )
						   {
							$result		.=  $nch ;
							break ;
						    }

						$after		 =  $data [ $pos ] ;

						// Character references specified as \xyz, where "xyz" are octal digits
						if  ( $after  >=  '0'  &&  $after  <=  '7' )
						   {
							$result		.=  $nch ;

							while  ( $pos  <  $data_length  &&  $data [ $pos ]  >=  '0'  &&  $data [ $pos ]  <=  '7' )
								$result		.=  $data [ $pos ++ ] ;
						    }
						// Regular character escapes
						else
		   					$result 	.=  $nch . $data [ $pos ++ ] ;
					    }
		   			else if  ( $nch  ==  ')' )
		   			   {
		   				$result 	.=  ')' ;
		   				break ;
		   			    }
		   			else
		   				$result 	.=  $nch ;
		   		   }

		   		return ( array ( $result, $pos ) ) ;

			// A construction of the form : "<< something >>", or a unicode character
		   	case 	'<' :
				if  ( ! isset ( $data [ $index + 1 ] ) )
					return ( false ) ;

		   		if (  $data [ $index + 1 ]  ==  '<' )
		   		   {
		   		   	$pos 	=  strpos ( $data, '>>', $index + 2 ) ;

		   			if  ( $pos  ===  false )
		   				return ( false ) ;

		   			return ( array ( substr ( $data, $index, $pos - $index + 2 ), $pos + 2 ) ) ;
		   		    }
		   		else
		   		   {
					// Start searching right after the opening bracket, so that an empty hex string ("<>")
					// is closed by its own bracket and not by the one of a later construct
		   		   	$pos 	=  strpos ( $data, '>', $index + 1 ) ;

		   			if  ( $pos  ===  false )
		   				return ( false ) ;

					// There can be spaces and newlines inside a series of hex digits, so remove them...
					$result			=  preg_replace ( '/\s+/', '', substr ( $data, $index, $pos - $index + 1 ) ) ;

		   			return ( array ( $result, $pos + 1 ) ) ;
		   		   }

			// Tick character : consider it as a keyword, in the same way as the "TJ" or "Tj" keywords
		   	case 	"'" :
		   		return ( array ( "'", $index + 1 ) ) ;

			// Other cases : this may be either a floating-point number or a keyword
		   	default :
		   		$index ++ ;
		   		$value 	=  $ch ;

				if  ( isset ( $data [ $index ] ) )
				   {
					// Number : an optional sign or a dot, followed by digits and dots
		   			if ( ( self::$CharacterClasses [ $ch ] & self::CTYPE_DIGIT )  ||
							$ch  ==  '-'  ||  $ch  ==  '+'  ||  $ch  ==  '.' )
		   			   {
		   				while  ( $index  <  $data_length  &&
		   						( ( self::$CharacterClasses [ $data [ $index ] ] & self::CTYPE_DIGIT )  ||
									$data [ $index ]  ==  '.' ) )
		   					$value 	.=  $data [ $index ++ ] ;
		   			    }
					// Keyword, name ("/xxx") or substituted template reference ("!xxx")
		   			else if  ( ( self::$CharacterClasses [ $ch ] & self::CTYPE_ALPHA )  ||
							$ch  ==  '/'  ||  $ch  ==  '!' )
		   			   {
						$ch	=  $data [ $index ] ;

						while  ( $index  <  $data_length  &&
							( ( self::$CharacterClasses [ $ch ] & self::CTYPE_ALNUM )  ||
								$ch  ==  '*'  ||  $ch  ==  '-'  ||  $ch  ==  '_'  ||  $ch  ==  '.'  ||  $ch  ==  '+' ) )
						   {
							$value 	.=  $ch ;
							$index ++ ;

							if  ( isset ( $data [ $index ] ) )
								$ch	=  $data [ $index ] ;
						    }
		   			    }
				    }

		   		return ( array ( $value, $index ) ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        ExtractTextWithLayout - Extracts text, trying to render the page layout.

	    PROTOTYPE
		$this -> ExtractTextWithLayout ( &$page_fragments, $page_number, $object_id, $data, &$current_font ) ;

	    DESCRIPTION
	        Extracts text from decoded stream contents, trying to render the layout.
		The drawing instructions are tokenized with __next_token_ex() ; each non-empty piece of text is
		appended to $page_fragments together with the coordinates computed from the current text matrix
		and transformation matrix.

	    PARAMETERS
		$page_fragments (array) -
			Array receiving the extracted text fragments. Each new fragment is an associative array with
			the following entries : 'x', 'y', 'page', 'template', 'font', 'font-height' and 'text' (plus
			a few debugging entries when the debug mode is on).

		$page_number (integer) -
			Page number that contains the text to be extracted.

	    	$object_id (integer) -
	    		Object id of this text block. Only used for debug output.

	    	$data (string) -
	    		Stream contents.

		$current_font (integer) -
			Id of the current font, which should be found in the $this->FontTable property, if anything
			went ok.
			This parameter is required, since text blocks may not specify a new font resource id and reuse
			the one that was set before. It is updated each time a Tf instruction is encountered.

	    RETURN VALUE
		None. The extracted text is returned through the $page_fragments parameter.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  ExtractTextWithLayout ( &$page_fragments, $page_number, $object_id, $data, &$current_font )
	   {
		// Characters that can start a numeric operand
		static		$numeric_starts		=  array
		   (
			'+' => true, '-' => true, '.' => true, '0' => true, '1' => true, '2' => true, '3' => true, '4' => true,
			'5' => true, '6' => true, '7' => true, '8' => true, '9' => true
		    ) ;
		// Initial (default) transformation matrix. To reflect the PDF specifications, we will keep it as a 6 elements array :
		//	[ sx tx ty sy x y ]
		// (although tx and ty are not useful here, since they affect the graphic orientation of the text)
		// sx and sy are scaling parameters, actually a multiplier for the x and y parameters.
		static		$IdentityMatrix		=  array ( 1, 0, 0, 1, 0, 0 ) ;

		// Remove useless instructions
		$new_data	=  $this -> __strip_useless_instructions ( $data ) ;

		if  ( self::$DEBUG )
		   {
			echo "\n----------------------------------- TEXT #$object_id (size = " . strlen ( $data ) . " bytes, new size = " . strlen ( $new_data ) . " bytes)\n" ;
			echo $data ;
			echo "\n----------------------------------- OPTIMIZED TEXT #$object_id (size = " . strlen ( $data ) . " bytes, new size = " . strlen ( $new_data ) . " bytes)\n" ;
			echo $new_data ;
		    }

		$data					=  $new_data ;
		$data_length 				=  strlen ( $data ) ;		// Data length

		$page_fragment_count			=  count ( $page_fragments ) ;

		// Index into the specified block of text-drawing instructions
		$data_index 				=  0 ;

		// Text matrices
		$CTM					=
		$Tm					=  $IdentityMatrix ;

		// Nesting level of BT..ET instructions (Begin text/End text) - they are not nestable but be prepared to meet buggy PDFs
		$BT_nesting_level			=  0 ;

		// Current font data
		$current_font_height			=  0 ;

		// Current font map width, in bytes, plus a flag saying whether the current font is mapped or not
		$current_template	=  '' ;
		$current_font_name	=  '' ;
		$this -> FontTable -> GetFontAttributes ( $page_number, $current_template, $current_font, $current_font_map_width, $current_font_mapped ) ;

		// Operand stack
		$operand_stack				=  array ( ) ;

		// Number of tokens processed so far
		$token_count				=  0 ;

		// Page attributes
		$page_attributes			=  $this -> PageMap -> PageAttributes [ $page_number ] ;

		// Graphics context stack - well, we only store here the current transformation matrix
		$graphic_stack				=  array ( ) ;
		$graphic_stack_size			=  0 ;

		// Global/local execution time measurements
		$tokens_between_timechecks	=  1000 ;
		$enforce_global_execution_time	=  $this -> Options  &  self::PDFOPT_ENFORCE_GLOBAL_EXECUTION_TIME ;
		$enforce_local_execution_time	=  $this -> Options  &  self::PDFOPT_ENFORCE_EXECUTION_TIME ;
		$enforce_execution_time		=  $enforce_global_execution_time | $enforce_local_execution_time ;

		// Whether we should compute enhanced statistics
		$enhanced_statistics		=  $this -> EnhancedStatistics ;

		// Text leading value set by the TL instruction
		$text_leading			=  0.0 ;

		// Loop through the stream of tokens
		while  ( $this -> __next_token_ex ( $page_number, $data, $data_length, $data_index, $token, $next_index )  !==  false )
		   {
			$token_start	=  $token [0] ;
			$token_count ++ ;

			// Use the real token length : deriving it from the stream indexes would only be right when
			// exactly one whitespace character precedes the token
			$length		=  strlen ( $token ) ;

			// Check if we need to enforce execution time checking, to prevent PHP from terminating our script without any hope
			// of catching the error
			if  ( $enforce_execution_time  &&  ! ( $token_count % $tokens_between_timechecks ) )
			   {
				if  ( $enforce_global_execution_time )
				   {
					$now	=  microtime ( true ) ;

					if  ( $now - self::$GlobalExecutionStartTime  >  self::$MaxGlobalExecutionTime )
						error ( new PdfToTextTimeoutException ( "file {$this -> Filename}", true, self::$PhpMaxExecutionTime, self::$MaxGlobalExecutionTime ) ) ;
				    }

				// Per-instance timeout handling
				if  ( $enforce_local_execution_time )
				   {
					$now	=  microtime ( true ) ;

					if  ( $now - $this -> ExecutionStartTime  >  $this -> MaxExecutionTime )
						error ( new PdfToTextTimeoutException ( "file {$this -> Filename}", false, self::$PhpMaxExecutionTime, $this -> MaxExecutionTime ) ) ;
				    }
			    }

			/****************************************************************************************************************

				The order of the testings is important for maximum performance : put the most common cases first.
				A study on over 1000 PDF files has shown the following :

				- Instruction operands appear 24.5 million times
				- Tx instructions (including Tf, Tm, ', ", etc.) : 24M
				- (), <> and [] constructs for drawing text : 17M
				- Other : peanuts...
				- Ignored instructions : 0.5M (these are the instructions without interest for text extraction and that
				 could not be removed by the __strip_useless_instructions() method).

				Of course, white spaces appear more than 100M times between instructions. However, it gets hard to remove
				most of them without compromising the result of __strip_useless_instructions.

			 ***************************************************************************************************************/
			// Numeric or flag for an instruction
			if  ( $token_start  ==  '/'  ||  isset ( $numeric_starts [ $token_start ] ) )
			   {
				$operand_stack []		=  $token ;

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'operand' ] ++ ;
			    }
			// A 2-characters "Tx" or a 1-character quote/doublequote instruction
			else if  ( ( $length  ===  2   &&  $token_start  ===  'T' )  ||  ( $length  ===  1  &&  ( $token_start  ===  "'"  ||  $token_start  ===  '"' ) ) )
			   {
				switch  ( ( $length  ===  1 ) ?  $token [0] : $token [1] )
				   {
					// Tj instruction
					case	'j' :
						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'Tj' ] ++ ;
						break ;

					// Tm instruction
					case	'm' :
						$Tm [0]			=  ( double ) $operand_stack [0] ;
						$Tm [1]			=  ( double ) $operand_stack [1] ;
						$Tm [2]			=  ( double ) $operand_stack [2] ;
						$Tm [3]			=  ( double ) $operand_stack [3] ;
						$Tm [4]			=  ( double ) $operand_stack [4] ;
						$Tm [5]			=  ( double ) $operand_stack [5] ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'Tm' ] ++ ;
						break ;

					// Tf instruction
					case	'f' :
						$current_font_name	=  $operand_stack [0] ;
						$key			=  "$page_number:$current_template:$current_font_name" ;

						// We have to map a font specifier (such /TT0, C0-1, etc.) into an object id.
						// Check first if we already met this font
						if  ( isset ( $this -> MapIdBuffer [ $key ] ) )
							$current_font	=   $this -> MapIdBuffer [ $key ] ;
						// Otherwise retrieve its corresponding object number and put it in our font cache
						else
						   {
							$current_font 	=  $this -> FontTable -> GetFontByMapId ( $page_number, $current_template, $current_font_name ) ;

							$this -> MapIdBuffer [ $key ]	=  $current_font ;
						    }

						$current_font_height	=  ( double ) $operand_stack [1] ;
						$this -> FontTable -> GetFontAttributes ( $page_number, $current_template, $current_font, $current_font_map_width, $current_font_mapped ) ;
						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'Tf' ] ++ ;
						break ;

					// Td instruction
					case	'd' :
						$Tm [4]		+=  ( double ) $operand_stack [0] * abs ( $Tm [0] ) ;
						$Tm [5]		+=  ( double ) $operand_stack [1] * abs ( $Tm [3] ) ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'Td' ] ++ ;
						break ;

					// TJ instruction
					case	'J' :
						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'TJ' ] ++ ;
						break ;

					// TD instruction
					// NOTE(review): the PDF reference defines "tx ty TD" as setting the leading to -ty ; the
					// accumulated y coordinate is subtracted here instead - to be confirmed
					case	'D' :
						$Tm [4]		 +=  ( double ) $operand_stack [0] * $Tm [0] ;
						$Tm [5]		 +=  ( double ) $operand_stack [1] * $Tm [3] ;
						$text_leading	 -=  $Tm [5] ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'TD' ] ++ ;
						break ;

					// T* instruction
					case	'*' :
						$Tm [4]		 =  0.0 ;
						$Tm [5]		-=  $text_leading ; //$current_font_height ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'T*' ] ++ ;
						break ;

					// TL instruction - Set text leading, which is used by the T* and ' instructions
					case	'L' :
						$text_leading		=  ( double ) $operand_stack [0] ;
						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'TL' ] ++ ;
						break ;

					// ' instruction : go to next line and display text
					case	"'" :
						$offset		=  $current_font_height * abs ( $Tm [3] ) ;

						// Update the coordinates of the last text block found so far - if there is one :
						// otherwise, writing at index -1 would create a bogus fragment having no text
						if  ( $page_fragment_count  >  0 )
						   {
							$page_fragments [ $page_fragment_count - 1 ] [ 'x' ]	+=  $text_leading ;
							$page_fragments [ $page_fragment_count - 1 ] [ 'y' ]	-=  $offset ;
						    }

						// And don't forget to update the y coordinate of the current transformation matrix
						$Tm [5]		-=  $offset ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ "'" ] ++ ;
						break ;

					// " instruction
					case	'"' :
						if  ( self::$DEBUG )
							warning ( "Instruction $token not yet implemented." ) ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '"' ] ++ ;
						break ;

					// Other : ignore them
					default :
						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'ignored' ] ++ ;
				    }

				$operand_stack		=  array ( ) ;
			    }
			// cm instruction
			else if  ( $token  ==  'cm' )
			   {
				$a		=  ( double ) $operand_stack [0] ;
				$b		=  ( double ) $operand_stack [1] ;
				$c		=  ( double ) $operand_stack [2] ;
				$d		=  ( double ) $operand_stack [3] ;
				$e		=  ( double ) $operand_stack [4] ;
				$f		=  ( double ) $operand_stack [5] ;

				$CTM		=  array ( $a, $b, $c, $d, $e, $f ) ;
				$operand_stack	=  array ( ) ;

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'cm' ] ++ ;
			    }
			// q/Q instructions (save/restore graphic context)
			else if  ( $token  ===  'q'  )
			   {
				$graphic_stack [ $graphic_stack_size ++ ]	=  array ( $CTM, $Tm ) ;
				$operand_stack					=  array ( ) ;
			    }
			else if  ( $token  ===  'Q' )
			    {
				if  ( $graphic_stack_size )
					list ( $CTM, $Tm )				=  $graphic_stack [ -- $graphic_stack_size ] ;
				else if  ( self::$DEBUG )
					warning ( "Tried to restore graphics context from an empty stack." ) ;

				$operand_stack					=  array ( ) ;
			    }
			// Text array in the [...] notation. Well, in fact, even non-array constructs are returned as an array by the
			// __next_token_ex() function, for the sake of simplicity
			else if  ( $token_start  ===  '[' )
			   {
				$text			=  $this -> __decode_text ( $token, $current_font, $current_font_mapped, $current_font_map_width ) ;

				if  ( $text  !==  '' )
				   {
					$r		=  $this -> __matrix_multiply ( $Tm, $CTM, $page_attributes [ 'width' ], $page_attributes [ 'height' ] ) ;
					$fragment	=  array
					   (
						'x'		=>  ( $r [4]  <  0 ) ?  0.0 : $r [4],
						'y'		=>  ( $r [5]  <  0 ) ?  0.0 : $r [5],
						'page'		=>  $page_number,
						'template'	=>  $current_template,
						'font'		=>  $current_font_name,
						'font-height'	=>  abs ( $current_font_height * $Tm [3] ),
						'text'		=>  $text,
					    ) ;

					// Add debug information when needed
					if  ( self::$DEBUG )
					   {
						$fragment	=  array_merge
						   (
							$fragment,
							array
							   (
								'CTM'			=>  $CTM,
								'Tm'			=>  $Tm,
								'New Tm'		=>  $r,
								'Real font height'	=>  $current_font_height,
								'Page width'		=>  $page_attributes [ 'width' ],
								'Page height'		=>  $page_attributes ['height' ]
							    )
						    ) ;
					    }

					// Add this text fragment to the list
					$page_fragments []	=  $fragment ;
					$page_fragment_count ++ ;

					$operand_stack		=  array ( ) ;
				    }
			    }
			// BT instruction
			else if  ( $token  ==  'BT' )
			   {
				$BT_nesting_level ++ ;
				$operand_stack					=  array ( ) ;
				$graphic_stack [ $graphic_stack_size ++ ]	=  array ( $CTM, $Tm ) ;

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'BT' ] ++ ;
			    }
			// ET instruction
			else if  ( $token  ==  'ET' )
			   {
				if  ( $BT_nesting_level )
				   {
					$BT_nesting_level -- ;

					// Restore the matrices saved by the outermost BT instruction
					if  ( ! $BT_nesting_level  &&  $graphic_stack_size )
					   {
						list ( $CTM, $Tm )	=  $graphic_stack [ -- $graphic_stack_size ] ;
					    }

				    }

				$operand_stack		=  array ( ) ;
			    }
			// Template (substituted in __next_token)
			else if  ( $token_start  ===  '!' )
			   {
				if  ( preg_match ( '/ !PDFTOTEXT_TEMPLATE_ (?P<template> \w+) /ix', $token, $match ) )
				   {
					$name	=  '/' . $match [ 'template' ] ;
					$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'template' ] ++ ;

					if  ( $this -> PageMap -> IsValidXObjectName ( $name ) )
						$current_template	=  $name ;
				    }
				else
					$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'ignored' ] ++ ;

				$operand_stack	=  array ( ) ;
			    }
			// Other instructions
			else
			   {
				$operand_stack		=  array ( ) ;
				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'ignored' ] ++ ;
			    }

			// Update current index in instruction stream
			$data_index	=  $next_index ;
		    }
	    }


	// __matrix_multiply -
	//	Multiplies matrix $ma by $mb.
	//	PDF transformation matrices are 3x3 matrices containing the following values :
	//
	//		| sx rx 0 |
	//		| ry sy 0 |
	//		| tx ty 1 |
	//
	//	However, we do not care about the 3rd column, which is always hardcoded. Transformation
	//	matrices here are implemented as 6-elements arrays :
	//
	//		[ sx, rx, ry, sy, tx, ty ]
	//
	//	Before the multiplication, the sx and sy elements of both matrices are reduced to +1 (strictly
	//	positive value) or -1 (any other value, including zero).
	//	The $page_width and $page_height parameters are currently not used.
	//
	//	Returns the resulting 6-elements array. When the resulting sx (or sy) is negative, the resulting
	//	tx (or ty) is replaced by its absolute value.
	private function  __matrix_multiply ( $ma, $mb, $page_width, $page_height )
	   {
		// Scaling text is only appropriate for rendering graphics ; in our case, we just have to render
		// basic text without any consideration about its width or height ; so adjust the sx/sy parameters
		// accordingly
		$scale_1x	=  ( $ma [0]  >  0 ) ?  1 : -1 ;
		$scale_1y	=  ( $ma [3]  >  0 ) ?  1 : -1 ;
		$scale_2x	=  ( $mb [0]  >  0 ) ?  1 : -1 ;
		$scale_2y	=  ( $mb [3]  >  0 ) ?  1 : -1 ;

		// Perform the matrix multiplication. The second row of the result is built from the second row
		// of $ma, that is ( ry, sy ) : its first term must use $ma [2], not the sy sign
		$r	=  array ( ) ;
		$r [0]	=  ( $scale_1x * $scale_2x ) + ( $ma [1]   * $mb [2]   ) ;
		$r [1]	=  ( $scale_1x * $mb [1]   ) + ( $ma [1]   * $scale_2y ) ;
		$r [2]	=  ( $ma [2]   * $scale_2x ) + ( $scale_1y * $mb [2]   ) ;
		$r [3]	=  ( $ma [2]   * $mb [1]   ) + ( $scale_1y * $scale_2y ) ;
		$r [4]	=  ( $ma [4]   * $scale_2x ) + ( $ma [5]   * $mb [2]   ) + $mb [4] ;
		$r [5]	=  ( $ma [4]   * $mb [1]   ) + ( $ma [5]   * $scale_2y ) + $mb [5] ;

		// Negative x/y values are expressed relative to the page width/height (???)
		if  ( $r [0]  <  0 )
			$r [4]	=  abs ( $r [4] ) ;//$page_width - $r [4] ;

		if  ( $r [3]  <  0 )
			$r [5]	=  abs ( $r [5] ) ; //$page_height - $r [5] ;

		return ( $r ) ;
	    }


	// __next_token_ex :
	//	Reviewed version of __next_token, adapted to ExtractTextWithLayout.
	//	Both functions will be unified when this one will be stabilized.
	//
	//	Whitespace found at $index is skipped first, then the next token is extracted. On success, the
	//	function returns true, $token receives the token and $next_index the index of the first character
	//	after it. Strings between parentheses and series of hex digits between angle brackets are enclosed
	//	within square brackets, so that the caller only has to handle the array notation.
	//	Returns false at the end of input or when a "<" construct is not terminated.
	//	The $page_number parameter is not used here.
	private function  __next_token_ex ( $page_number, $data, $data_length, $index, &$token, &$next_index )
	   {
		// Skip spaces
		$count		=  0 ;

		while  ( $index  <  $data_length  &&  ( $data [ $index ]  ==  ' '  ||  $data [ $index ]  ==  "\t"  ||  $data [ $index ]  ==  "\r"  ||  $data [ $index ]  ==  "\n" ) )
		   {
			$index ++ ;
			$count ++ ;
		    }

		$enhanced_statistics	=  $this -> EnhancedStatistics ;
		$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ 'space' ]	+=  $count ;

		// End of input
		if  ( $index  >=  $data_length )
			return ( false ) ;

		// The current character will tell us what to do
		$ch 	=  $data [ $index ] ;

		switch ( $ch )
		   {
			// Opening square bracket : we have to find the closing one, taking care of escape sequences
			// that can also specify a square bracket, such as "\]"
		   	case 	"[" :
		   		$next_index 	=  $index + 1 ;
		   		$parent 	=  0 ;		// Nesting level of parentheses
		   		$token		=  '[' ;

		   		while  ( $next_index  <  $data_length )
		   		   {
		   			$nch 	=  $data [ $next_index ++ ] ;

		   			switch  ( $nch )
		   			   {
		   			   	case 	'(' :
		   			   		$parent ++ ;
		   			   		$token 	.=  $nch ;
		   			   		break ;

		   			   	case 	')' :
		   			   		$parent -- ;
		   			   		$token 	.=  $nch ;
		   			   		break ;

						// Escape sequence : copy it verbatim. A backslash that ends the data has nothing
						// to escape, so do not try to read past the end of the stream
		   			   	case 	'\\' :
		   					$token 	.=  $nch ;

							if  ( $next_index  <  $data_length )
		   						$token 	.=  $data [ $next_index ++ ] ;

		   					break ;

						// A closing bracket only terminates the array when it is not located inside a string
		   			   	case 	']' :
		   					$token 	.=  ']' ;

		   					if  ( ! $parent  )
		   						break  2 ;
		   					else
		   						break ;

						// Line breaks are not part of the token
						case	"\n" :
						case	"\r" :
							break ;

						// Everything else, including the angle brackets of hex strings, is copied as is
		   			   	default :
		   			   		$token 	.=  $nch ;
		   			    }
		   		    }

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '[' ] ++ ;

		   		return ( true ) ;

			// Parenthesis : Again, we have to find the closing parenthesis, taking care of escape sequences
			// such as "\)"
		   	case 	"(" :
		   		$next_index 	=  $index + 1 ;
		   		$token		=  '[' . $ch ;

		   		while  ( $next_index  <  $data_length )
		   		   {
		   			$nch 	=  $data [ $next_index ++ ] ;

		   			if  ( $nch  ===  '\\' )
					   {
						// A backslash that ends the data has nothing to escape : keep it and stop here
						if  ( $next_index  >=  $data_length )
						   {
							$token		.=  $nch ;
							break ;
						    }

						$after		 =  $data [ $next_index ] ;

						// Character references specified as \xyz, where "xyz" are octal digits
						if  ( $after  >=  '0'  &&  $after  <=  '7' )
						   {
							$token		.=  $nch ;

							while  ( $next_index  <  $data_length  &&  $data [ $next_index ]  >=  '0'  &&  $data [ $next_index ]  <=  '7' )
								$token		.=  $data [ $next_index ++ ] ;
						    }
						// Regular character escapes
						else
		   					$token 	.=  $nch . $data [ $next_index ++ ] ;
					    }
		   			else if  ( $nch  ===  ')' )
		   			   {
		   				$token 	.=  ')' ;
		   				break ;
		   			    }
		   			else
		   				$token 	.=  $nch ;
		   		   }

				$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '(' ] ++ ;
				$token	.=  ']' ;

		   		return ( true ) ;

			// A construction of the form : "<< something >>", or a unicode character
		   	case 	'<' :
				if  ( isset ( $data [ $index + 1 ] ) )
				   {
		   			if (  $data [ $index + 1 ]  ===  '<' )
		   			   {
		   		   		$next_index 	=  strpos ( $data, '>>', $index + 2 ) ;

		   				if  ( $next_index  ===  false )
		   					return ( false ) ;

						$token		=  substr ( $data, $index, $next_index - $index + 2 ) ;
						$next_index    +=  2 ;

						return ( true ) ;
		   			    }
		   			else
		   			   {
						// Start searching right after the opening bracket, so that an empty hex string ("<>")
						// is closed by its own bracket and not by the one of a later construct
		   		   		$next_index 	=  strpos ( $data, '>', $index + 1 ) ;

		   				if  ( $next_index  ===  false )
		   					return ( false ) ;

						$enhanced_statistics  &&  $this -> Statistics [ 'Distributions' ] [ '<' ] ++ ;

						// There can be spaces and newlines inside a series of hex digits, so remove them...
						$result			=  preg_replace ( '/\s+/', '', substr ( $data, $index, $next_index - $index + 1 ) ) ;

						$token		=  "[$result]" ;
						$next_index ++ ;

		   				return ( true ) ;
		   			   }
				    }
				else
					return ( false ) ;

			// Tick character : consider it as a keyword, in the same way as the "TJ" or "Tj" keywords
		   	case 	"'" :
			case	'"' :
		   		$token		 =  $ch ;

				// The token is one character long : the next one starts right after it. Do not rely on the
				// incoming value of $next_index, which does not account for the whitespace skipped above
				$next_index	 =  $index + 1 ;

				return ( true ) ;

			// Other cases : this may be either a floating-point number or a keyword
		   	default :
		   		$next_index	=  ++ $index ;
		   		$token 		=  $ch ;

				if  ( isset ( $data [ $next_index ] ) )
				   {
					// Number : collect digits, signs and dots
		   			if ( ( $ch  >=  '0'  &&  $ch  <=  '9' )  ||  $ch  ==  '-'  ||  $ch  ==  '+'  ||  $ch  ==  '.' )
		   			   {
		   				while  ( $next_index  <  $data_length  &&
		   						( ( $data [ $next_index ]  >=  '0'  &&  $data [ $next_index ]  <=  '9' )  ||
									$data [ $next_index ]  ===  '-'  ||  $data [ $next_index ]  ===  '+'  ||  $data [ $next_index ]  ===  '.' ) )
		   					$token 	.=  $data [ $next_index ++ ] ;
		   			    }
					// Keyword, name ("/xxx") or substituted template reference ("!xxx")
		   			else if  ( ( self::$CharacterClasses [ $ch ] & self::CTYPE_ALPHA )  ||
							$ch  ==  '/'  ||  $ch  ==  '!' )
		   			   {
						$ch	=  $data [ $next_index ] ;

						while  ( $next_index  <  $data_length  &&
							( ( self::$CharacterClasses [ $ch ] & self::CTYPE_ALNUM )  ||
								$ch  ==  '*'  ||  $ch  ==  '-'  ||  $ch  ==  '_'  ||  $ch  ==  '.'  ||  $ch  ==  '+' ) )
						   {
							$token 	.=  $ch ;
							$next_index ++ ;

							if  ( isset ( $data [ $next_index ] ) )
								$ch	=  $data [ $next_index ] ;
						    }
		   			    }
				    }

		   		return ( true ) ;
		    }
	    }


	// __decode_text -
	//	Text decoding function when the PDFOPT_BASIC_LAYOUT flag is specified.
	private function  __decode_text ( $data, $current_font, $current_font_mapped, $current_font_map_width )
	   {
		list ( $text_values, $offsets ) 	=  $this -> __extract_chars_from_array ( $data ) ;
		$value_index				=  0 ;
		$result					=  '' ;

		// Fonts having character maps will require some special processing
		if  ( $current_font_mapped )
		   {
			// Loop through each text value
			foreach  ( $text_values  as  $text )
			   {
				$is_hex 	=  ( $text [0]  ==  '<' ) ;
				$length 	=  strlen ( $text ) - 1 ;
				$handled	=  false ;

				// Characters are encoded within angle brackets ( "<>" ).
				// Note that several characters can be specified within the same angle brackets, so we have to take
				// into account the width we detected in the begincodespancerange construct
				if  ( $is_hex )
				   {
					for  ( $i = 1 ; $i  <  $length ; $i += $current_font_map_width )
					   {
						$value		 =  substr ( $text, $i, $current_font_map_width ) ;
						$ch 		 =  hexdec ( $value ) ;

						if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ) )
							$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ;
						else
						   {
							$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ch ) ;
							$this -> CharacterMapBuffer [ $current_font ] [ $ch ]		=  $newchar ;
						    }

						$result		.=  $newchar ;
					    }

					$handled	 =  true ;
				    }
				// Yes ! double-byte codes can also be specified as plain text within parentheses !
				// However, we have to be really careful here ; the sequence :
				//	(Be)
				// can mean the string "Be" or the Unicode character 0x4265 ('B' = 0x42, 'e' = 0x65)
				// We first look if the character map contains an entry for Unicode codepoint 0x4265 ;
				// if not, then we have to consider that it is regular text to be taken one character by
				// one character. In this case, we fall back to the "if ( ! $handled )" condition
				else if  ( $current_font_map_width  ==  4  )
				   {
					$temp_result		=  '' ;

					for  ( $i = 1 ; $i  <  $length ; $i ++ )
					   {
						// Each character in the pair may be a backslash, which escapes the next character so we must skip it
						// This code needs to be reviewed ; the same code is duplicated to handle escaped characters in octal notation
						if  ( $text [$i]  !=  '\\' )
							$ch1	=  $text [$i] ;
						else
						   {
							$i ++ ;

							if  ( $text [$i]  <  '0'  ||  $text [$i]  >  '7' )
								$ch1	=  $this -> ProcessEscapedCharacter ( $text [$i] ) ;
							else
							   {
								$oct		=  '' ;
								$digit_count	=  0 ;

								while  ( $i  <  $length  &&  $text [$i]  >=  '0'  &&  $text [$i]  <=  '7'  &&  $digit_count  <  3 )
								   {
									$oct	.=  $text [$i ++] ;
									$digit_count ++ ;
								    }

								$ch1	=  chr ( octdec ( $oct ) ) ;
								$i -- ;
							    }
						    }

						$i ++ ;

						if  ( $text [$i]  != '\\' )
							$ch2	=  $text [$i] ;
						else
						   {
							$i ++ ;

							if  ( $text [$i]  <  '0'  ||  $text [$i]  >  '7' )
								$ch2	=  $this -> ProcessEscapedCharacter ( $text [$i] ) ;
							else
							   {
								$oct		=  '' ;
								$digit_count	=  0 ;

								while  ( $i  <  $length  &&  $text [$i]  >=  '0'  &&  $text [$i]  <=  '7'  &&  $digit_count  <  3 )
								   {
									$oct	.=  $text [$i ++] ;
									$digit_count ++ ;
								    }

								$ch2	=  chr ( octdec ( $oct ) ) ;
								$i -- ;
							    }
						    }

						// Build the 2-bytes character code
						$ch		=  ( ord ( $ch1 )  <<  8 )  |  ord ( $ch2 ) ;

						if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ) )
							$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ch ] ;
						else
						   {
							$newchar	=  $this -> FontTable -> MapCharacter ( $current_font, $ch, true ) ;
							$this -> CharacterMapBuffer [ $current_font ] [ $ch ]		=  $newchar ;
						    }

						// Yes !!! for characters encoded with two bytes, we can find the following construct :
						//	0x00 "\" "(" 0x00 "C" 0x00 "a" 0x00 "r" 0x00 "\" ")"
						// which must be expanded as : (Car)
						// We have here the escape sequences "\(" and "\)", but the backslash is encoded on two bytes
						// (although the MSB is nul), while the escaped character is encoded on 1 byte. waiting
						// for the next quirk to happen...
						if  ( $newchar  ==  '\\' )
						   {
							$newchar		=  $this -> ProcessEscapedCharacter ( $text [ $i + 2 ] ) ;
							$i ++ ;		// this time we processed 3 bytes, not 2
						    }

						$temp_result		.=  $newchar ;
					    }

					// Happens only if we were unable to translate a character using the current character map
					$result		.=  $temp_result ;
					$handled	 =  true ;
				    }

				// Character strings within parentheses.
				// For every text value, use the character map table for substitutions
				if  ( ! $handled )
				   {
					for  ( $i = 1 ; $i  <  $length ; $i ++ )
					   {
						$ch 		=  $text [$i] ;

						// Set to true to optimize calls to MapCharacters
						// Currently does not work with [email protected]/infoma.pdf (a few characters differ)
						$use_map_buffer	=  false ;

						// ... but don't forget to handle escape sequences "\n" and "\r" for characters
						// 10 and 13
						if  ( $ch  ==  '\\' )
						   {
							$ch 	=  $text [++$i] ;

							// Escaped character
							if  ( $ch  <  '0'  ||  $ch  >  '7' )
								$ch		=  $this -> ProcessEscapedCharacter ( $ch ) ;
							// However, an octal form can also be specified ; in this case we have to take into account
							// the character width for the current font (if the character width is 4 hex digits, then we
							// will encounter constructs such as "\000\077").
							// The method used here is dirty : we build a regex to match octal character representations on a substring
							// of the text
							else
							   {
								$width		=  $current_font_map_width / 2 ;	// Convert to byte count
								$subtext	=  substr ( $text, $i - 1 ) ;
								$regex		=  "#^ (\\\\ [0-7]{3}){1,$width} #imsx" ;

								$status		=  preg_match ( $regex, $subtext, $octal_matches ) ;

								if  ( $status )
								   {
									$octal_values	=  explode ( '\\', substr ( $octal_matches [0], 1 ) ) ;
									$ord		=  0 ;

									foreach  ( $octal_values  as  $octal_value )
										$ord	=  ( $ord  <<  8 ) + octdec ( $octal_value ) ;

									$ch	 =  chr ( $ord ) ;
									$i	+=  strlen ( $octal_matches [0] ) - 2 ;
								    }
							    }

							$use_map_buffer		=  false ;
						    }

						// Add substituted character to the output result
						$ord		 =  ord ( $ch ) ;

						if  ( ! $use_map_buffer )
							$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
						else
						   {
							if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ) )
								$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ;
							else
							   {
								$newchar	 =  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
								$this -> CharacterMapBuffer [ $current_font ] [ $ord ]	=  $newchar ;
							    }
						    }

						$result		.=  $newchar ;
					    }
				    }

				// Handle offsets between blocks of characters
				if  ( isset ( $offsets [ $value_index ] )  &&
						- ( $offsets [ $value_index ] )  >  $this -> MinSpaceWidth )
					$result		.=  $this -> __get_character_padding ( $offsets [ $value_index ] ) ;

				$value_index ++ ;
			    }
		    }
		// For fonts having no associated character map, we simply encode the string in UTF8
		// after the C-like escape sequences have been processed
		// Note that <xxxx> constructs can be encountered here, so we have to process them as well
		else
		   {
			foreach  ( $text_values  as  $text )
			   {
				$is_hex 	=  ( $text [0]  ==  '<' ) ;
				$length 	=  strlen ( $text ) - 1 ;

				// Some text within parentheses may have a backslash followed by a newline, to indicate some continuation line.
				// Example :
				//	(this is a sentence \
				//	 continued on the next line)
				// Funny isn't it ? so remove such constructs because we don't care
				$text		=  str_replace ( array ( "\\\r\n", "\\\r", "\\\n" ), '', $text ) ;

				// Characters are encoded within angle brackets ( "<>" )
				if  ( $is_hex )
				   {
					for  ( $i = 1 ; $i  <  $length ; $i += 2 )
					   {
						$ch 	=  hexdec ( substr ( $text, $i, 2 ) ) ;

						$result .=  $this -> CodePointToUtf8 ( $ch ) ;
					    }
				    }
				// Characters are plain text
				else
				   {
					$text	=  self::Unescape ( $text ) ;

					for  ( $i = 1, $length = strlen ( $text ) - 1 ; $i  <  $length ; $i ++ )
					   {
						$ch	=  $text [$i] ;
						$ord	=  ord ( $ch ) ;

						if  ( $ord  <  127 )
							$newchar	=  $ch ;
						else
						   {
							if  ( isset ( $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ) )
								$newchar	=  $this -> CharacterMapBuffer [ $current_font ] [ $ord ] ;
							else
							   {
								$newchar	=  $this -> FontTable -> MapCharacter ( $current_font, $ord ) ;
								$this -> CharacterMapBuffer [ $current_font ] [ $ord ]	=  $newchar ;
							    }
						    }

						$result		.=  $newchar ;
					    }
				    }

				// Handle offsets between blocks of characters
				if  ( isset ( $offsets [ $value_index ] )  &&
						abs ( $offsets [ $value_index ] )  >  $this -> MinSpaceWidth )
					$result		.=  $this -> __get_character_padding ( $offsets [ $value_index ] ) ;

				$value_index ++ ;
			   }
		    }

		// All done, return
		return ( $result ) ;
	    }


	// __assemble_text_fragments -
	//	Assembles text fragments collected by the ExtractTextWithLayout function.
	private function  __assemble_text_fragments ( $page_number, &$fragments, &$page_width, &$page_height )
	   {
		// Builds the text of one page from its positioned text fragments.
		//
		// Parameters :
		//	$page_number -
		//		Key into $this -> PageMap -> PageAttributes for the page being assembled.
		//	$fragments (in/out) -
		//		Array of fragments ; the entries read here are 'x', 'y', 'text', 'font' and 'font-height'.
		//		Each fragment receives a 'width' entry and the array is sorted in place.
		//	$page_width, $page_height (out) -
		//		Set to the page attributes when they are present and non-zero ; otherwise derived from
		//		the fragments (largest right edge for the width, 'y' of the first sorted fragment for the height).
		//
		// Returns the assembled text, or an empty string when there is no fragment (in which case the
		// output parameters are left untouched).
		$fragment_count		=  count ( $fragments ) ;

		// No fragment no cry...
		if  ( ! $fragment_count )
			return ( '' ) ;

		// Compute the width of each fragment
		foreach  ( $fragments  as  &$fragment )
			$this -> __compute_fragment_width ( $fragment ) ;

		// Break the reference left by the loop above : $fragment is reused as a plain variable below, and
		// without this every later assignment would overwrite an element of the caller's array
		unset ( $fragment ) ;

		// Sort the fragments and group them by line
		usort ( $fragments, array ( $this, '__sort_page_fragments' ) ) ;
		$line_fragments		=  $this -> __group_line_fragments ( $fragments ) ;

		// Retrieve the page attributes
		$page_attributes	=  $this -> PageMap -> PageAttributes [ $page_number ] ;

		// Some buggy PDF do not specify page width or page height so, during the processing of text fragments,
		// page width & height will be set to the largest x/y coordinate
		if  ( isset ( $page_attributes [ 'width' ] )  &&  $page_attributes [ 'width' ] )
			$page_width		=  $page_attributes [ 'width' ] ;
		else
		   {
			$page_width		=  0 ;

			foreach  ( $fragments  as  $fragment )
			   {
				$end_x	=  $fragment [ 'x' ] + $fragment [ 'width' ] ;

				if  ( $end_x  >  $page_width )
					$page_width	=  $end_x ;
			    }
		    }

		// Fragments are sorted by descending y, so the first one carries the largest y coordinate
		if  ( isset ( $page_attributes [ 'height' ] )  &&  $page_attributes [ 'height' ] )
			$page_height		=  $page_attributes [ 'height' ] ;
		else
			$page_height		=  $fragments [0] [ 'y' ] ;

		// Block separator
		$separator			=  ( $this -> BlockSeparator ) ?  $this -> BlockSeparator : ' ' ;

		// Add page information if the PDFOPT_DEBUG_SHOW_COORDINATES option has been specified
		if  ( $this -> Options  &  self::PDFOPT_DEBUG_SHOW_COORDINATES )
			$result		=  "[Page : $page_number, width = $page_width, height = $page_height]" . $this -> EOL ;
		else
			$result			=  '' ;

		// Loop through each line of fragments
		for  ( $i = 0, $line_count = count ( $line_fragments ) ; $i  <  $line_count ; $i ++ )
		   {
			$current_x	=  0 ;

			// Loop through each fragment of the current line
			for  ( $j = 0, $line_fragment_count = count ( $line_fragments [$i] ) ; $j  <  $line_fragment_count ; $j ++ )
			   {
				$fragment	=  $line_fragments [$i] [$j] ;

				// Process the markers which do not have an associated font yet - this will be done by matching
				// the current text fragment against one of the regular expressions defined.
				// If a match occurs, then all the subsequent text fragments using the same font will be put markers.
				// The list is walked by key because removing an entry leaves a hole in it ; indexing it from
				// 0 to count-1 would hit removed entries and never reach the last ones.
				foreach  ( $this -> UnprocessedMarkerList [ 'font' ]  as  $marker_index => $marker )
				   {
					if  ( preg_match ( $marker [ 'regex' ], $fragment [ 'text' ] ) )
					   {
						$this -> TextWithFontMarkers [ $fragment [ 'font' ] ]	=  array
							(
							'font'		=>  $fragment [ 'font'  ],
							'height'	=>  $fragment [ 'font-height' ],
							'regex'		=>  $marker   [ 'regex' ],
							'start'		=>  $marker   [ 'start' ],
							'end'		=>  $marker   [ 'end'   ]
							) ;

						unset ( $this -> UnprocessedMarkerList [ 'font' ] [ $marker_index ] ) ;

						// At most one marker is bound per fragment
						break ;
					    }
				    }

				// Add debug info if needed
				if  ( $this -> Options & self::PDFOPT_DEBUG_SHOW_COORDINATES )
					$result		.=  $this -> __debug_get_coordinates ( $fragment ) ;

				// Add a separator between two fragments, if needed
				if  ( $j )
				   {
					if  ( $current_x  <  floor ( $fragment [ 'x' ] ) )	// Accept small rounding errors
						$result		.=  $separator ;
				    }

				// Check if we need to add markers around this text fragment (same font AND same font height)
				if  ( isset ( $this -> TextWithFontMarkers [ $fragment [ 'font' ] ] )  &&
						$this -> TextWithFontMarkers [ $fragment [ 'font' ] ] [ 'height' ]  ==  $fragment [ 'font-height' ] )
				    {
					$fragment_text	=  $this -> TextWithFontMarkers [ $fragment [ 'font' ] ] [ 'start' ] .
							   $fragment [ 'text' ] .
							   $this -> TextWithFontMarkers [ $fragment [ 'font' ] ] [ 'end' ] ;
				     }
				else
					$fragment_text  =  $fragment [ 'text' ] ;

				// Add the current fragment to the result
				$result		.=  $fragment_text ;

				// Update current x-position (right edge of the fragment just emitted)
				$current_x	=  $fragment [ 'x' ] + $fragment [ 'width' ] ;
			    }

			// Add a line break between each line
			$result		.=  $this -> EOL ;
		    }

		// All done, return
		return ( $result ) ;
	    }


	// __sort_page_fragments -
	//	Sorts page fragments by their (y,x) coordinates.
	public function  __sort_page_fragments ( $a, $b )
	   {
		// usort() comparison callback : orders fragments by descending 'y', then by ascending 'x'.
		// Always returns -1, 0 or 1 : returning the raw coordinate difference would be truncated to an
		// integer by usort(), so fragments less than one unit apart would be reported as equal.
		// Loose comparison is used so that 5 and 5.0 designate the same coordinate.
		if  ( $a [ 'y' ]  !=  $b [ 'y' ] )
			return ( ( $a [ 'y' ]  <  $b [ 'y' ] ) ?  1 : -1 ) ;

		if  ( $a [ 'x' ]  !=  $b [ 'x' ] )
			return ( ( $a [ 'x' ]  <  $b [ 'x' ] ) ?  -1 : 1 ) ;

		return ( 0 ) ;
	    }


	// __sort_line_fragments -
	//	Sorts fragments per line.
	public function  __sort_line_fragments ( $a, $b )
	   {
		// usort() comparison callback : orders fragments by ascending 'x'.
		// Returns -1, 0 or 1 instead of the raw difference, which usort() would truncate to an integer
		// (making fragments less than one unit apart compare as equal).
		if  ( $a [ 'x' ]  ==  $b [ 'x' ] )
			return ( 0 ) ;

		return ( ( $a [ 'x' ]  <  $b [ 'x' ] ) ?  -1 : 1 ) ;
	    }


	// __group_line_fragments -
	//	Groups page fragments per line, allowing a certain variation in the y-position.
	private function  __group_line_fragments ( $fragments )
	   {
		// Splits a list of fragments into lines.
		// $fragments is expected to be a 0-based list already ordered by descending 'y' (this is what
		// the visible caller passes after sorting with __sort_page_fragments).
		// A fragment stays on the current line as long as its 'y' plus its 'font-height' reaches the 'y'
		// of the fragment that opened the line ; otherwise it opens a new line.
		// Returns an array of lines, each line being an array of fragments sorted by ascending 'x'.
		$result			=  array ( ) ;
		$fragment_count		=  count ( $fragments ) ;

		// Nothing to group : do not fabricate a line from a missing first fragment
		if  ( ! $fragment_count )
			return ( $result ) ;

		$last_y_coordinate	=  $fragments [0] [ 'y' ] ;
		$current_fragments	=  array ( $fragments [0] ) ;

		for  ( $i = 1 ; $i  <  $fragment_count ; $i ++ )
		   {
			$fragment	=  $fragments [$i] ;

			if  ( $fragment [ 'y' ] + $fragment [ 'font-height' ]  >=  $last_y_coordinate )
				$current_fragments []	=  $fragment ;
			else
			   {
				// This fragment is below the current line : flush the line and start a new one
				$last_y_coordinate	=  $fragment [ 'y' ] ;
				usort ( $current_fragments, array ( $this, '__sort_line_fragments' ) ) ;
				$result	[]		=  $current_fragments ;
				$current_fragments	=  array ( $fragment ) ;
			    }
		    }

		// Flush the last line, which always holds at least one fragment at this point
		usort ( $current_fragments, array ( $this, '__sort_line_fragments' ) ) ;
		$result []	=  $current_fragments ;

		return ( $result ) ;
	    }


	// __compute_fragment_width -
	//	Compute the width of the specified text fragment and add the width entry accordingly.
	//	Returns the font object associated with this fragment
	private function  __compute_fragment_width ( &$fragment )
	   {
		// Font objects are looked up once per (page, template, font) triplet and then served from
		// the FontObjectsBuffer property.
		$cache_key	=  $fragment [ 'page' ] . ':' . $fragment [ 'template' ] . ':' . $fragment [ 'font' ] ;

		if  ( ! isset ( $this -> FontObjectsBuffer [ $cache_key ] ) )
		   {
			$this -> FontObjectsBuffer [ $cache_key ]	=  $this -> FontTable -> GetFontObject
			   (
				$fragment [ 'page' ],
				$fragment [ 'template' ],
				$fragment [ 'font' ]
			    ) ;
		    }

		$font	=  $this -> FontObjectsBuffer [ $cache_key ] ;

		// Without a font object there is nothing to measure the text with : the width is then 0
		if  ( $font )
			$fragment [ 'width' ]	=  $font -> GetStringWidth ( $fragment [ 'text' ], $this -> ExtraTextWidth ) ;
		else
			$fragment [ 'width' ]	=  0 ;

		// Hand the font object (or whatever falsy value the lookup produced) back to the caller
		return ( $font ) ;
	    }


	// __debug_get_coordinates -
	//	Returns the coordinates of the specified text fragment, in debug mode.
	private function  __debug_get_coordinates ( $fragment )
	   {
		// Formats the position, size and font of a fragment as a bracketed debug string that starts on a
		// new line ; numeric values are rounded to 3 decimals.
		return
		   (
			sprintf
			   (
				"\n[x:%s, y:%s, w: %s, h:%s, font:%s]",
				round ( $fragment [ 'x' ], 3 ),
				round ( $fragment [ 'y' ], 3 ),
				round ( $fragment [ 'width' ], 3 ),
				round ( $fragment [ 'font-height' ], 3 ),
				$fragment [ 'font' ]
			    )
		    ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetTrailerInformation - Retrieves trailer information.

	    PROTOTYPE
	        $this -> GetTrailerInformation ( $contents, $pdf_objects ) ;

	    DESCRIPTION
	        Retrieves trailer information :
		- Unique file ID (both parts of the /ID entry)
		- Id of the object containing encryption data, if the PDF file is encrypted
		- Encryption data

	    PARAMETERS
	        $contents (string) -
	                PDF file contents.

	        $pdf_objects (array) -
	                PDF object data, indexed by object id. It is used to locate the object referenced by
	                the /Encrypt entry of the trailer.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetTrailerInformation ( $contents, $pdf_objects )
	   {
		// Matches the two hexadecimal strings of the /ID array
		static		$id_regex	=  '#
							/ID \s* \[ \s*
							< (?P<id1> [^>]+) >
							\s*
							< (?P<id2> [^>]+) >
							\s* \]
						    #imsx' ;

		// Nothing to do when the file has no trailer dictionary
		$has_trailer	=  preg_match ( '/trailer \s* << (?P<trailer> .+?) >>/imsx', $contents, $matches ) ;

		if  ( ! $has_trailer )
			return ;

		$trailer	=  $matches [ 'trailer' ] ;

		// Both parts of the file identifier are kept
		if  ( preg_match ( $id_regex, $trailer, $ids ) )
		   {
			$this -> ID	=  $ids [ 'id1' ] ;
			$this -> ID2	=  $ids [ 'id2' ] ;
		    }

		// The /Encrypt entry, when present, gives the number of the object holding the encryption data
		$has_encryption	=  preg_match ( '#/Encrypt \s+ (?P<object> \d+)#ix', $trailer, $encryption ) ;

		if  ( ! $has_encryption )
			return ;

		$encrypt_object_id	=  $encryption [ 'object' ] ;

		// The referenced object exists : parse the encryption information it contains
		if  ( isset ( $pdf_objects [ $encrypt_object_id ] ) )
		   {
			$this -> EncryptionData		=  PdfEncryptionData::GetInstance ( $this -> ID, $encrypt_object_id, $pdf_objects [ $encrypt_object_id ] ) ;
			$this -> IsEncrypted		=  ( $this -> EncryptionData  !==  false ) ;

			return ;
		    }

		// The referenced object is missing : this is only reported in debug mode
		if  ( self::$DEBUG )
			error ( new PdfToTextDecodingException ( "Object #$encrypt_object_id, which should contain encryption data, is missing." ) ) ;
	    }


	// __build_ignored_instructions :
	//	Takes the template regular expressions from self::$IgnoredInstructionTemplatesLayout and
	//	self::$IgnoredInstructionTemplatesNoLayout, replaces in each of them the strings that are keys of the
	//	self::$ReplacementConstructs array with the corresponding values, and appends the resulting regular
	//	expressions to self::$IgnoredInstructionsLayout and self::$IgnoredInstructionsNoLayout. These are meant to
	//	match the Postscript instructions to be removed from any text stream.
	private function  __build_ignored_instructions ( )
	   {
		// Placeholders found in the templates, and the text each of them stands for
		$placeholders	=  array_keys   ( self::$ReplacementConstructs ) ;
		$expansions	=  array_values ( self::$ReplacementConstructs ) ;

		// Templates of the "layout" list apply to both modes, so they feed both regex lists
		$shared		=  str_replace ( $placeholders, $expansions, self::$IgnoredInstructionTemplatesLayout ) ;

		foreach  ( $shared  as  $expanded )
		   {
			$regex					=  "/{$expanded}/msx" ;
			self::$IgnoredInstructionsLayout []	=  $regex ;
			self::$IgnoredInstructionsNoLayout []	=  $regex ;
		    }

		// Templates of the "no layout" list only feed the corresponding regex list
		$specific	=  str_replace ( $placeholders, $expansions, self::$IgnoredInstructionTemplatesNoLayout ) ;

		foreach  ( $specific  as  $expanded )
			self::$IgnoredInstructionsNoLayout []	=  "/{$expanded}/msx" ;
	    }


	// __convert_utf16 :
	//	Some strings found in a pdf file can be encoded in UTF16 (author information, for example).
	//	When this is the case, the string is converted to UTF8.
	private function  __convert_utf16 ( $text )
	   {
		// A byte order mark needs two bytes : anything shorter is returned untouched
		if  ( ! isset ( $text [0], $text [1] ) )
			return ( $text ) ;

		$mark	=  $text [0] . $text [1] ;

		// Only strings starting with a UTF-16 byte order mark (either byte order) are converted to UTF-8
		if  ( $mark  ===  "\xFE\xFF"  ||  $mark  ===  "\xFF\xFE" )
			return ( mb_convert_encoding ( $text, 'UTF-8', 'UTF-16' ) ) ;

		return ( $text ) ;
	    }


	// __extract_chars_from_array -
	//	Extracts characters enclosed either within parentheses (character codes) or angle brackets (hex value)
	//	from an array.
	//	Example :
	//
	//		[<0D>-40<02>-36<03>-39<0E>-36<0F>-36<0B>-37<10>-37<10>-35(abc)]
	//
	// 	will return an array having the following entries :
	//
	//		<0D>, <02>, <03>, <0E>, <0F>, <0B>, <10>, <10>, (abc)
	private function  __extract_chars_from_array ( $array )
	   {
		// Returns a two-element array : the list of strings found in $array (with their delimiters), and the
		// list of numeric offsets found between them.
		// The first and last characters of $array (the enclosing brackets) are never examined.
		static	$closers	=  array ( '(' => ')', '<' => '>' ) ;

		$end		=  strlen ( $array ) - 1 ;
		$strings	=  array ( ) ;
		$offsets	=  array ( ) ;

		for  ( $pos = 1 ; $pos  <  $end ; $pos ++ )
		   {
			$opener	=  $array [$pos] ;

			// Not the start of a string : collect the run of number characters located here.
			// An offset is recorded even when the run is empty (its value is then 0).
			if  ( ! isset ( $closers [ $opener ] ) )
			   {
				$run		=  strspn ( $array, '0123456789-+.', $pos, $end - $pos ) ;
				$offsets []	=  ( double ) substr ( $array, $pos, $run ) ;

				// Stay on the last character of the run ; the loop increment moves past it
				if  ( $run )
					$pos	+=  $run - 1 ;

				continue ;
			    }

			// Start of a string : copy everything up to the closing delimiter, keeping each backslash
			// together with the character that follows it
			$closer	=  $closers [ $opener ] ;
			$item	=  $opener ;

			for  ( $pos ++ ; $pos  <  $end  &&  $array [$pos]  !=  $closer ; $pos ++ )
			   {
				if  ( $array [$pos]  ==  '\\' )
					$item	.=  '\\' . $array [++$pos] ;
				else
					$item	.=  $array [$pos] ;
			    }

			$strings []	=  $item . $closer ;
		    }

		return ( array ( $strings, $offsets ) ) ;
	    }


	// __extract_chars_from_block -
	//	Extracts characters from a text block (enclosed in parentheses).
	//	Returns an array of character ordinals if the $as_array parameter is true, or a string if false.
	private function  __extract_chars_from_block ( $text, $start_index = false, $length = false, $as_array = false )
	   {
		// Decodes the escape sequences of a PDF literal string.
		//
		// Parameters :
		//	$text -
		//		String to be decoded.
		//	$start_index -
		//		Index of the first character to process (false = 0).
		//	$length -
		//		Index at which processing stops (exclusive) ; despite its name this is an end position, not
		//		a character count (false = length of $text).
		//	$as_array -
		//		When true, an array of byte values is returned instead of a string.
		//
		// Escape sequences handled : \n \r \t \b \f \v, one to three octal digits, and any other escaped
		// character, which stands for itself (this covers "\(", "\)" and "\\").
		// A backslash that ends the string is kept as is.
		static	$escapes	=  array
		   (
			'n'	=>  "\n",
			'r'	=>  "\r",
			't'	=>  "\t",
			'b'	=>  "\x08",
			'f'	=>  "\f",
			'v'	=>  "\v"
		    ) ;

		$result		=  ( $as_array ) ?  array ( ) : '' ;

		if  ( $start_index  ===  false )
			$start_index	=  0 ;

		if  ( $length  ===  false )
			$length		=  strlen ( $text ) ;

		$ord0	=  ord ( '0' ) ;

		for  ( $i = $start_index ; $i  <  $length ; $i ++ )
		   {
			$ch	=  $text [$i] ;

			if  ( $ch  ==  '\\'  &&  isset ( $text [ $i + 1 ] ) )
			   {
				$ch2	=  $text [ ++$i ] ;

				if  ( isset ( $escapes [ $ch2 ] ) )
					$ch	=  $escapes [ $ch2 ] ;
				else if  ( $ch2  >=  '0'  &&  $ch2  <=  '7' )
				   {
					// The digit value is ord(digit) - ord('0') ; subtracting ord('0') from the digit itself
					// gives a wrong result for escapes of one or two digits
					$ord		=  ord ( $ch2 ) - $ord0 ;
					$digit_count	=  1 ;

					// An octal escape has at most three digits ; further digits are ordinary characters
					while  ( $digit_count  <  3  &&  isset ( $text [ $i + 1 ] )  &&
							$text [ $i + 1 ]  >=  '0'  &&  $text [ $i + 1 ]  <=  '7' )
					   {
						$ord	=  ( $ord * 8 ) + ord ( $text [ ++$i ] ) - $ord0 ;
						$digit_count ++ ;
					    }

					// Values above 0377 do not fit in a byte : only the low-order 8 bits are kept
					$ch	=  chr ( $ord  &  0xFF ) ;
				    }
				else
					$ch	=  $ch2 ;
			    }

			if  ( $as_array )
				$result []	 =  ord ( $ch ) ;
			else
				$result		.=  $ch ;
		    }

		return ( $result ) ;
	    }


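	// __get_character_padding :
	//	If the offset specified between two character groups in an array notation for displaying text is less
	//	than or equal to -MinSpaceWidth thousandths of text units, returns the padding to be inserted between
	//	the two groups : the Separator property, repeated according to the size of the offset when the
	//	PDFOPT_REPEAT_SEPARATOR option is set. Otherwise, returns an empty string.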
	private function  __get_character_padding ( $char_offset )
	   {
		// Returns the padding to insert for the offset $char_offset found between two character groups :
		// an empty string when the offset is greater than -MinSpaceWidth, the (possibly repeated) Separator
		// property otherwise. The separator is unescaped, then converted from ISO-8859-1 to UTF-8.
		if  ( $char_offset  <=  - $this -> MinSpaceWidth )
		   {
			$padding	=  $this -> Separator ;

			if  ( $this -> Options & self::PDFOPT_REPEAT_SEPARATOR )
			   {
				// If the MinSpaceWidth property is less than 1000 (text units), consider it has the value 1000
				// so that an exuberant number of spaces will not be repeated
				$space_width	=  ( $this -> MinSpaceWidth  <  1000 ) ?  1000 :  $this -> MinSpaceWidth ;
				$repeat_count	=  ( integer ) abs ( round ( $char_offset / $space_width, 0 ) ) ;

				// A count of zero still yields one separator
				if  ( $repeat_count )
					$padding	=  str_repeat ( $this -> Separator, $repeat_count ) ;
			    }

			$padding	=  self::Unescape ( $padding ) ;

			// utf8_encode() is deprecated as of PHP 8.2 ; mb_convert_encoding() performs the same
			// ISO-8859-1 to UTF-8 conversion. The old call is kept for installations without mbstring.
			if  ( function_exists ( 'mb_convert_encoding' ) )
				return ( mb_convert_encoding ( $padding, 'UTF-8', 'ISO-8859-1' ) ) ;
			else
				return ( utf8_encode ( $padding ) ) ;
		    }
		else
			return ( '' ) ;
	    }


	// __get_output_image_filename -
	//	Returns a real filename based on a template supplied by the AutoSaveImageFileTemplate property.
	private function  __get_output_image_filename ( )
	   {
		// Expands the ImageAutoSaveFileTemplate property into a filename. Recognized constructs :
		//	%%		A single percent sign
		//	%p		Directory part of $this -> Filename
		//	%f		Name of $this -> Filename, without its directory and without its last suffix
		//	%s		Suffix matching the ImageAutoSaveFormat property, or "unknown"
		//	%d, %<n>d	Value of the ImageCount property, optionally zero-padded to <n> digits
		// Anything else, including a '%' that starts none of the above, is copied as is.
		//
		// The template is expanded in a single left-to-right pass so that the output of one construct is never
		// reinterpreted as another one (with successive global replacements, "%%p" would first become "%p",
		// then the directory name).
		static		$suffixes	=  array
		   (
			IMG_JPEG		=>  'jpg',
			IMG_JPG			=>  'jpg',
			IMG_GIF			=>  'gif',
			IMG_PNG			=>  'png',
			IMG_WBMP		=>  'wbmp',
			IMG_XPM			=>  'xpm'
		    ) ;

		$template	=  $this -> ImageAutoSaveFileTemplate ;
		$length		=  strlen ( $template ) ;
		$parts		=  pathinfo ( $this -> Filename ) ;

		if  ( ! isset ( $parts [ 'filename' ] ) )	// for PHP versions < 5.2
		   {
			// The filename is what precedes the LAST dot of the basename, as pathinfo() itself computes it
			$basename	=  isset ( $parts [ 'basename' ] ) ?  $parts [ 'basename' ] : '' ;
			$index		=  strrpos ( $basename, '.' ) ;

			if  ( $index  ===  false )
				$parts [ 'filename' ]	=  $basename ;
			else
				$parts [ 'filename' ]	=  substr ( $basename, 0, $index ) ;
		    }

		// pathinfo() does not return a 'dirname' entry for an empty path
		$dirname	=  isset ( $parts [ 'dirname' ] ) ?  $parts [ 'dirname' ] : '' ;
		$result		=  '' ;

		for  ( $i = 0 ; $i  <  $length ; $i ++ )
		   {
			$ch	=  $template [$i] ;

			// Ordinary character, or percent sign ending the template
			if  ( $ch  !=  '%'  ||  $i + 1  >=  $length )
			   {
				$result		.=  $ch ;
				continue ;
			    }

			// Percent sign found : check the character after
			switch  ( $template [ $i + 1 ] )
			   {
				// "%%" : Replace it with a single percent
				case	'%' :
					$result		.=  '%' ;
					$i ++ ;
					break ;

				// "%p" : Path of the original PDF file
				case	'p' :
					$result		.=  $dirname ;
					$i ++ ;
					break ;

				// "%f" : Filename part of the original PDF file, without its suffix
				case	'f' :
					$result		.=  $parts [ 'filename' ] ;
					$i ++ ;
					break ;

				// "%s" : Output image file suffix, determined by the ImageAutoSaveFormat property
				case	's' :
					if  ( isset ( $suffixes [ $this -> ImageAutoSaveFormat ] ) )
						$result		.=  $suffixes [ $this -> ImageAutoSaveFormat ] ;
					else
						$result		.=  'unknown' ;

					$i ++ ;
					break ;

				// "%d" : Sequential image index
				case	'd' :
					$result		.=  $this -> ImageCount ;
					$i ++ ;
					break ;

				// Other : may be "%xd", where "x" are digits expressing the width of the final sequential index
				default :
					$digit_count	=  strspn ( $template, '0123456789', $i + 1 ) ;
					$end_index	=  $i + 1 + $digit_count ;

					// The bounds check prevents reading past the end of a template that ends with digits
					if  ( $digit_count  &&  $end_index  <  $length  &&  $template [ $end_index ]  ==  'd' )
					   {
						$width		 =  ( integer ) substr ( $template, $i + 1, $digit_count ) ;
						$result		.=  sprintf ( "%0{$width}d", $this -> ImageCount ) ;
						$i		 =  $end_index ;
					    }
					// Unknown construct : keep the percent sign ; what follows is processed as ordinary text
					else
						$result		.=  '%' ;
			    }
		    }

		// All done, return
		return ( $result ) ;
	    }


	// __rtl_process -
	//	Processes the contents of a page when it contains characters belonging to an RTL language.
	private function  __rtl_process ( $text )
	   {
		// Reorders the RTL parts of $text, line by line :
		// - The characters of each run of RTL characters are put in reverse order
		// - Consecutive RTL runs, together with the space/punctuation strings located between them, are
		//   emitted in reverse order
		// - Any other text keeps its position in the line
		// Returns the text unchanged when it contains none of the bytes listed in self::$RtlCharacterPrefixes.
		// Note that carriage returns are removed from the text when it is processed.
		$length		=  strlen ( $text ) ;
		$pos		=  strcspn ( $text, self::$RtlCharacterPrefixes ) ;

		// The text does not contain any of the UTF-8 prefixes that may introduce RTL contents :
		// simply return it as is
		if   ( $pos  ==  $length  ||  $text [$pos]  ===  "\x00" )
			return ( $text ) ;

		// Extract each individual line, and get rid of carriage returns if any
		$lines		=  explode ( "\n", str_replace ( "\r", '', $text ) ) ;
		$new_lines	=  array ( ) ;

		// Loop through lines
		foreach  ( $lines  as  $line )
		   {
			// Check if the current line contains potential RTL characters
			$pos		=  strcspn ( $line, self::$RtlCharacterPrefixes ) ;
			$length		=  strlen ( $line ) ;

			// If not, simply store it as is
			if  ( $pos  ==  $length )
			   {
				$new_lines []	=  $line ;
				continue ;
			    }

			// Otherwise, it gets a little bit more complicated ; we have :
			// - To process each series of RTL characters and put them in reverse order
			// - Mark spaces and punctuation as "RTL separators", without reversing them (ie, a string like " ." remains " .", not ". ")
			// - Other sequences of non-RTL characters must be preserved as is and are not subject to reordering
			// The $words array is used to store arrays of two elements :
			// - The first one is a boolean indicating whether it concerns RTL characters (true) or not (false)
			// - The second one is the string itself
			$words		=  array ( ) ;

			// Start of the string is not an RTL sequence ; we can add it to our $words array
			if  ( $pos )
			   {
				$word		=  substr ( $line, 0, $pos ) ;
				$words []	=  array ( $this -> __is_rtl_separator ( $word ), $word ) ;
			    }

			$in_rtl		=  true ;

			// Loop through remaining characters of the current line
			while  ( $pos  <  $length )
			   {
				// Character at the current position may be RTL character
				if  ( $in_rtl )
				   {
					$rtl_text		=  '' ;
					$rtl_char		=  '' ;
					$rtl_char_length	=  0 ;
					$found_rtl		=  false ;

					// Collect all the consecutive RTL characters, which represent a word, and put the letters in reverse order
					while  ( $pos  <  $length  &&  $this -> __is_rtl_character ( $line, $pos, $rtl_char, $rtl_char_length ) )
					   {
						$rtl_text	 =  $rtl_char . $rtl_text ;
						$pos		+=  $rtl_char_length ;
						$found_rtl	 =  true ;
					    }

					// ... but make sure that we found a valid RTL sequence ; if not, the byte at the current
					// position is consumed as non-RTL text, which guarantees that the scan moves forward
					if  ( $found_rtl )
						$words []	 =  array ( true, $rtl_text ) ;
					else
						$words []	 =  array ( false, $line [ $pos ++ ] ) ;

					// For now, we are no more in a series of RTL characters
					$in_rtl		=  false ;
				    }
				// Non-RTL characters : collect them until either the end of the current line or the next RTL character
				else
				   {
					$next_pos	=  $pos + strcspn ( $line, self::$RtlCharacterPrefixes, $pos ) ;

					if  ( $next_pos  >=  $length )
					   {
						// Last word of the line : move to the end of the line instead of leaving the loop
						// right away, otherwise this word would never be stored and the text located
						// after the last RTL sequence would be lost
						$word		=  substr ( $line, $pos ) ;
						$pos		=  $length ;
					    }
					else
					   {
						$word		=  substr ( $line, $pos, $next_pos - $pos ) ;
						$pos		=  $next_pos ;
						$in_rtl		=  true ;
					    }

					// Don't forget to make the distinction between a sequence of spaces and punctuations, and a real
					// piece of text. Space/punctuation strings surrounded by RTL words will be inverted with them
					$words []		=  array ( $this -> __is_rtl_separator ( $word ), $word ) ;
				    }
			    }

			// Now we have an array, $words, whose first entry of each element indicates whether the second entry is an RTL string
			// or not (this includes strings that contain only spaces and punctuation).
			// We have to gather all the consecutive array items whose first entry is true, then invert their order.
			// Non-RTL strings are not affected by this process.
			$stacked_rtl_words	=  array ( ) ;
			$new_words		=  array ( ) ;

			foreach  ( $words  as  $word )
			   {
				// RTL word : put it onto the stack
				if  ( $word [0] )
					$stacked_rtl_words []	=  $word [1] ;
				// Non-RTL word : add it as is to the output array, $new_words
				else
				   {
					// But if RTL words were stacked before, invert them and add them to the output array
					if  ( count ( $stacked_rtl_words ) )
					   {
						$new_words		=  array_merge ( $new_words, array_reverse ( $stacked_rtl_words ) ) ;
						$stacked_rtl_words	=  array ( ) ;
					    }

					$new_words []	=  $word [1] ;
				    }
			    }

			// Process any remaining RTL words that may have been stacked and not yet processed
			if  ( count ( $stacked_rtl_words ) )
				$new_words		=  array_merge ( $new_words, array_reverse ( $stacked_rtl_words ) ) ;

			// That's ok, we have processed one more line
			$new_lines []	=  implode ( '', $new_words ) ;
		    }

		// All done, return a catenation of all the lines processed so far
		$result		=  implode ( "\n", $new_lines ) ;

		return ( $result ) ;
	    }


	// __is_rtl_character -
	//	Checks if the sequence starting at $pos in string $text is a character belonging to an RTL language.
	//	If yes, returns true and sets $rtl_char to the UTF8 string sequence for that character, and $rtl_char_length
	//	to the length of this string.
	//	If no, returns false.
	private function  __is_rtl_character ( $text, $pos, &$rtl_char, &$rtl_char_length )
	   {
		$lead	=  $text [ $pos ] ;

		// The byte at $pos is not one of the known prefixes : this cannot be an RTL character
		if  ( ! isset ( self::$RtlCharacterPrefixLengths [ $lead ] ) )
			return ( false ) ;

		// Bytes that are expected to follow the prefix
		$tail_length	=  self::$RtlCharacterPrefixLengths [ $lead ] ;
		$tail		=  substr ( $text, $pos + 1, $tail_length ) ;

		// Each range defined for this prefix is a pair of (low, high) byte strings, compared with strcmp()
		foreach  ( self::$RtlCharacters [ $lead ]  as  $range )
		   {
			$below	=  ( strcmp ( $range [0], $tail )  >  0 ) ;
			$above	=  ( strcmp ( $range [1], $tail )  <  0 ) ;

			if  ( $below  ||  $above )
				continue ;

			// Within the range : report the whole sequence and its length to the caller
			$rtl_char		=  $lead . $tail ;
			$rtl_char_length	=  $tail_length + 1 ;

			return ( true ) ;
		    }

		// No range matched ; the output parameters are left untouched
		return ( false ) ;
	    }


	// __is_rtl_separator -
	//	RTL words are separated by spaces and punctuation signs that are specified as LTR characters.
	//	However, such sequences, which are separators between words, must be considered as being part
	//	of an RTL sequence of words and therefore be reversed with them.
	//	This function helps to determine if the supplied string is simply a sequence of spaces and
	//	punctuation (a word separator) or plain text, that must keep its position in the line.
	private function  __is_rtl_separator ( $text )
	   {
		// Strings already recognized as separators are remembered across calls
		static		$recognized		=  array ( ) ;
		static		$separator_chars	=  " \t,.;:/!-_=+" ;

		if  ( isset ( $recognized [ $text ] ) )
			return ( true ) ;

		// A separator is exclusively made of separator characters (this includes the empty string)
		if  ( strspn ( $text, $separator_chars )  !==  strlen ( $text ) )
			return ( false ) ;

		$recognized [ $text ]	=  true ;

		return ( true ) ;
	    }


	// __strip_useless_instructions :
	//	Removes from a text stream all the Postscript instructions that are not meaningful for text extraction
	//	(these are mainly shape drawing instructions).
	private function  __strip_useless_instructions ( $data )
	   {
		// Replaces with a space every part of $data matched by the regular expressions of the
		// IgnoredInstructions property, and accumulates the sizes before and after the operation
		// in the 'TextSize' and 'OptimizedTextSize' statistics.
		$result		=  preg_replace ( $this -> IgnoredInstructions, ' ', $data ) ;

		// preg_replace() returns null when the regex engine fails (for example when the backtrack limit is
		// reached on a large stream). Keep the original data in this case : not stripping anything is
		// harmless, whereas going on with null would silently discard the whole text stream.
		if  ( $result  ===  null )
			$result		=  $data ;

		$this -> Statistics [ 'TextSize' ]		+=  strlen ( $data ) ;
		$this -> Statistics [ 'OptimizedTextSize' ]	+=  strlen ( $result ) ;

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        IsPageSelected - Checks if a page is selected for output.

	    PROTOTYPE
	        $status		=  $this -> IsPageSelected ( $page ) ;

	    DESCRIPTION
	        Checks if the specified page is to be selected for output.

	    PARAMETERS
	        $page (integer) -
	                Page to be checked.

	    RETURN VALUE
	        True if the page is to be selected for output, false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  IsPageSelected ( $page )
	   {
		$limit	=  $this -> MaxSelectedPages ;

		// No limit set : every page is selected
		if  ( ! $limit )
			return ( true ) ;

		// Positive limit : only pages whose number does not exceed the limit are selected.
		// Negative limit : only pages whose number is greater than ( page count + limit ) are selected.
		return  ( $limit  >  0 ) ?
				( $page  <=  $limit ) :
				( $page  >  count ( $this -> PageMap -> Pages ) + $limit ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        PeekAuthorInformation - Gets author information from the specified object data.

	    PROTOTYPE
	        $this -> PeekAuthorInformation ( $object_id, $object_data ) ;

	    DESCRIPTION
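	        Checks whether the specified object data looks like it holds author information, that is, whether
		it contains the /Author or the /CreationDate keyword. If so, the GotAuthorInformation property is set
		to true and the object id is returned ; otherwise, false is returned. The values themselves are not
		extracted here (see RetrieveAuthorInformation).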

	    PARAMETERS
	    	$object_id (integer) -
	    		Object id of this text block.

	    	$object_data (string) -
	    		Stream contents.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  PeekAuthorInformation ( $object_id, $object_data )
	   {
		// Only the /Author and /CreationDate keywords are used to detect an author information object
		$has_author		=  ( strpos ( $object_data, '/Author' )  !==  false ) ;
		$has_information	=  $has_author  ||  ( strpos ( $object_data, '/CreationDate' )  !==  false ) ;

		if  ( ! $has_information )
			return ( false ) ;

		// Remember that an author information object has been seen, and tell the caller which one it is
		$this -> GotAuthorInformation	=  true ;

		return ( $object_id ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        RetrieveAuthorInformation - Extracts author information

	    PROTOTYPE
	        $this -> RetrieveAuthorInformation ( $object_id, $pdf_objects ) ;

	    DESCRIPTION
	        Extracts the author information. Handles the case where flag values refer to existing objects.

	    PARAMETERS
	        $object_id (integer) -
	                Id of the object containing the author information.

		$pdf_objects (array) -
			Array whose keys are the PDF object ids, and values their corresponding contents.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  RetrieveAuthorInformation ( $object_id, $pdf_objects )
	   {
		// Matches a keyword whose value is given inline, either as a literal string "(...)" or as a hex string "<...>"
		static		$re		=  '#
							(?P<info>
								/
								(?P<keyword> (Author) | (Creator) | (Producer) | (Title) | (CreationDate) | (ModDate) | (Keywords) | (Subject) )
								\s*
								(?P<opening> [(<])
							)
						    #imsx' ;
		// Matches a keyword whose value is an indirect reference to another object ("x y R")
		static		$object_re	=  '#
							(?P<info>
								/
								(?P<keyword> (Author) | (Creator) | (Producer) | (Title) | (CreationDate) | (ModDate) | (Keywords) | (Subject) )
								\s*
								(?P<object_ref>
									(?P<object> \d+)
									\s+
									\d+
									\s+
									R
								 )
							)
						    #imsx' ;
		// Whitespace characters that are allowed (and ignored) inside a PDF hex string
		static		$hex_whitespace	=  array ( "\0", "\t", "\n", "\f", "\r", " " ) ;
		static		$hex_digits	=  '0123456789abcdefABCDEF' ;

		// Nothing can be extracted if the object does not exist
		if  ( ! isset ( $pdf_objects [ $object_id ] ) )
			return ;

		// Retrieve the object data corresponding to the specified object id
		$object_data	=  $pdf_objects [ $object_id ] ;

		// Pre-process flags whose values refer to existing objects : each "x y R" reference is replaced
		// with the (trimmed) contents of the referenced object
		if  ( preg_match_all ( $object_re, $object_data, $object_matches ) )
		   {
			$searches		=  array ( ) ;
			$replacements		=  array ( ) ;

			for  ( $i = 0, $count = count ( $object_matches [ 'keyword' ] ) ; $i  <  $count ; $i ++ )
			   {
				$searches []		=  $object_matches [ 'object_ref' ] [$i] ;

				// Some buggy PDF may reference author information objects that do not exist
				$replacements []	=  isset ( $pdf_objects [ $object_matches [ 'object' ] [$i] ] ) ?
								trim ( $pdf_objects [ $object_matches [ 'object' ] [$i] ] ) : '' ;
			    }

			$object_data	=  str_replace ( $searches, $replacements, $object_data ) ;
		    }

		// Look for keywords followed by an inline value ; nothing more to do if there are none
		if  ( ! preg_match_all ( $re, $object_data, $matches, PREG_OFFSET_CAPTURE ) )
			return ;

		$object_length	=  strlen ( $object_data ) ;

		for  ( $i = 0, $count = count ( $matches [ 'keyword' ] ) ; $i  <  $count ; $i ++ )
		   {
			$keyword	=  $matches [ 'keyword' ] [$i] [0] ;
			$opening	=  $matches [ 'opening' ] [$i] [0] ;
			// Offset of the first character located after the opening delimiter
			$start_index	=  $matches [ 'info'    ] [$i] [1] + strlen ( $matches [ 'info' ] [$i] [0] ) ;

			// Text between parentheses : the text is written as is
			if  ( $opening  ==  '(' )
			   {
				$parent_level	=  1 ;

				// Since the parameter value can contain any character, including "\" or "(", we will have to find the real closing
				// parenthesis
				$value		=  '' ;

				for  ( $j = $start_index ; $j  <  $object_length ; $j ++ )
				   {
					$c	=  $object_data [$j] ;

					if  ( $c  ==  '\\' )
					   {
						// Keep the escape sequence as is ; a backslash that ends the data has no
						// following character to copy
						$value	.=  '\\' ;

						if  ( $j + 1  <  $object_length )
							$value	.=  $object_data [++$j] ;
					    }
					else if  ( $c  ==  '(' )
					   {
						$value	.=  '(' ;
						$parent_level ++ ;
					    }
					else if  ( $c  ==  ')' )
					   {
						$parent_level -- ;

						// The parenthesis that balances the opening one terminates the value
						if  ( ! $parent_level )
							break ;

						$value	.=  ')' ;
					    }
					else
						$value  .=  $c ;
				    }
			     }
			// Text within angle brackets, written as hex digits
			else
			   {
				$end_index	=  strpos ( $object_data, '>', $start_index ) ;

				// Unterminated hex string : there is no reliable value to extract
				if  ( $end_index  ===  false )
					continue ;

				$hexdigits	=  substr ( $object_data, $start_index, $end_index - $start_index ) ;
				$hexdigits	=  str_replace ( $hex_whitespace, '', $hexdigits ) ;

				if  ( $hexdigits  ===  '' )
					$value	=  '' ;
				// Not a hex string at all (for example the "<<" that opens a dictionary) : ignore this entry
				else if  ( strspn ( $hexdigits, $hex_digits )  !==  strlen ( $hexdigits ) )
					continue ;
				else
				   {
					// When the number of digits is odd, the last digit is assumed to be followed by a zero
					if  ( strlen ( $hexdigits ) % 2 )
						$hexdigits	.=  '0' ;

					$value		=  hex2bin ( $hexdigits ) ;
				    }
			    }

			$value		=   $this -> __convert_utf16 ( $this -> __extract_chars_from_block ( $value ) ) ;

			switch ( strtolower ( $keyword ) )
			   {
				case  'author'		:  $this -> Author			=  $value ; break ;
				case  'creator'		:  $this -> CreatorApplication		=  $value ; break ;
				case  'producer'	:  $this -> ProducerApplication		=  $value ; break ;
				case  'title'		:  $this -> Title			=  $value ; break ;
				case  'keywords'	:  $this -> Keywords			=  $value ; break ;
				case  'subject'		:  $this -> Subject			=  $value ; break ;
				case  'creationdate'	:  $this -> CreationDate		=  $this -> GetUTCDate ( $value ) ; break ;
				case  'moddate'		:  $this -> ModificationDate		=  $this -> GetUTCDate ( $value ) ; break ;
			    }
		    }

		if  ( self::$DEBUG )
		   {
	   		echo "\n----------------------------------- AUTHOR INFORMATION\n" ;
			echo ( "Author               : " . $this -> Author . "\n" ) ;
			echo ( "Creator application  : " . $this -> CreatorApplication . "\n" ) ;
			echo ( "Producer application : " . $this -> ProducerApplication . "\n" ) ;
			echo ( "Title                : " . $this -> Title . "\n" ) ;
			echo ( "Subject              : " . $this -> Subject . "\n" ) ;
			echo ( "Keywords             : " . $this -> Keywords . "\n" ) ;
			echo ( "Creation date        : " . $this -> CreationDate . "\n" ) ;
			echo ( "Modification date    : " . $this -> ModificationDate . "\n" ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        RetrieveFormData - Retrieves raw form data

	    PROTOTYPE
	        $this -> RetrieveFormData ( $object_id, $object_data, $pdf_objects ) ;

	    DESCRIPTION
	        Retrieves raw form data (form definition and field values definition).

	    PARAMETERS
	        $object_id (integer) -
	                Id of the object that references the form data ; it is used as the key of the FormData entry.

		$object_data (string) -
			Object contents.

		$pdf_objects (array) -
			Array whose keys are the PDF object ids, and values their corresponding contents.

	    NOTES
	        This function only memorizes the contents of form data definitions. The actual data will be processed
		only if the GetFormData() function is called.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  RetrieveFormData ( $object_id, $object_data, $pdf_objects )
	   {
		// Retrieve the object that contains the field values ; $field_object stays null when the object data
		// holds no "(datasets) x y R" reference
		$field_object		=  null ;

		if  ( preg_match ( '#\b R \s* \( \s* datasets \s* \) \s* (?P<object> \d+) \s+ \d+ \s+ R#imsx', $object_data, $field_match ) )
			$field_object	=  $field_match [ 'object' ] ;

		if  ( $field_object  ===  null  ||  ! isset ( $pdf_objects [ $field_object ] ) )
		   {
			if  ( self::$DEBUG )
				warning ( "Field definitions object #$field_object not found in object #$object_id." ) ;

			return ;
		    }

		// Retrieve the object that contains the form definition ; same remark about the "(form) x y R" reference
		$form_object		=  null ;

		if  ( preg_match ( '#\b R \s* \( \s* form \s* \) \s* (?P<object> \d+) \s+ \d+ \s+ R#imsx', $object_data, $form_match ) )
			$form_object	=  $form_match [ 'object' ] ;

		if  ( $form_object  ===  null  ||  ! isset ( $pdf_objects [ $form_object ] ) )
		   {
			if  ( self::$DEBUG )
				warning ( "Form definitions object #$form_object not found in object #$object_id." ) ;

			return ;
		    }

		// Add this entry to form data information ; only the object ids are memorized here
		$this -> FormData [ $object_id ]	=  array
		   (
			'values'	=>  ( int ) $field_object,
			'form'		=>  ( int ) $form_object
		    ) ;
	    }


    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                      FONT TABLE MANAGEMENT                                       ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    PdfTexterFontTable class -
        The PdfTexterFontTable class is not supposed to be used outside the context of the PdfToText class.
	Its purposes are to hold a list of font definitions taken from a pdf document, along with their
	associated character mapping tables, if any.
	This is why no provision has been made to design this class as a general purpose class ; its utility
	exists only in the scope of the PdfToText class.

  ==============================================================================================================*/
class 	PdfTexterFontTable 	extends PdfObjectBase
   {
	// Font table : PdfTexterFont objects indexed by font id (an object id, or the number of a /Rx resource)
	public		$Fonts		=  array ( ) ;
	// Key of the font used when a character is mapped with font id -1 ; false until Add() sets it
	private		$DefaultFont	=  false ;
	// Font mapping between a font name (or a "page:xobject:font" alias) and an object number
	private 	$FontMap 	=  array ( ) ;
	// A character map buffer is used to store results from previous calls to the MapCharacter() method of the
	// FontTable object. It dramatically reduces the number of calls needed, from one call for each character
	// defined in the pdf stream, to one call on each DISTINCT character defined in the PDF stream.
	// As an example, imagine a PDF file that contains 200K characters, but only 150 distinct ones. The
	// MapCharacter method will be called 150 times, instead of 200 000...
	// The buffer is indexed by lookup mode (1 when a failure must be reported as false, 0 otherwise), then
	// by font id, then by character.
	private		$CharacterMapBuffer		=  array ( ) ;


	// Constructor -
	//	Well, does not do anything special
	public function  __construct ( )
	   {
		parent::__construct ( ) ;
	    }


	// Add -
	//	Adds the current font declaration to the font table. Handles special cases where font id is not
	//	given by the object id, but rather by <</Rx...>> constructs.
	//	$object_id is the id of the object holding the font definition, $font_definition its contents,
	//	$pdf_objects the array of object contents indexed by object id, and $extra_mappings is handed
	//	over as is to the PdfTexterFont constructor.
	public function  Add ( $object_id, $font_definition, $pdf_objects, $extra_mappings )
	   {
		if  ( PdfToText::$DEBUG )
		   {
	   		echo "\n----------------------------------- FONT #$object_id\n" ;
			echo $font_definition ;
		    }

		$font_type		=  PdfTexterFont::FONT_ENCODING_STANDARD ;
		$cmap_id		=  0 ;
		$secondary_cmap_id	=  0 ;
		$font_variant		=  false ;

		// Font resource id specification
	   	if  ( preg_match ( '#<< \s* (?P<rscdefs> /R\d+ .*) >>#ix', $font_definition, $match ) )
		   {
			$resource_definitions	=  $match [ 'rscdefs' ] ;

			preg_match_all ( '#/R (?P<font_id> \d+) #ix', $resource_definitions, $id_matches ) ;
			preg_match_all ( '#/ToUnicode \s* (?P<cmap_id> \d+)#ix', $resource_definitions, $cmap_matches ) ;

			$count		=  count ( $id_matches [ 'font_id' ] ) ;

			// /Rx entries and /ToUnicode entries are paired by position
			for  ( $i = 0 ;  $i  <  $count ; $i ++ )
			   {
				$font_id	=  $id_matches   [ 'font_id' ] [$i] ;
				// There may be fewer /ToUnicode entries than /Rx ones
				$cmap_id	=  isset ( $cmap_matches [ 'cmap_id' ] [$i] ) ?  $cmap_matches [ 'cmap_id' ] [$i] : 0 ;

				// The extra mappings must be given in their own position : the 4th and 5th constructor
				// parameters are the secondary character map id and the pdf objects. No object array is
				// supplied since $font_id is a resource number, not an object id.
				$this -> Fonts [ $font_id ]	=  new  PdfTexterFont ( $font_id, $cmap_id, PdfTexterFont::FONT_ENCODING_UNICODE_MAP,
											 0, null, $extra_mappings ) ;
			    }

			return ;
		    }
		// Experimental implementation of CID fonts
		else if  ( preg_match ( '#/(Base)?Encoding \s* /Identity-H#ix', $font_definition ) )
		   {
			if  ( preg_match ( '#/BaseFont \s* /(?P<font> [^\s/]+)#ix', $font_definition, $match ) )
				$font_variant	=  $match [ 'font' ] ;

			$font_type	=  PdfTexterFont::FONT_ENCODING_CID_IDENTITY_H ;
		    }
		// Font has an associated Unicode map (using the /ToUnicode keyword)
		else if  ( preg_match ( '#/ToUnicode \s* (?P<cmap> \d+)#ix', $font_definition, $match ) )
		   {
			$cmap_id	=  $match [ 'cmap' ] ;
			$font_type	=  PdfTexterFont::FONT_ENCODING_UNICODE_MAP ;

			// An /Encoding object may come in addition to the /ToUnicode map
			if  ( preg_match ( '#/Encoding \s* (?P<cmap> \d+)#ix', $font_definition, $secondary_match ) )
				$secondary_cmap_id	=  $secondary_match [ 'cmap' ] ;
		    }
		// Font has an associated character map (using a cmap id)
		else if  ( preg_match ( '#/Encoding \s* (?P<cmap> \d+) \s+ \d+ #ix', $font_definition, $match ) )
		   {
			$cmap_id 	=  $match [ 'cmap' ] ;
			$font_type	=  PdfTexterFont::FONT_ENCODING_PDF_MAP ;
		    }
		// Font uses the Windows Ansi encoding
		else if  ( preg_match ( '#/(Base)?Encoding \s* /WinAnsiEncoding#ix', $font_definition ) )
		   {
			$font_type	=  PdfTexterFont::FONT_ENCODING_WINANSI ;

			// A base font name ending with "Cyr" after the subset prefix selects the ISO8859-5 variant
			if  ( preg_match ( '# /BaseFont \s* / [a-z0-9_]+ \+ [a-z0-9_]+? Cyr #imsx', $font_definition ) )
				$font_type	|=  PdfTexterFont::FONT_VARIANT_ISO8859_5 ;
		    }
		// Font uses the Mac Roman encoding
		else if  ( preg_match ( '#/(Base)?Encoding \s* /MacRomanEncoding#ix', $font_definition ) )
			$font_type	=  PdfTexterFont::FONT_ENCODING_MAC_ROMAN ;

		$this -> Fonts [ $object_id ]	=  new  PdfTexterFont ( $object_id, $cmap_id, $font_type, $secondary_cmap_id, $pdf_objects, $extra_mappings, $font_variant ) ;

		// Arbitrarily set the default font to the first font encountered in the pdf file
		if  ( $this -> DefaultFont  ===  false )
		   {
			reset ( $this -> Fonts ) ;
			$this -> DefaultFont	=  key ( $this -> Fonts ) ;
		    }
	    }


	// AddFontMap -
	//	Process things like :
	//		<</F1 26 0 R/F2 22 0 R/F3 18 0 R>>
	//	which maps font 1 (when specified with the /Fx instruction) to object 26,
	//	2 to object 22 and 3 to object 18, respectively, in the above example.
	//	Found also a strange way of specifying a font mapping :
	//		<</f-0-0 5 0 R etc.
	//	And yet another one :
	//		<</C0_0 5 0 R
	public function  AddFontMap ( $object_id, $object_data )
	   {
		$object_data	=  self::UnescapeHexCharacters ( $object_data ) ;

		// The same object can hold different notations for font associations
		if  ( preg_match_all ( '# (?P<font> ' . self::$FontSpecifiers . ' ) \s+ (?P<object> \d+) #imsx', $object_data, $matches ) )
		   {
			// An association found later overrides an earlier one made for the same font name
			foreach  ( $matches [ 'font' ]  as  $index => $font )
				$this -> FontMap [ $font ]	=  $matches [ 'object' ] [ $index ] ;
		    }
	    }


	// AddPageFontMap -
	//	Adds font aliases to the current font map, in the form : "page:xobject:font".
	//	The associated value is the 'object' entry of each supplied map entry.
	public function  AddPageFontMap ( $map )
	   {
		foreach  ( $map  as  $map_entry )
		   {
			$alias				=  $map_entry [ 'page' ] . ':' . $map_entry [ 'xobject-name' ] . ':' . $map_entry [ 'font-name' ] ;
			$this -> FontMap [ $alias ]	=  $map_entry [ 'object' ] ;
		    }
	    }


	// AddCharacterMap -
	//	Associates a character map to a font declaration that referenced it.
	//	Returns true if at least one font referenced this character map, false otherwise.
	public function  AddCharacterMap ( $cmap )
	   {
		$status		=  false ;

		// We loop through all fonts, since the same character map can be referenced by several font definitions
		foreach  ( $this -> Fonts  as  $font )
		   {
			if  ( $font -> CharacterMapId  ==  $cmap -> ObjectId )
			   {
				$font -> CharacterMap	=  $cmap ;
				$status			=  true ;
			    }
			// The map is flagged as secondary only when a font references it as its secondary map
			else if  ( $font -> SecondaryCharacterMapId  ==  $cmap -> ObjectId )
			   {
				$cmap -> Secondary		=  true ;
				$font -> SecondaryCharacterMap	=  $cmap ;
				$status				=  true ;
			    }
		    }

		return ( $status ) ;
	    }


	// GetFontAttributes -
	//	Gets the specified font width in hex digits and whether the font has a character map or not.
	//	$font_map_width and $font_mapped receive the results ; the return value is the same as $font_mapped.
	//	The $page_number and $template parameters are not used by this method.
	public function  GetFontAttributes ( $page_number, $template, $font, &$font_map_width, &$font_mapped )
	   {
		// Font considered as global to the document
		if  ( isset ( $this -> Fonts [ $font ] ) )
			$key	=  $font ;
		// Font not found : try to use the first one declared in the document
		else
		   {
			reset ( $this -> Fonts ) ;
			$key	=  key ( $this -> Fonts ) ;
		    }

		// Font has an associated character map
		if  ( $key  &&  $this -> Fonts [ $key ] -> CharacterMap )
		   {
			$font_map_width		=  $this -> Fonts [ $key ] -> CharacterMap -> HexCharWidth ;
			$font_mapped		=  true ;

			return ( true ) ;
		    }

		// No character map : characters are specified as two hex digits
		$font_map_width		=  2 ;
		$font_mapped		=  false ;

		return ( false ) ;
	    }


	// GetFontByMapId -
	//	Returns the font id (object id) associated with the specified mapped id, or -1 if there is none.
	//	The "page:template:id" alias is searched first, then the id alone.
	public function  GetFontByMapId ( $page_number, $template, $id )
	   {
		$alias	=  "$page_number:$template:$id" ;

		if  ( isset ( $this -> FontMap [ $alias ] ) )
			return ( $this -> FontMap [ $alias ] ) ;

		if  ( isset ( $this -> FontMap [ $id ] ) )
			return ( $this -> FontMap [ $id ] ) ;

		return ( -1 ) ;
	    }


	// GetFontObject -
	//	Returns the PdfTexterFont object for the given page, template and font id (in the form of "/something"),
	//	or false if the id is not mapped or if the mapped font is not in the font table.
	public function  GetFontObject ( $page_number, $template, $id )
	   {
		// Same lookup as GetFontByMapId() ; an unmapped id gives -1, which is never a key of the font table
		$font_object	=  $this -> GetFontByMapId ( $page_number, $template, $id ) ;

		if  ( isset ( $this -> Fonts [ $font_object ] ) )
			return ( $this -> Fonts [ $font_object ] ) ;

		return ( false ) ;
	    }


	// MapCharacter -
	//	Returns the character associated to the specified one.
	//	A font id of -1 stands for the default font. When $return_false_on_failure is true, that flag is passed
	//	to the font so that it can report a failure ; when the font is not in the font table, the character is
	//	simply converted with CodePointToUtf8().
	public function  MapCharacter ( $font, $ch, $return_false_on_failure = false )
	   {
		// Use the first declared font as the default font, if none defined. This must be done before
		// searching the buffer, since results are stored under the real font id.
		if  ( $font  ==  -1 )
			$font	=  $this -> DefaultFont ;

		// Results are buffered separately for each lookup mode, so that a failure stored as false is never
		// returned to a caller that expects the fallback character (and conversely)
		$mode	=  ( $return_false_on_failure ) ?  1 : 0 ;

		if  ( isset ( $this -> CharacterMapBuffer [ $mode ] [ $font ] [ $ch ] ) )
			return ( $this -> CharacterMapBuffer [ $mode ] [ $font ] [ $ch ] ) ;

		$cache	=  true ;

		if  ( isset  ( $this -> Fonts [ $font ] ) )
		   {
			$font_object	=  $this -> Fonts [ $font ] ;

			$code	=  $font_object -> MapCharacter ( $ch, $return_false_on_failure ) ;

			// A character map can ask for its results not to be buffered
			if  ( $font_object -> CharacterMap )
				$cache	=  $font_object -> CharacterMap -> Cache ;
		    }
		else
		   {
			$code	=  $this -> CodePointToUtf8 ( $ch ) ;
		    }

		if  ( $cache )
			$this -> CharacterMapBuffer [ $mode ] [ $font ] [ $ch ]	=  $code ;

		return ( $code ) ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                         FONT MANAGEMENT                                          ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    PdfTexterFont class -
        The PdfTexterFont class is not supposed to be used outside the context of the PdfToText class.
	It holds an optional character mapping table associated with this font.
	No provision has been made to design this class as a general purpose class ; its utility exists only in
	the scope of the PdfToText class.

  ==============================================================================================================*/
class  PdfTexterFont		extends PdfObjectBase
   {
	// Font encoding types. The constructor uses this value to decide which kind of character map, if any,
	// is instantiated for the font.
	const	FONT_ENCODING_STANDARD			=  0 ;			// No character map, use the standard character set
	const	FONT_ENCODING_WINANSI			=  1 ;			// No character map, use the Windows Ansi character set
	const	FONT_ENCODING_MAC_ROMAN			=  2 ;			// No character map, use the MAC OS Roman character set
	const	FONT_ENCODING_UNICODE_MAP		=  3 ;			// Font has an associated unicode character map
	const	FONT_ENCODING_PDF_MAP			=  4 ;			// Font has an associated PDF character map
	const	FONT_ENCODING_CID_IDENTITY_H		=  5 ;			// CID font : IDENTITY-H

	// Font variants ; a variant is or'ed with one of the above encoding types to form the font type value
	// supplied to the constructor
	const   FONT_VARIANT_STANDARD		=  0x0000 ;
	const	FONT_VARIANT_ISO8859_5		=  0x1000 ;		// Cyrillic

	// Mask and shift used by the constructor to separate the variant from the encoding type
	const	FONT_VARIANT_MASK		=  0xF000 ;
	const	FONT_VARIANT_SHIFT		=  12 ;

	// Font resource id (may be an object id, or the number given by a <</Rx...>> construct)
	public		$Id ;
	// Font type (one of the FONT_ENCODING_* constants) and variant (the variant bits shifted down, ie 1 for ISO8859-5)
	public		$FontType ;
	public		$FontVariant ;
	// Character map id, as supplied to the constructor (taken from the /ToUnicode flag, or from the /Encoding
	// flag for PDF character maps)
	public		$CharacterMapId ;
	// Secondary character map id, specified by the /Encoding flag and that can contain a /Differences flag
	public		$SecondaryCharacterMapId ;
	// Optional character map, that may be set by the PdfToText::Load method just before processing text drawing blocks
	public		$CharacterMap		=  null ;
	public		$SecondaryCharacterMap	=  null ;
	// Character widths, indexed by character
	public		$CharacterWidths	=  array ( ) ;
	// Default character width, if not present in the $CharacterWidths array
	public		$DefaultWidth		=  0 ;
	// Set to true by the constructor once character widths have been loaded
	private		$GotWidthInformation	=  false ;
	// A buffer for remembering character widths
	protected	$CharacterWidthsBuffer	=  array ( ) ;


	// Constructor -
	//	Builds a PdfTexterFont object, using its resource id and optional character map id.
	public function  __construct ( $resource_id, $cmap_id, $font_type, $secondary_cmap_id = null, $pdf_objects = null, $extra_mappings = null, $font_variant = false )
	   {

		parent::__construct ( ) ;

		$this -> Id				=  $resource_id ;
		$this -> CharacterMapId			=  $cmap_id ;
		$this -> SecondaryCharacterMapId	=  $secondary_cmap_id ;
		// The supplied font type combines an encoding type with optional variant bits : separate them
		$this -> FontType			=  $font_type  &  ~self::FONT_VARIANT_MASK ;
		$this -> FontVariant			=  ( $font_type  >>  self::FONT_VARIANT_SHIFT )  &  0x0F ;

		// Instantiate the appropriate character map for this font
		switch  ( $this -> FontType )
		   {
			case	self::FONT_ENCODING_WINANSI :
				$this -> CharacterMap	=  new  PdfTexterAdobeWinAnsiMap ( $resource_id, $this -> FontVariant ) ;
				break ;

			case	self::FONT_ENCODING_MAC_ROMAN :
				$this -> CharacterMap	=  new  PdfTexterAdobeMacRomanMap ( $resource_id, $this -> FontVariant ) ;
				break ;

			// Note that the CID map receives the $font_variant parameter, not the variant bits of the font type
			case	self::FONT_ENCODING_CID_IDENTITY_H :
				$this -> CharacterMap	=  new PdfTexterIdentityHCIDMap (  $resource_id, $font_variant ) ;
				break ;

			// The encoding map is built from the contents of the object referenced by $cmap_id ;
			// null is supplied if that object does not exist
			case	self::FONT_ENCODING_PDF_MAP :
				$cmap_data		=  isset ( $pdf_objects [ $cmap_id ] ) ?  $pdf_objects [ $cmap_id ] : null ;
				$this -> CharacterMap	=  new  PdfTexterEncodingMap ( $cmap_id, $cmap_data, $extra_mappings ) ;
				break ;

			// Nothing is instantiated here for those two types
			case	self::FONT_ENCODING_UNICODE_MAP :
				break ;

			case	self::FONT_ENCODING_STANDARD :
				break ;

			default :
				if  ( PdfToText::$DEBUG )
					warning ( "Unknown font type #$font_type found for object #$resource_id, character map #$cmap_id." ) ;
		    }

		// Get font data ; include font descriptor information if present.
		// The font data is empty when no object array was supplied or when it has no entry for this font.
		$font_data	=  isset ( $pdf_objects [ $resource_id ] ) ?  $pdf_objects [ $resource_id ] : '' ;

		if  ( preg_match ( '/FontDescriptor \s+ (?P<id> \d+) \s+ \d+ \s+ R/imsx', $font_data, $match ) )
		   {
			$descriptor_id		=  $match [ 'id' ] ;

			// Don't care about searching this in that object, or that in this object - simply catenate the font descriptor
			// with the font definition
			if  ( isset ( $pdf_objects [ $descriptor_id ] ) )
				$font_data	.=  $pdf_objects [ $descriptor_id ] ;
		    }

		// Type1 fonts belong to the Adobe 14 standard fonts available. Information about the character widths is never embedded in the PDF
		// file, but must be taken from external data (in the FontMetrics directory).
		// Nothing is done when the font definition has no usable /BaseFont entry.
		if  ( preg_match ( '#/SubType \s* /Type1#ix', $font_data )  &&
		      preg_match ( '#/BaseFont \s* / ([\w]+ \+)? (?P<font> [^\s\[</]+)#ix', $font_data, $match ) )
		   {
			$font_name	=  $match [ 'font' ] ;
			$lc_font_name	=  strtolower ( $font_name ) ;

			// Do that only if a font metrics file exists...
			if  ( isset ( PdfToText::$AdobeStandardFontMetrics [ $lc_font_name ] ) )
			   {
				$metrics_file	=  PdfToText::$FontMetricsDirectory . '/' . PdfToText::$AdobeStandardFontMetrics [ $lc_font_name ] ;

				if  ( file_exists ( $metrics_file ) )
				   {
					// The metrics file is expected to define a $charwidths array (character code => width)
					include ( $metrics_file ) ;

					if  ( isset ( $charwidths ) )
					   {
						// Build the CharacterWidths table
						foreach  ( $charwidths  as  $char => $width )
							$this -> CharacterWidths [ chr ( $char ) ]	=  ( float ) $width ;

						$this -> GotWidthInformation	=  true ;
					    }
				    }
			    }
	 	    }

		// Retrieve the character widths for this font. This means :
		// - Retrieving the /FirstChar, /LastChar and /Widths entries from the font definition. /Widths is an array of individual character
		//   widths, between the /FirstChar and /LastChar entries. A value of zero in this array means "Use the default width"...
		// - ... which is given by the /MissingWidth parameter, normally put in the font descriptor whose object id is given by the
		//   /FontDescriptor entry of the font definition
		// Well, to be considered, given the number of buggy PDFs around the world, we won't care about the /LastChar entry and we won't
		// check whether the /Widths array contains (LastChar - FirstChar + 1) integer values...
		// Get the entries
		$first_char	=  false ;
		$widths		=  false ;
		$missing_width  =  false ;

		if  ( preg_match ( '#/FirstChar \s+ (?P<char> \d+)#imsx', $font_data, $match ) )
			$first_char	=  ( int ) $match [ 'char' ] ;

		if  ( preg_match ( '#/Widths \s* \[ (?P<widths> [^\]]+) \]#imsx', $font_data, $match ) )
			$widths		=  $match [ 'widths' ] ;

		if  ( preg_match ( '#/MissingWidth \s+ (?P<missing> \d+)#imsx', $font_data, $match ) )
			$missing_width	=  $match [ 'missing' ] ;

		// It would not make sense if one of the two entries /FirstChar and /Widths was missing
		// So ensure they are all there (note that /MissingWidths can be absent)
		if  ( $first_char !==  false  &&  $widths )
		   {
			if  ( $missing_width  !==  false )
				$this -> DefaultWidth		=  ( float ) $missing_width ;

			// Here comes a really tricky part :
			// - The PDF file can contain CharProcs (example names : /a0, /a1, etc.) for which we have no
			//   Unicode equivalent
			// - The caller may have called the AddAdobeExtraMappings method, to providing a mapping between
			//   those char codes (/a0, /a1, etc.) and a Unicode equivalent
			// - Each "charproc" listed in the /Differences array as a specific code, such as :
			//	[0/a1/a2/a3...]
			//   which maps /a1 to code 0, /a2 to code 1, and so on
			// - However, the GetStringWidth() method provides real Unicode characters
			// Consequently, we have to map each CharProc character (/a1, /a2, etc.) to the Unicode value
			// that may have been specified using the AddAdobeExtraMappings() method.
			// The first step below collects the name list of CharProcs.
			$charprocs	=  false ;

			if  ( isset ( $this -> CharacterMap -> Encodings )  &&
				preg_match ( '# /CharProcs \s* << (?P<list> .*?) >>#imsx', $font_data, $match ) )
			   {
				preg_match_all ( '#/ (?P<char> \w+) \s+ \d+ \s+ \d+ \s+ R#msx', $match [ 'list' ], $char_matches ) ;

				$charprocs	=  array_flip ( $char_matches [ 'char' ] ) ;
			    }

			// The /FontMatrix entry defines the scaling to be used for the character widths (among other things).
			// Its first value is a real number (typically 0.001), so the fractional part must be captured too :
			// keeping only the integer part would give a multiplier of zero, hence zero widths.
			if  ( preg_match ( '#/FontMatrix \s* \[ \s* (?P<multiplier> [+-]? (?: \d+ (?: \. \d* )? | \. \d+ ) )#imsx', $font_data, $match ) )
				$multiplier	=  1000 * ( float ) $match [ 'multiplier' ] ;
			else
				$multiplier	=  1 ;

			$widths				=  trim ( preg_replace ( '/\s+/', ' ', $widths ) ) ;
			$widths				=  explode ( ' ', $widths ) ;

			// The width at position $i applies to character code ( /FirstChar + $i )
			for  ( $i = 0, $count = count ( $widths ) ; $i  <  $count ; $i ++ )
			   {
				$value		=  ( float ) trim ( $widths [$i] ) ;
				$chr_index	=  $first_char + $i ;

				// Tricky thing part 2 :
				if  ( $charprocs )
				   {
					// If one of the CharProc characters is listed in the /Differences array then...
					if  ( isset ( $this -> CharacterMap -> DifferencesByPosition [ $chr_index ] ) )
					   {
						$chname		=  $this -> CharacterMap -> DifferencesByPosition [ $chr_index ] ;

						// ... if this CharProcs character is defined in the encoding table (possibly because
						// it was complemented through a call to the AddAdobeExtraMappings() method), then we
						// will use its Unicode counterpart instead of the character ID coming from the
						// /Differences array)
						if  ( isset ( $charprocs [ $chname ] )  &&  isset ( $this -> CharacterMap -> Encodings [ $chname ] ) )
							$chr_index	=  $this -> CharacterMap -> Encodings [ $chname ] [2] ;
					    }
				    }

				// A zero width means "use the default width"
				$this -> CharacterWidths [ chr ( $chr_index ) ]		=  ( $value ) ?  ( $value * $multiplier ) : $this -> DefaultWidth ;
			    }

			$this -> GotWidthInformation	=  true ;
		    }
	    }


	// MapCharacter -
	//	Returns the substitution string value for the specified character, if the current font has an
	//	associated character map, or the original character encoded in utf8, if not.
	public function  MapCharacter ( $ch, $return_false_on_failure = false )
	   {
		$primary	=  $this -> CharacterMap ;
		$secondary	=  $this -> SecondaryCharacterMap ;

		// The secondary character map is only consulted when the font has a main character map
		if  ( $primary )
		   {
			if  ( isset ( $primary [ $ch ] ) )
			   {
				// Since a /ToUnicode map can have an associated /Encoding map with a /Differences list, an entry
				// of the secondary map takes precedence over the one of the main map
				if  ( $secondary  &&  isset ( $secondary [ $ch ] ) )
					return ( $secondary [ $ch ] ) ;

				return ( $primary [ $ch ] ) ;
			    }

			// Character missing from the main character map : it may still exist in the secondary one
			if  ( $secondary  &&  isset ( $secondary [ $ch ] ) )
				return ( $secondary [ $ch ] ) ;
		    }

		// No mapping found : either report the failure, or fall back to the utf8 encoding of the character
		return  ( $return_false_on_failure ) ?  false : $this -> CodePointToUtf8 ( $ch ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetStringWidth - Returns the length of a string, in 1/100 of points

	    PROTOTYPE
	        $width		=  $font -> GetStringWidth ( $text, $extra_percent ) ;

	    DESCRIPTION
	        Returns the length of a string, in 1/100 of points.

	    PARAMETERS
	        $text (string) -
	                String whose length is to be measured.

		$extra_percent (double) -
			Extra percentage to be added to the computed width.

	    RETURN VALUE
	        Returns the length of the specified string in 1/1000 of text points, or false if the font does not
		contain any character width information.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetStringWidth ( $text, $extra_percent )
	   {
		// Nothing can be measured without width information ; note that false is returned in this case
		if  ( ! $this -> GotWidthInformation )
			return ( false ) ;

		$total		=  0 ;
		$text_length	=  strlen ( $text ) ;

		// Sum up the width of every byte of the string. CharacterWidthsBuffer remembers the width that has been
		// resolved for each character, so that the CharacterWidths/DefaultWidth choice is made only once
		for  ( $pos = 0 ; $pos  <  $text_length ; $pos ++ )
		   {
			$ch	=  $text [ $pos ] ;

			if  ( ! isset ( $this -> CharacterWidthsBuffer [ $ch ] ) )
			   {
				// First occurrence : use the explicit width when one is defined, the default width otherwise
				if  ( isset ( $this -> CharacterWidths [ $ch ] ) )
					$this -> CharacterWidthsBuffer [ $ch ]	=  $this -> CharacterWidths [ $ch ] ;
				else
					$this -> CharacterWidthsBuffer [ $ch ]	=  $this -> DefaultWidth ;
			    }

			$total	+=  $this -> CharacterWidthsBuffer [ $ch ] ;
		    }

		// The sum is scaled down by ( 100 - $extra_percent ) ; the divisor is never allowed to go below 50
		// (an arbitrary limit)
		$divisor	=  100 - $extra_percent ;

		if  ( $divisor  <  50 )
			$divisor	=  50 ;

		return ( $total / $divisor ) ;
	    }
    }


/*==============================================================================================================

    PdfTexterCharacterMap -
        The PdfTexterCharacterMap class is not supposed to be used outside the context of the PdfToText class.
	Describes a character map.
	No provision has been made to design this class as a general purpose class ; its utility exists only in
	the scope of the PdfToText class.

  ==============================================================================================================*/
abstract class	PdfTexterCharacterMap	extends		PdfObjectBase
					implements	ArrayAccess, Countable
   {
	// Id of the object that defines this character map
	public		$ObjectId ;
	// How many hex digits are used to write one character in hexadecimal notation
	public 		$HexCharWidth ;
	// True when the values returned through the array access operator can safely be cached
	public		$Cache		=  false ;


	// Constructor -
	//	Remembers the id of the object the character map comes from.
	public function  __construct ( $object_id )
	   {
		parent::__construct ( ) ;
		$this -> ObjectId	=  $object_id ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    CreateInstance -
	        Factory method : returns a PdfTexterUnicodeMap when the definitions contain a begincmap, beginbfchar
		or beginbfrange keyword, a PdfTexterEncodingMap when they mention /Differences, and false when
		neither applies. $extra_mappings is only handed over to PdfTexterEncodingMap.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  CreateInstance ( $object_id, $definitions, $extra_mappings )
	   {
		$has_cmap_keywords	=  preg_match ( '# (begincmap) | (beginbfchar) | (beginbfrange) #ix', $definitions ) ;

		if  ( $has_cmap_keywords )
			return ( new PdfTexterUnicodeMap ( $object_id, $definitions ) ) ;

		if  ( stripos ( $definitions, '/Differences' )  ===  false )
			return ( false ) ;

		return ( new PdfTexterEncodingMap ( $object_id, $definitions, $extra_mappings ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	        ArrayAccess write operations : character maps are read-only, so both of them report an error.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  offsetSet ( $offset, $value )
	   {
		$exception	=  new PdfToTextDecodingException ( "Unsupported operation." ) ;
		error ( $exception ) ;
	    }

	public function  offsetUnset ( $offset )
	   {
		$exception	=  new PdfToTextDecodingException ( "Unsupported operation." ) ;
		error ( $exception ) ;
	    }
    }


/*==============================================================================================================

    PdfTexterUnicodeMap -
        A class for fonts having a character map specified with the /ToUnicode parameter.

  ==============================================================================================================*/
class  PdfTexterUnicodeMap 	extends 	PdfTexterCharacterMap
    {
	// Id of the character map (taken from the "/CMapName /Rx" entry), or -1 if the cmap does not have one
	public		$Id	;
	// Character substitution table, using the beginbfrange/endbfrange notation
	// Only constructs of the form :
	//	<low> <high> <start>
	// are stored in this table, as array ( low, high, start ) entries. Constructs of the form :
	//	<x> <y> [ <subst_x> <subst_x+1> ... <subst_y> ]
	// are stored in the $DirectMap array, because it is conceptually the same thing in the end as a character substitution being
	// defined with the beginbfchar/endbfchar construct.
	// Note that a dichotomic search in $RangeMap will be performed for each character reference not yet seen in the pdf flow.
	// Once the substitution character has been found, it will be added to the $DirectMap array for later faster access.
	// The reason for this optimization is that some pdf files can contain beginbfrange/endbfrange constructs that may seem useless,
	// except for validation purposes (ie, validating the fact that a character reference really belongs to the character map).
	// However, such constructs can lead to thousands of character substitutions ; consider the following example, that comes
	// from a sample I received :
	//	beginbfrange
	//	<1000> <1FFFF> <1000>
	//	<2000> <2FFFF> <2000>
	//	...
	//	<A000> <AFFFF> <A0000>
	//	...
	//	endbfrange
	// By naively storing a one-to-one character relationship in an associative array, such as :
	//	$array [ 0x1000 ] = 0x1000 ;
	//	$array [ 0x1001 ] = 0x1001 ;
	//	..
	//	$array [ 0x1FFF ] = 0x1FFF ;
	//	etc.
	// you may arrive to a situation where the array becomes so big that it exhausts all of the available memory.
	// This is why the ranges are stored as is and a dichotomic search is performed to go faster.
	// Since it is useless to use this method to search the same character twice, when it has been found once, the
	// substitution pair will be put in the $DirectMap array for subsequent accesses (there is little probability that a PDF
	// file contains so many different characters, unless you are processing the whole Unicode table itself ! - but in this
	// case, you will simply have to adjust the value of the memory_limit setting in your php.ini file. Consider that I am
	// not a magician...).
	protected	$RangeMap		=  array ( ) ;
	private		$RangeCount		=  0 ;				// Avoid unnecessary calls to the count() function
	private		$RangeMin		=  PHP_INT_MAX,			// Min and max values of the character ranges
			$RangeMax		=  -1 ;
	// Character substitution table for tables using the beginbfchar notation
	protected	$DirectMap		=  array ( ) ;


	// Constructor -
	//	Analyzes the text contents of a CMAP and extracts mappings from the beginbfchar/endbfchar and
	//	beginbfrange/endbfrange constructs.
	//	Parameters :
	//	$object_id   - Id of the object containing the character map.
	//	$definitions - Text contents of the character map.
	//	Malformed range definitions are reported through the error() function.
	public function  __construct ( $object_id, $definitions )
	   {
		parent::__construct ( $object_id ) ;

		if  ( PdfToText::$DEBUG )
		   {
	   		echo "\n----------------------------------- UNICODE CMAP #$object_id\n" ;
			echo $definitions;
		    }

		// Retrieve the cmap id, if any
		preg_match ( '# /CMapName \s* /R (?P<num> \d+) #ix', $definitions, $match ) ;
		$this -> Id 		=  isset ( $match [ 'num' ] ) ?  $match [ 'num' ] : -1 ;

		// Get the codespace range, which will give us the width of a character specified in hexadecimal notation
		preg_match ( '# begincodespacerange \s+ <\s* (?P<low> [0-9a-f]+) \s*> \s* <\s* (?P<high> [0-9a-f]+) \s*> \s*endcodespacerange #ix', $definitions, $match ) ;

		if  ( isset ( $match [ 'low' ] ) )
			$this -> HexCharWidth 	=  max ( strlen ( $match [ 'low' ] ), strlen ( $match [ 'high' ] ) ) ;
		else
			$this -> HexCharWidth	=  0 ;

		// Widest source character code (in hex digits) really seen in the bfchar/bfrange definitions
		$max_found_char_width	=  0 ;

		// Process beginbfchar/endbfchar constructs
		if  ( preg_match_all ( '/ beginbfchar \s* (?P<chars> .*?) endbfchar /imsx', $definitions, $char_matches ) )
		    {
		    	foreach  ( $char_matches [ 'chars' ]  as  $char_list )
		    	   {
				// beginbfchar / endbfchar constructs can behave as a kind of beginbfrange/endbfrange ; example :
				//	<21> <0009 0020 000d>
				// means :
				//	. Map character #21 to #0009
				//	. Map character #22 to #0020
				//	. Map character #23 to #000D
				// There is no clue in the Adobe PDF specification that a single character could be mapped to a range.
				// The normal constructs would be :
				//	<21> <0009>
				//	<22> <0020>
				//	<23> <000D>
				preg_match_all ( '/< \s* (?P<item> .*?) \s* >/msx', $char_list, $item_matches ) ;

				// Items go by pairs (source code, substitution) ; a dangling source code without any substitution
				// is ignored instead of being mapped to character 0
				for  ( $i = 0, $item_count = count ( $item_matches [ 'item' ] ) ; $i + 1  <  $item_count ; $i += 2 )
				   {
					$char		=  hexdec ( $item_matches [ 'item' ] [$i] ) ;
					$char_width	=  strlen ( $item_matches [ 'item' ] [$i] ) ;
					$map		=  explode ( ' ', preg_replace ( '/\s+/', ' ', $item_matches [ 'item' ] [ $i + 1 ] ) ) ;

					if  ( $char_width  >  $max_found_char_width )
						$max_found_char_width	=  $char_width ;

					for  ( $j = 0, $map_count = count ( $map ) ; $j  <  $map_count ; $j ++ )
					   {
						$subst				=  hexdec ( $map [$j] ) ;

						// Check for this very special, not really documented feature which maps CIDs to a non-existing Unicode character
						// (but it still corresponds to something...)
						if  ( isset ( PdfTexterAdobeUndocumentedUnicodeMap::$UnicodeMap [ $subst ] ) )
							$subst	=  PdfTexterAdobeUndocumentedUnicodeMap::$UnicodeMap [ $subst ] ;

						$this -> DirectMap [ $char + $j ]	=  $subst ;
					    }
				    }

		    	    }
		     }

		// Process beginbfrange/endbfrange constructs
		if  ( preg_match_all ( '/ beginbfrange \s* (?P<ranges> .*?) endbfrange /imsx', $definitions, $range_matches ) )
		   {
			foreach  ( $range_matches [ 'ranges' ]  as  $range_list )
			   {
				$start_index	=  0 ;

				// There are two forms of syntax in a beginbfrange..endbfrange construct
				// 1) "<x> <y> <z>", which maps character ids x through y to z through (z+y-x)
				// 2) "<x> <y> [<a1> <a2> ... <an>]", which maps character x to a1, x+1 to a2, up to y, which is mapped to an
				// All the values are hex digits.
				// We will loop through the range definitions by first identifying the <x> and <y>, and the character that follows
				// them, which is either a "<" for notation 1), or a "[" for notation 2).
				while  ( preg_match ( '#  < \s* (?P<from> [0-9a-f]+) \s* > \s* < \s* (?P<to> [0-9a-f]+) \s* > \s* (?P<nextchar> .) #imsx',
						$range_list, $range_match, PREG_OFFSET_CAPTURE, $start_index ) )
				   {
					$from			=  hexdec ( $range_match [ 'from' ] [0] ) ;
					$to			=  hexdec ( $range_match [ 'to'   ] [0] ) ;
					$next_char		=  $range_match [ 'nextchar' ] [0] ;
					$next_char_index	=  $range_match [ 'nextchar' ] [1] ;
					$char_width		=  strlen ( $range_match [ 'from' ] [0] ) ;

					if  ( $char_width  >  $max_found_char_width )
						$max_found_char_width	=  $char_width ;

					// Form 1) : catch the third hex value after <x> and <y>
					if  ( $next_char  ==  '<' )
					   {
						if  ( preg_match ( '/ \s* (?P<start> [0-9a-f]+) (?P<tail> \s* > \s*) /imsx', $range_list, $start_match, PREG_OFFSET_CAPTURE, $next_char_index + 1 ) )
						   {
							$subst		=  hexdec ( $start_match [ 'start' ] [0] ) ;

							// Check for this very special, not really documented feature which maps CIDs to a non-existing Unicode character
							// (but it still corresponds to something...)
							if  ( isset ( PdfTexterAdobeUndocumentedUnicodeMap::$UnicodeMap [ $subst ] ) )
								$subst	=  PdfTexterAdobeUndocumentedUnicodeMap::$UnicodeMap [ $subst ] ;

							// Don't create a range if <x> and <y> are the same
							if  ( $from  !=  $to )
							   {
								$this -> RangeMap []	=  array ( $from, $to, $subst ) ;

								// Adjust min and max values for the ranges stored in this character map - to avoid unnecessary testing
								if  ( $from  <  $this -> RangeMin )
									$this -> RangeMin	=  $from ;

								if  ( $to  >  $this -> RangeMax )
									$this -> RangeMax	=  $to ;
							    }
							else
								$this -> DirectMap [ $from ]	=  $subst ;

							$start_index	=  $start_match [ 'tail' ] [1] + 1 ;
						    }
						else
						   {
							error ( "Character range $from..$to not followed by an hexadecimal value in Unicode map #$object_id." ) ;

							// error() is defined elsewhere ; should it return, move past the offending range so that
							// the same definition is not scanned again and again
							$start_index	=  $next_char_index + 1 ;
						    }
					    }
					// Form 2) : catch all the hex values between square brackets after <x> and <y>
					else if  ( $next_char  ==  '[' )
					   {
						if  ( preg_match ( '/ (?P<values> [\s<>0-9a-f]+ ) (?P<tail> \] \s*)/imsx', $range_list, $array_match, PREG_OFFSET_CAPTURE, $next_char_index + 1 ) )
						   {
							preg_match_all ( '/ < \s* (?P<num> [0-9a-f]+) \s* > /imsx', $array_match [ 'values' ] [0], $array_values ) ;

							$values		=  $array_values [ 'num' ] ;
							$value_count	=  count ( $values ) ;

							// Stop at the end of the range or when the array runs out of values, whichever comes first :
							// codes without a listed substitution are left unmapped rather than being mapped to 0
							for  ( $i = $from, $count = 0 ; $i  <=  $to  &&  $count  <  $value_count ; $i ++, $count ++ )
								$this -> DirectMap [$i] 	=  hexdec ( $values [ $count ] ) ;

							$start_index	=  $array_match [ 'tail' ] [1] + 1 ;
						    }
						else
						   {
							error ( "Character range $from..$to not followed by an array of hexadecimal values in Unicode map #$object_id." ) ;

							// Same precaution as above if error() returns
							$start_index	=  $next_char_index + 1 ;
						    }
					    }
					else
					   {
						error ( "Unexpected character '$next_char' in Unicode map #$object_id." ) ;
						$start_index	=  $range_match [ 'nextchar' ] [1] + 1 ;
					    }
				    }
			    }

			// Sort the ranges by their starting offsets, as required by the dichotomic search in offsetGetSafe()
			$this -> RangeCount	=  count ( $this -> RangeMap ) ;

			if  ( $this -> RangeCount  >  1 )
			   {
				usort ( $this -> RangeMap, array ( $this, '__rangemap_cmpfunc' ) ) ;
			    }
		    }

		// The code width really used by the definitions takes precedence over the declared codespace range
		if ( $max_found_char_width  &&  $max_found_char_width  !=  $this -> HexCharWidth )
		   {
			if  ( PdfToText::$DEBUG )
				warning ( "Character map #$object_id : specified code width ({$this -> HexCharWidth}) differs from actual width ($max_found_char_width)." ) ;

			$this -> HexCharWidth	=  $max_found_char_width ;
		    }
	     }


	// __rangemap_cmpfunc -
	//	usort() callback ordering $RangeMap entries by their lower bound.
	public function  __rangemap_cmpfunc ( $a, $b )
	   { return ( $a [0] - $b [0] ) ; }


	/*--------------------------------------------------------------------------------------------------------------

	        Interface implementations.

	 *-------------------------------------------------------------------------------------------------------------*/

	// count -
	//	Number of entries currently present in the direct map (ranges are not expanded).
	public function  count ( )
	   { return ( count ( $this -> DirectMap ) ) ; }


	// offsetExists -
	//	True if the character has a substitution, either direct or through a range.
	public function  offsetExists ( $offset )
	   { return  ( $this -> offsetGetSafe ( $offset )  !==  false ) ; }


	// offsetGetSafe -
	//	Returns the substitution for character $offset, or false if this map does not define one.
	//	The substitution is converted by CodePointToUtf8() when $translate is true, and returned as a
	//	numeric code otherwise.
	//	A substitution found in a range is stored in $DirectMap for later accesses.
	public function  offsetGetSafe ( $offset, $translate = true )
	   {
		// Return value
		$code	=  false ;

		// Character already has an entry (character reference => substituted character)
		if  ( isset ( $this -> DirectMap [ $offset ] ) )
		   {
			$code	=  ( $translate ) ? $this -> CodePointToUtf8 ( $this -> DirectMap [ $offset ] ) : $this -> DirectMap [ $offset ] ;
		    }
		// Character does not have a direct entry ; have a look in the character ranges defined for this map
		else if  ( $this -> RangeCount  &&  $offset  >=  $this -> RangeMin  &&  $offset  <=  $this -> RangeMax )
		   {
			$low		=  0 ;
			$high		=  count ( $this -> RangeMap ) - 1 ;
			$result		=  false ;

			// Use a dichotomic search through character ranges
			while  ( $low  <=  $high )
			   {
				$middle		=  ( $low + $high )  >>  1 ;

				if  ( $offset  <  $this -> RangeMap [ $middle ] [0] )
					$high	=  $middle - 1 ;
				else if  ( $offset  >  $this -> RangeMap [ $middle ] [1] )
					$low	=  $middle + 1 ;
				else
				   {
					// Found : substitution start + distance of the character from the start of the range
					$result	=  $this -> RangeMap [ $middle ] [2] + $offset - $this -> RangeMap [ $middle ] [0] ;
					break ;
				    }
			    }

			// Once a character has been found in the ranges defined by this character map, store it in the DirectMap property
			// so that it will be directly retrieved during subsequent accesses
			if  ( $result  !==  false )
			   {
				$code				=  ( $translate ) ? $this -> CodePointToUtf8 ( $result ) : $result ;
				$this -> DirectMap [ $offset ]	=  $result ;
			    }
		    }

		// All done, return
		return ( $code ) ;
	    }


	// offsetGet -
	//	Same as offsetGetSafe(), except that a character without any substitution is returned as is,
	//	after conversion by CodePointToUtf8().
	public function  offsetGet ( $offset )
	   {
		$code	=  $this -> offsetGetSafe ( $offset ) ;

		if  ( $code  === false )
			$code	=  $this -> CodePointToUtf8 ( $offset ) ;

		return ( $code ) ;
	    }
    }


/*==============================================================================================================

    PdfTexterEncodingMap -
        A class for fonts having a character map specified with the /Encoding parameter.

  ==============================================================================================================*/
class  PdfTexterEncodingMap 	extends  PdfTexterCharacterMap
   {
	// Possible encodings (there is a 5th one, MacExpertEncoding, but used for "expert fonts" ; no need to deal
	// with it here since we only want to extract text)
	// Note that the values of these constants are direct indices to the second dimension of the $Encodings table
	const 	PDF_STANDARD_ENCODING 		=  0 ;
	const 	PDF_MAC_ROMAN_ENCODING 		=  1 ;
	const 	PDF_WIN_ANSI_ENCODING 		=  2 ;
	const 	PDF_DOC_ENCODING 		=  3 ;

	// Correspondence between an encoding name and its corresponding character in the
	// following format : Standard, Mac, Windows, Pdf
	// $GlobalEncodings is loaded once from Maps/adobe-charsets.map ; $Encodings is the per-instance copy,
	// merged with the extra mappings given to the constructor
	private static 		$GlobalEncodings 	=  false ;
	public			$Encodings ;
	// Encoding type (one of the PDF_*_ENCODING constants)
	public 			$Encoding ;
	// Indicates whether this character map is a secondary one used for Unicode maps ; this must be set at
	// a higher level by the PdfTexterFont because at the time a character map is instantiated, we do not know
	// yet whether it will be a primary (normal) map, or a map secondary to an existing Unicode map
	public			$Secondary ;
	// Differences array (a character substitution table to the standard encodings)
	public 			$Map 			=  array ( ) ;
	// A secondary map for the Differences array, which only contains the differences ; this is used
	// for Unicode fonts that also have an associated /Differences parameter, which should not include the
	// whole standard Adobe character map but only the differences of encodings
	public			$SecondaryMap		=  array ( ) ;
	// Differences by position number (character code => character name, after removal of the variant suffix)
	public			$DifferencesByPosition	=  array ( ) ;


   	// Constructor -
	//	Analyzes the text contents of an encoding definition : detects the base encoding (WinAnsiEncoding
	//	when none is specified), builds the initial character map from the Adobe character sets, then
	//	applies the substitutions listed in the /Differences array.
	//	Parameters :
	//	$object_id      - Id of the object containing the encoding definition.
	//	$definitions    - Text contents of the encoding definition.
	//	$extra_mappings - Array of additional character name definitions, merged with the Adobe ones.
	public function  __construct ( $object_id, $definitions, $extra_mappings )
	   {
		// Ignore character variants whose names end with these suffixes
		static	$IgnoredVariants	=  array
		   (
			'/\.scalt$/',
			'/\.sc$/',
			'/\.fitted$/',
			'/\.oldstyle$/',
			'/\.taboldstyle$/',
			'/\.alt$/',
			'/alt$/',
		    ) ;

		parent::__construct ( $object_id ) ;

		// Load the default Adobe character sets, if not already done
		if  ( self::$GlobalEncodings  ===  false )
		   {
			$charset_file		=  dirname ( __FILE__ ) .  '/Maps/adobe-charsets.map' ;
			include ( $charset_file ) ;
			self::$GlobalEncodings	=  ( isset ( $adobe_charsets ) ) ?  $adobe_charsets : array ( ) ;
		    }

		$this -> Encodings		=  array_merge ( self::$GlobalEncodings, $extra_mappings ) ;

		// Fonts using default Adobe character sets and hexadecimal representations are one-byte long
		$this -> HexCharWidth	=  2 ;

		if  ( PdfToText::$DEBUG )
		   {
	   		echo "\n----------------------------------- ENCODING CMAP #$object_id\n" ;
			echo $definitions;
		    }

		// Retrieve text encoding
		preg_match ( '# / (?P<encoding> (WinAnsiEncoding) | (PDFDocEncoding) | (MacRomanEncoding) | (StandardEncoding) ) #ix',
				$definitions, $encoding_match ) ;

		if ( ! isset ( $encoding_match [ 'encoding' ] ) )
			$encoding_match [ 'encoding' ]	=  'WinAnsiEncoding' ;

		switch ( strtolower ( $encoding_match [ 'encoding' ] ) )
		   {
		   	case 	'pdfdocencoding' 	:  $this -> Encoding	=  self::PDF_DOC_ENCODING 	; break ;
		   	case 	'macromanencoding' 	:  $this -> Encoding 	=  self::PDF_MAC_ROMAN_ENCODING ; break ;
		   	case 	'standardencoding' 	:  $this -> Encoding 	=  self::PDF_STANDARD_ENCODING 	; break ;
		   	case 	'winansiencoding' 	:
		   	default 		 	:  $this -> Encoding 	=  self::PDF_WIN_ANSI_ENCODING	;
		    }

		// Build a virgin character map using the detected encoding
		foreach  ( $this -> Encodings  as  $code_array )
		   {
			$char 			=  $code_array [ $this -> Encoding ] ;
			$this -> Map [ $char ] 	=  $char ;
		    }

		// Extract the Differences array : prefer the array that immediately follows the /Differences keyword,
		// and fall back to the first array found in the definitions when there is none
		if  ( ! preg_match ( '# /Differences \s* \[ \s* (?P<contents> [^\]]*?)  \s* \] #ix', $definitions, $match ) )
	   		preg_match ( '/ \[ \s* (?P<contents> [^\]]*?)  \s* \] /x', $definitions, $match ) ;

		if (  ! isset ( $match [ 'contents' ] ) )
			return ;

		// Put a slash before every number so that numbers and names can be split the same way
		$data 		=  trim ( preg_replace ( '/\s+(\d+)/', '/$1', $match [ 'contents' ] ) ) ;
		$items 		=  explode ( '/', $data ) ;
		$index		=  0 ;

		for  ( $i = 0, $item_count = count ( $items ) ; $i  <  $item_count ; $i ++ )
		   {
			$raw_item	=  trim ( $items [$i] ) ;

			// Splitting on '/' yields an empty item when the array starts with a name or is empty ; such an item
			// is not a character name and must not consume a position in the map
			if  ( $raw_item  ===  '' )
				continue ;

		   	$item 		=  PdfToText::DecodeRawName ( $raw_item ) ;

		   	// Integer value  : index of next character in map
			if  ( is_numeric ( $item ) )
				$index 	=  ( int ) $item ;
			// String value : a character name, as defined by Adobe
			else
			   {
				// Remove variant part of the character name
				$item	=  preg_replace  ( $IgnoredVariants, '', trim ( $item ) ) ;

			   	// Keyword (character name) exists in the encoding table
				if  ( isset ( $this -> Encodings [ $item ] ) )
				   {
					$this -> Map [ $index ] 		=
					$this -> SecondaryMap [ $index ]	=  $this -> Encodings [ $item ] [ $this -> Encoding ] ;
				    }
				// Not defined ; check if this is the "/gxx" notation, where "xx" is a number
				else if  ( preg_match ( '/g (?P<value> \d+)/x', $item, $name_match ) )
				   {
					$value		=  ( int ) $name_match [ 'value' ] ;

					// In my current state of investigations, the /g notation has the following characteristics :
					// - The value 29 must be added to the number after the "/g" string (why ???)
					// - The value after the "/g" string can be greater than 255, meaning that it could be Unicode codepoint
					// This has to be carefully watched before revision
					$value	+=  29 ;

					$this -> Map [ $index ]			=
					$this -> SecondaryMap [ $index ]	=  $value ;
				    }
				// Some characters can be specified by the "/uni" prefix followed by a sequence of hex digits,
				// which is not described by the PDF specifications. This sequence gives a Unicode code point.
				else if  ( preg_match ( '/uni (?P<value>  [0-9a-f]+)/ix', $item, $name_match ) )
				   {
					$value		=  hexdec ( $name_match [ 'value' ] ) ;

					$this -> Map [ $index ]			=
					$this -> SecondaryMap [ $index ]	=  ( int ) $value ;
				    }
				// Otherwise, put a question mark instead
				else
				   {
					if  ( PdfToText::$DEBUG )
						 warning ( "Unknown character name found in a /Differences[] array : [$item]" ) ;

					$this -> Map [ $index ] 		=
					$this -> SecondaryMap [ $index ]	=  ord ( '?' ) ;
				    }

				$this -> DifferencesByPosition [ $index ]	=  $item ;

				// Consecutive names are assigned to consecutive character codes
				$index ++ ;
			    }
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	        Interface implementations.

	 *-------------------------------------------------------------------------------------------------------------*/

	// count -
	//	Number of entries in the main character map.
	public function  count ( )
	   { return ( count ( $this -> Map ) ) ; }


	// offsetExists -
	//	Checks the main map, or only the /Differences entries when this map is a secondary one.
	public function  offsetExists ( $offset )
	   {
		return ( ( ! $this -> Secondary ) ?
				isset ( $this -> Map [ $offset ] ) :
				isset ( $this -> SecondaryMap [ $offset ] ) ) ;
	    }


	// offsetGet -
	//	Returns the translation of character $offset, converted by CodePointToUtf8().
	//	A primary map always returns something (the character itself when it has no entry) ; a secondary map
	//	returns false for characters that are not listed in the /Differences array.
	public function  offsetGet ( $offset )
	   {
		if  ( ! $this -> Secondary )
		   {
			if  ( isset ( $this -> Map [ $offset ] ) )
				$ord		=  $this -> Map [ $offset ] ;
			else
				$ord		=  $offset ;

			// Check for final character translations (concerns only a few number of characters)
			if  ( $this -> Encoding  ==  self::PDF_WIN_ANSI_ENCODING  &&  isset ( PdfTexterAdobeWinAnsiMap::$WinAnsiCharacterMap [0] [ $ord ] ) )
				$ord	=  PdfTexterAdobeWinAnsiMap::$WinAnsiCharacterMap [0] [ $ord ] ;
			else if  ( $this -> Encoding  ==  self::PDF_MAC_ROMAN_ENCODING  &&  isset ( PdfTexterAdobeMacRomanMap::$MacRomanCharacterMap [0] [ $ord ] ) )
				$ord	=  PdfTexterAdobeMacRomanMap::$MacRomanCharacterMap [0] [ $ord ] ;
			// As far as I have been able to see, the values expressed by the /Differences tag were the only ones used within the
			// Pdf document ; however, handle the case where some characters do not belong to the characters listed by /Differences,
			// and use the official Adobe encoding maps when necessary
			else if  ( isset ( $this -> Encodings [ $ord ] [ $this -> Encoding ] ) )
				$ord	=  $this -> Encodings [ $ord ] [ $this -> Encoding ] ;

			$result		=  $this -> CodePointToUtf8 ( $ord ) ;
		    }
		else if  ( isset ( $this -> SecondaryMap [ $offset ] ) )
		   {
			$ord		=   $this -> SecondaryMap [ $offset ] ;
			$result		=   $this -> CodePointToUtf8 ( $ord ) ;
		    }
		else
			$result		=  false ;

		return ( $result ) ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                     CHARACTER MAP MANAGEMENT                                     ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    class PdfTexterAdobeMap -
        Abstract class to handle Adobe-specific fonts.

  ==============================================================================================================*/
abstract class  PdfTexterAdobeMap	extends  PdfTexterCharacterMap
   {
	// Font variant ; one of the PdfTexterFont::FONT_VARIANT_* constants
	public		$Variant ;
	// Translation tables, indexed by font variant then by character code ; supplied by derived classes
	// through the constructor
	public		$Map ;


	// Constructor -
	//	Parameters :
	//	$object_id    - Id of the object this character map belongs to.
	//	$font_variant - Index of the translation table to be used within $map.
	//	$map          - Array of translation tables, one per font variant.
	//	An error is reported through error() when $map has no table for the requested variant.
	public function  __construct ( $object_id, $font_variant, $map )
	   {
		parent::__construct ( $object_id ) ;

		$this -> HexCharWidth	=  2 ;
		$this -> Variant	=  $font_variant ;
		$this -> Map		=  $map ;

		if  ( ! isset ( $map [ $font_variant ] ) )
			error ( new  PdfToTextDecodingException ( "Undefined font variant #$font_variant." ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	        Interface implementations.

	 *-------------------------------------------------------------------------------------------------------------*/

	// count -
	//	Number of translations defined for the current font variant (0 if the variant has no table).
	//	The Map property must be referenced here, not a "$Map" variable property name.
	public function  count ( )
	   {
		if  ( ! isset ( $this -> Map [ $this -> Variant ] ) )
			return ( 0 ) ;

		return ( count ( $this -> Map [ $this -> Variant ] ) ) ;
	    }


	// offsetExists -
	//	True if the current font variant defines a translation for this character.
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> Map [ $this-> Variant ] [ $offset ] ) ) ; }


	// offsetGet -
	//	Returns the translated character, or the character itself when it has no translation, after
	//	conversion by CodePointToUtf8().
	public function  offsetGet ( $offset )
	   {
		if  ( isset ( $this -> Map [ $this-> Variant ] [ $offset ] ) )
			$ord		=  $this -> Map [ $this -> Variant ] [ $offset ] ;
		else
			$ord		=  $offset ;

		return ( $this -> CodePointToUtf8 ( $ord ) ) ;
	    }
    }


/*==============================================================================================================

    class PdfTexterAdobeWinAnsiMap -
        Concrete character map class to handle Adobe-specific Win Ansi fonts.

  ==============================================================================================================*/
class	PdfTexterAdobeWinAnsiMap		extends		PdfTexterAdobeMap
   {
	// Windows Ansi mapping to Unicode. Only substitutions that have no direct equivalent are listed here
	// Source : https://msdn.microsoft.com/en-us/goglobal/cc305145.aspx
	// In the normal mapping (variant 0), only characters from 0x80 to 0x9F have no direct translation
	// The first dimension of the table is the font variant, the second one is the character code
	public static	$WinAnsiCharacterMap	=  array
	   (
		// Variant 0 : normal WinAnsi mapping
		0	=>  array
		   (
			0x80	=>  0x20AC,
			0x82	=>  0x201A,
			0x83	=>  0x0192,
			0x84	=>  0x201E,
			0x85	=>  0x2026,
			0x86	=>  0x2020,
			0x87	=>  0x2021,
			0x88	=>  0x02C6,
			0x89	=>  0x2030,
			0x8A	=>  0x0160,
			0x8B	=>  0x2039,
			0x8C	=>  0x0152,
			0x8E	=>  0x017D,
			0x91	=>  0x2018,
			0x92	=>  0x2019,
			0x93	=>  0x201C,
			0x94	=>  0x201D,
			0x95	=>  0x2022,
			0x96	=>  0x2013,
			0x97	=>  0x2014,
			0x98	=>  0x02DC,
			0x99	=>  0x2122,
			0x9A	=>  0x0161,
			0x9B	=>  0x203A,
			0x9C	=>  0x0153,
			0x9E	=>  0x017E,
			0x9F	=>  0x0178
		     ),
		// Variant 1 : Cyrillic (originally labelled ISO8859-5)
		// NOTE(review): the 0xC0..0xFF => U+0410..U+044F layout below looks like Windows-1251 rather than
		// ISO 8859-5 - confirm
		1	=>  array
		   (
			0x93	=> 0x0022,	// Both quote characters are mapped to the plain double quote
			0x94	=> 0x0022,
			0xC0	=> 0x0410,
			0xC1	=> 0x0411,
			0xC2	=> 0x0412,
			0xC3	=> 0x0413,
			0xC4	=> 0x0414,
			0xC5	=> 0x0415,
			0xC6	=> 0x0416,
			0xC7	=> 0x0417,
			0xC8	=> 0x0418,
			0xC9	=> 0x0419,
			0xCA	=> 0x041A,
			0xCB	=> 0x041B,
			0xCC	=> 0x041C,
			0xCD	=> 0x041D,
			0xCE	=> 0x041E,
			0xCF	=> 0x041F,
			0xD0	=> 0x0420,
			0xD1	=> 0x0421,
			0xD2	=> 0x0422,
			0xD3	=> 0x0423,
			0xD4	=> 0x0424,
			0xD5	=> 0x0425,
			0xD6	=> 0x0426,
			0xD7	=> 0x0427,
			0xD8	=> 0x0428,
			0xD9	=> 0x0429,
			0xDA	=> 0x042A,
			0xDB	=> 0x042B,
			0xDC	=> 0x042C,
			0xDD	=> 0x042D,
			0xDE	=> 0x042E,
			0xDF	=> 0x042F,
			0xE0	=> 0x0430,
			0xE1	=> 0x0431,
			0xE2	=> 0x0432,
			0xE3	=> 0x0433,
			0xE4	=> 0x0434,
			0xE5	=> 0x0435,
			0xE6	=> 0x0436,
			0xE7	=> 0x0437,
			0xE8	=> 0x0438,
			0xE9	=> 0x0439,
			0xEA	=> 0x043A,
			0xEB	=> 0x043B,
			0xEC	=> 0x043C,
			0xED	=> 0x043D,
			0xEE	=> 0x043E,
			0xEF	=> 0x043F,
			0xF0	=> 0x0440,
			0xF1	=> 0x0441,
			0xF2	=> 0x0442,
			0xF3	=> 0x0443,
			0xF4	=> 0x0444,
			0xF5	=> 0x0445,
			0xF6	=> 0x0446,
			0xF7	=> 0x0447,
			0xF8	=> 0x0448,
			0xF9	=> 0x0449,
			0xFA	=> 0x044A,
			0xFB	=> 0x044B,
			0xFC	=> 0x044C,
			0xFD	=> 0x044D,
			0xFE	=> 0x044E,
			0xFF	=> 0x044F
		    )
	    ) ;

	// Constructor -
	//	Hands the WinAnsi translation tables over to the parent class, which selects the one that
	//	corresponds to $font_variant.
	public function  __construct ( $object_id, $font_variant )
	   {
		parent::__construct ( $object_id, $font_variant, self::$WinAnsiCharacterMap ) ;
	    }
    }


/*==============================================================================================================

    class PdfTexterAdobeMacRomanMap -
        Abstract class to handle Adobe-specific Mac Roman fonts.

  ==============================================================================================================*/
class	PdfTexterAdobeMacRomanMap		extends		PdfTexterAdobeMap
   {
	// Mac roman to Unicode encoding
	// Source : ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
	//
	// The table contains a single sub-table (index 0) that associates every character code from 0x80 to 0xFF
	// with a Unicode code point. Codes below 0x80 are not listed.
	// NOTE(review): the values follow Apple's Mac OS Roman table ; the MacRomanEncoding defined by the PDF
	// specification may differ for a few codes (0xDB for instance) - confirm if exact PDF conformance matters.
	public static	$MacRomanCharacterMap	=  array
	   (
		0	=>  array
		   (
			0x80	=>  0x00C4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
			0x81	=>  0x00C5,	# LATIN CAPITAL LETTER A WITH RING ABOVE
			0x82	=>  0x00C7,	# LATIN CAPITAL LETTER C WITH CEDILLA
			0x83	=>  0x00C9,	# LATIN CAPITAL LETTER E WITH ACUTE
			0x84	=>  0x00D1,	# LATIN CAPITAL LETTER N WITH TILDE
			0x85	=>  0x00D6,	# LATIN CAPITAL LETTER O WITH DIAERESIS
			0x86	=>  0x00DC,	# LATIN CAPITAL LETTER U WITH DIAERESIS
			0x87	=>  0x00E1,	# LATIN SMALL LETTER A WITH ACUTE
			0x88	=>  0x00E0,	# LATIN SMALL LETTER A WITH GRAVE
			0x89	=>  0x00E2,	# LATIN SMALL LETTER A WITH CIRCUMFLEX
			0x8A	=>  0x00E4,	# LATIN SMALL LETTER A WITH DIAERESIS
			0x8B	=>  0x00E3,	# LATIN SMALL LETTER A WITH TILDE
			0x8C	=>  0x00E5,	# LATIN SMALL LETTER A WITH RING ABOVE
			0x8D	=>  0x00E7,	# LATIN SMALL LETTER C WITH CEDILLA
			0x8E	=>  0x00E9,	# LATIN SMALL LETTER E WITH ACUTE
			0x8F	=>  0x00E8,	# LATIN SMALL LETTER E WITH GRAVE
			0x90	=>  0x00EA,	# LATIN SMALL LETTER E WITH CIRCUMFLEX
			0x91	=>  0x00EB,	# LATIN SMALL LETTER E WITH DIAERESIS
			0x92	=>  0x00ED,	# LATIN SMALL LETTER I WITH ACUTE
			0x93	=>  0x00EC,	# LATIN SMALL LETTER I WITH GRAVE
			0x94	=>  0x00EE,	# LATIN SMALL LETTER I WITH CIRCUMFLEX
			0x95	=>  0x00EF,	# LATIN SMALL LETTER I WITH DIAERESIS
			0x96	=>  0x00F1,	# LATIN SMALL LETTER N WITH TILDE
			0x97	=>  0x00F3,	# LATIN SMALL LETTER O WITH ACUTE
			0x98	=>  0x00F2,	# LATIN SMALL LETTER O WITH GRAVE
			0x99	=>  0x00F4,	# LATIN SMALL LETTER O WITH CIRCUMFLEX
			0x9A	=>  0x00F6,	# LATIN SMALL LETTER O WITH DIAERESIS
			0x9B	=>  0x00F5,	# LATIN SMALL LETTER O WITH TILDE
			0x9C	=>  0x00FA,	# LATIN SMALL LETTER U WITH ACUTE
			0x9D	=>  0x00F9,	# LATIN SMALL LETTER U WITH GRAVE
			0x9E	=>  0x00FB,	# LATIN SMALL LETTER U WITH CIRCUMFLEX
			0x9F	=>  0x00FC,	# LATIN SMALL LETTER U WITH DIAERESIS
			0xA0	=>  0x2020,	# DAGGER
			0xA1	=>  0x00B0,	# DEGREE SIGN
			0xA2	=>  0x00A2,	# CENT SIGN
			0xA3	=>  0x00A3,	# POUND SIGN
			0xA4	=>  0x00A7,	# SECTION SIGN
			0xA5	=>  0x2022,	# BULLET
			0xA6	=>  0x00B6,	# PILCROW SIGN
			0xA7	=>  0x00DF,	# LATIN SMALL LETTER SHARP S
			0xA8	=>  0x00AE,	# REGISTERED SIGN
			0xA9	=>  0x00A9,	# COPYRIGHT SIGN
			0xAA	=>  0x2122,	# TRADE MARK SIGN
			0xAB	=>  0x00B4,	# ACUTE ACCENT
			0xAC	=>  0x00A8,	# DIAERESIS
			0xAD	=>  0x2260,	# NOT EQUAL TO
			0xAE	=>  0x00C6,	# LATIN CAPITAL LETTER AE
			0xAF	=>  0x00D8,	# LATIN CAPITAL LETTER O WITH STROKE
			0xB0	=>  0x221E,	# INFINITY
			0xB1	=>  0x00B1,	# PLUS-MINUS SIGN
			0xB2	=>  0x2264,	# LESS-THAN OR EQUAL TO
			0xB3	=>  0x2265,	# GREATER-THAN OR EQUAL TO
			0xB4	=>  0x00A5,	# YEN SIGN
			0xB5	=>  0x00B5,	# MICRO SIGN
			0xB6	=>  0x2202,	# PARTIAL DIFFERENTIAL
			0xB7	=>  0x2211,	# N-ARY SUMMATION
			0xB8	=>  0x220F,	# N-ARY PRODUCT
			0xB9	=>  0x03C0,	# GREEK SMALL LETTER PI
			0xBA	=>  0x222B,	# INTEGRAL
			0xBB	=>  0x00AA,	# FEMININE ORDINAL INDICATOR
			0xBC	=>  0x00BA,	# MASCULINE ORDINAL INDICATOR
			0xBD	=>  0x03A9,	# GREEK CAPITAL LETTER OMEGA
			0xBE	=>  0x00E6,	# LATIN SMALL LETTER AE
			0xBF	=>  0x00F8,	# LATIN SMALL LETTER O WITH STROKE
			0xC0	=>  0x00BF,	# INVERTED QUESTION MARK
			0xC1	=>  0x00A1,	# INVERTED EXCLAMATION MARK
			0xC2	=>  0x00AC,	# NOT SIGN
			0xC3	=>  0x221A,	# SQUARE ROOT
			0xC4	=>  0x0192,	# LATIN SMALL LETTER F WITH HOOK
			0xC5	=>  0x2248,	# ALMOST EQUAL TO
			0xC6	=>  0x2206,	# INCREMENT
			0xC7	=>  0x00AB,	# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
			0xC8	=>  0x00BB,	# RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
			0xC9	=>  0x2026,	# HORIZONTAL ELLIPSIS
			0xCA	=>  0x00A0,	# NO-BREAK SPACE
			0xCB	=>  0x00C0,	# LATIN CAPITAL LETTER A WITH GRAVE
			0xCC	=>  0x00C3,	# LATIN CAPITAL LETTER A WITH TILDE
			0xCD	=>  0x00D5,	# LATIN CAPITAL LETTER O WITH TILDE
			0xCE	=>  0x0152,	# LATIN CAPITAL LIGATURE OE
			0xCF	=>  0x0153,	# LATIN SMALL LIGATURE OE
			0xD0	=>  0x2013,	# EN DASH
			0xD1	=>  0x2014,	# EM DASH
			0xD2	=>  0x201C,	# LEFT DOUBLE QUOTATION MARK
			0xD3	=>  0x201D,	# RIGHT DOUBLE QUOTATION MARK
			0xD4	=>  0x2018,	# LEFT SINGLE QUOTATION MARK
			0xD5	=>  0x2019,	# RIGHT SINGLE QUOTATION MARK
			0xD6	=>  0x00F7,	# DIVISION SIGN
			0xD7	=>  0x25CA,	# LOZENGE
			0xD8	=>  0x00FF,	# LATIN SMALL LETTER Y WITH DIAERESIS
			0xD9	=>  0x0178,	# LATIN CAPITAL LETTER Y WITH DIAERESIS
			0xDA	=>  0x2044,	# FRACTION SLASH
			0xDB	=>  0x20AC,	# EURO SIGN
			0xDC	=>  0x2039,	# SINGLE LEFT-POINTING ANGLE QUOTATION MARK
			0xDD	=>  0x203A,	# SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
			0xDE	=>  0xFB01,	# LATIN SMALL LIGATURE FI
			0xDF	=>  0xFB02,	# LATIN SMALL LIGATURE FL
			0xE0	=>  0x2021,	# DOUBLE DAGGER
			0xE1	=>  0x00B7,	# MIDDLE DOT
			0xE2	=>  0x201A,	# SINGLE LOW-9 QUOTATION MARK
			0xE3	=>  0x201E,	# DOUBLE LOW-9 QUOTATION MARK
			0xE4	=>  0x2030,	# PER MILLE SIGN
			0xE5	=>  0x00C2,	# LATIN CAPITAL LETTER A WITH CIRCUMFLEX
			0xE6	=>  0x00CA,	# LATIN CAPITAL LETTER E WITH CIRCUMFLEX
			0xE7	=>  0x00C1,	# LATIN CAPITAL LETTER A WITH ACUTE
			0xE8	=>  0x00CB,	# LATIN CAPITAL LETTER E WITH DIAERESIS
			0xE9	=>  0x00C8,	# LATIN CAPITAL LETTER E WITH GRAVE
			0xEA	=>  0x00CD,	# LATIN CAPITAL LETTER I WITH ACUTE
			0xEB	=>  0x00CE,	# LATIN CAPITAL LETTER I WITH CIRCUMFLEX
			0xEC	=>  0x00CF,	# LATIN CAPITAL LETTER I WITH DIAERESIS
			0xED	=>  0x00CC,	# LATIN CAPITAL LETTER I WITH GRAVE
			0xEE	=>  0x00D3,	# LATIN CAPITAL LETTER O WITH ACUTE
			0xEF	=>  0x00D4,	# LATIN CAPITAL LETTER O WITH CIRCUMFLEX
			0xF0	=>  0xF8FF,	# Apple logo
			0xF1	=>  0x00D2,	# LATIN CAPITAL LETTER O WITH GRAVE
			0xF2	=>  0x00DA,	# LATIN CAPITAL LETTER U WITH ACUTE
			0xF3	=>  0x00DB,	# LATIN CAPITAL LETTER U WITH CIRCUMFLEX
			0xF4	=>  0x00D9,	# LATIN CAPITAL LETTER U WITH GRAVE
			0xF5	=>  0x0131,	# LATIN SMALL LETTER DOTLESS I
			0xF6	=>  0x02C6,	# MODIFIER LETTER CIRCUMFLEX ACCENT
			0xF7	=>  0x02DC,	# SMALL TILDE
			0xF8	=>  0x00AF,	# MACRON
			0xF9	=>  0x02D8,	# BREVE
			0xFA	=>  0x02D9,	# DOT ABOVE
			0xFB	=>  0x02DA,	# RING ABOVE
			0xFC	=>  0x00B8,	# CEDILLA
			0xFD	=>  0x02DD,	# DOUBLE ACUTE ACCENT
			0xFE	=>  0x02DB,	# OGONEK
			0xFF	=>  0x02C7	# CARON
		    )
	    ) ;


	// Builds a Mac Roman map for the font object $object_id.
	// The complete table (a single sub-table, at index 0) is handed over to the parent constructor together
	// with $font_variant.
	// NOTE(review): how the parent uses $font_variant when only index 0 exists is not visible here - verify
	// in PdfTexterAdobeMap.
	public function  __construct ( $object_id, $font_variant )
	   {
		parent::__construct ( $object_id, $font_variant, self::$MacRomanCharacterMap ) ;
	    }
    }


/*==============================================================================================================

    class PdfTexterAdobeUndocumentedUnicodeMap -
        Sometimes, Unicode maps translate character ids to something in the range 0xF000..0xF0FF (or maybe more).
	These mapped characters do not correspond to anything else in Unicode, but rather to a special character
	set.
	This class is not meant to be instantiated by anything here, but rather used for its $UnicodeMap property.
	Note that the $UnicodeMap array is not complete.

  ==============================================================================================================*/
class	PdfTexterAdobeUndocumentedUnicodeMap		extends		PdfTexterAdobeMap
   {
	// Associates code points of the 0xF0xx range (keys) with the character they stand for (values).
	// Apart from the last three entries (0xF020, 0xF037 and 0xF038), every key listed here is equal to
	// 0xF120 minus the mapped character code, which is why the keys decrease while the characters increase.
	// Unlike the maps of the sibling Adobe classes, this table is flat : it has no variant level.
	public static	$UnicodeMap		=  array
	   (
		0xF0F0 	 =>  0x30,	// '0' through '9'
		0xF0EF 	 =>  0x31,
		0xF0EE 	 =>  0x32,
		0xF0ED 	 =>  0x33,
		0xF0EC 	 =>  0x34,
		0xF0EB 	 =>  0x35,
		0xF0EA 	 =>  0x36,
		0xF0E9 	 =>  0x37,
		0xF0E8 	 =>  0x38,
		0xF0E7 	 =>  0x39,
		0xF0DF 	 =>  0x41,	// 'A' through 'Z'
		0xF0DE 	 =>  0x42,
		0xF0DD 	 =>  0x43,
		0xF0DC 	 =>  0x44,
		0xF0DB 	 =>  0x45,
		0xF0DA 	 =>  0x46,
		0xF0D9 	 =>  0x47,
		0xF0D8 	 =>  0x48,
		0xF0D7 	 =>  0x49,
		0xF0D6 	 =>  0x4A,
		0xF0D5 	 =>  0x4B,
		0xF0D4 	 =>  0x4C,
		0xF0D3 	 =>  0x4D,
		0xF0D2 	 =>  0x4E,
		0xF0D1 	 =>  0x4F,
		0xF0D0 	 =>  0x50,
		0xF0CF 	 =>  0x51,
		0xF0CE 	 =>  0x52,
		0xF0CD 	 =>  0x53,
		0xF0CC 	 =>  0x54,
		0xF0CB 	 =>  0x55,
		0xF0CA 	 =>  0x56,
		0xF0C9 	 =>  0x57,
		0xF0C8 	 =>  0x58,
		0xF0C7 	 =>  0x59,
		0xF0C6 	 =>  0x5A,
		0xF0BF 	 =>  0x61,	// 'a' through 'z'
		0xF0BE 	 =>  0x62,
		0xF0BD 	 =>  0x63,
		0xF0BC 	 =>  0x64,
		0xF0BB 	 =>  0x65,
		0xF0BA 	 =>  0x66,
		0xF0B9 	 =>  0x67,
		0xF0B8 	 =>  0x68,
		0xF0B7 	 =>  0x69,
		0xF0B6 	 =>  0x6A,
		0xF0B5 	 =>  0x6B,
		0xF0B4 	 =>  0x6C,
		0xF0B3 	 =>  0x6D,
		0xF0B2 	 =>  0x6E,
		0xF0B1 	 =>  0x6F,
		0xF0B0 	 =>  0x70,
		0xF0AF 	 =>  0x71,
		0xF0AE 	 =>  0x72,
		0xF0AD 	 =>  0x73,
		0xF0AC 	 =>  0x74,
		0xF0AB 	 =>  0x75,
		0xF0AA 	 =>  0x76,
		0xF0A9 	 =>  0x77,
		0xF0A8 	 =>  0x78,
		0xF0A7 	 =>  0x79,
		0xF0A6 	 =>  0x7A,
		0xF0F1 	 =>  0x2F,	// '/'
		0xF0E6 	 =>  0x3A,	// ':'
		0xF0F3 	 =>  0x2D,	// '-'
		0xF0F8 	 =>  0x28,	// '('
		0xF0F7 	 =>  0x29,	// ')'
		0xF0F2 	 =>  0x2E,	// '.'
		0xF020 	 =>  0x20,	// Space
		0xF0F9 	 =>  0x27,	// "'"
		0xF037 	 =>  0xE9,	// &eacute;
		0xF038 	 =>  0xE8,	// &egrave;
	    ) ;


	// Hands the flat $UnicodeMap table over to the parent constructor together with $font_variant.
	// NOTE(review): the sibling classes pass a table indexed by variant number whereas this one is flat ;
	// verify in PdfTexterAdobeMap that instantiating this class would work as expected.
	public function  __construct ( $object_id, $font_variant )
	   {
		parent::__construct ( $object_id, $font_variant, self::$UnicodeMap ) ;
	    }
    }


/*==============================================================================================================

    PdfTexterCIDMap -
        A class for mapping (or trying to...) CID fonts.

  ==============================================================================================================*/
abstract class	PdfTexterCIDMap		extends  PdfTexterCharacterMap
   {
	// CID maps are associative arrays whose keys are the font CID (currently expressed as a numeric value) and
	// whose values are the corresponding UTF8 representation. The following special values can also be used to
	// initialize certain entries :
	// UNKNOWN_CID :
	//	Indicates that the corresponding CID has no known UTF8 counterpart. When the PdfToText::$DEBUG variable
	//	is true, every character in this case will be replaced with the string : "[UID: abcd]", where "abcd" is
	//	the hex representation of the CID. This way, new CID tables can be built using this information.
	const		UNKNOWN_CID		=  -1 ;
	// ALT_CID :
	//	Sorry, this will remain undocumented so far and will be highly subject to change, since it is dating
	//	from my first interpretation of CID fonts, which is probably wrong.
	//	As implemented below : looking up a CID mapped to ALT_CID outputs nothing but is remembered, and the
	//	next lookup is resolved through $map [ 'alt' ] [ <remembered cid> ] [ <cid> ] instead of $map [ 'plain' ].
	const		ALT_CID			=  -2 ;


	// CID font map file ; the file is a PHP script that must contain an array of the form :
	//	$map	=  array
	//	   (
	//		'plain'		=>  array
	//		   (
	//			$cid1	=>  $utf1,
	//			...
	//		    )
	//	    ) ;
	// An optional 'alt' entry, indexed by alternate prefix then by CID, is used together with ALT_CID.
	// The value is false when no map file could be found.
	protected	$MapFile ;
	// Map, loaded into memory (always an array, empty when no usable map has been found)
	protected	$Map ;
	// Map cache - the interest is to avoid unnecessary includes
	// Keys have the form "map_name:font_variant" ; values are arrays with a 'file' and a 'map' entry
	private static	$CachedMaps		=  array ( ) ;

	// Related to the first experimental implementation of CID fonts :
	// CID of the last ALT_CID entry seen by offsetGet(), or false if the previous lookup was not an ALT_CID one
	private		$LastAltOffset		=  false ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Loads the specified map.
		If the map files contains a definition such as :

			$map	=  'IDENTITY-H-GQJGLM.cid' ;

		then the specified map will be loaded instead (only one indirection is supported).

		$object_id is passed as is to the parent constructor ; $map_name and $font_variant are used to locate
		the map file (see __get_cid_file).

		Whatever happens (no file found, missing redirection target, file that does not define an array), the
		Map property is guaranteed to be an array, so that count() and the array access methods never have to
		deal with anything else. Problems are reported through warning() in debug mode only.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $object_id, $map_name, $font_variant )
	   {
		// Initialize parent objects
		parent::__construct ( $object_id ) ;
		$this -> HexCharWidth	=  4 ;			// So far, CIDs are 2-bytes long

		// Since alternate characters can be apparently prefixed by 0x0000 or 0x0001, two calls to the array access operator
		// will be needed to retrieve the exact character in such cases
		// This is why we have to tell the upper layers not to cache the results
		$this -> Cache		=  false ;

		$map_index	=  "$map_name:$font_variant" ;

		// If this font has already been loaded somewhere, then reuse its information
		if  ( isset ( self::$CachedMaps [ $map_index ] ) )
		   {
			$map	=  self::$CachedMaps [ $map_index ] [ 'map' ] ;
			$file	=  self::$CachedMaps [ $map_index ] [ 'file' ] ;
		    }
		// Otherwise,
		else
		   {
			$file		=  $this -> __get_cid_file ( $map_name, $font_variant ) ;

			// No CID map found : CID numbers will be mapped as is
			if  ( $file  ===  false  ||  ! file_exists ( $file ) )
			   {
				if  ( PdfToText::$DEBUG )
					warning ( new PdfToTextDecodingException ( "Could not find CID table \"$map_name\" in directory \"" . PdfToText::$CIDTablesDirectory . "\"." ) ) ;
			    }
			// Otherwise, load the CID map ; the included script is expected to define a local $map variable
			else
			   {
				include ( $file ) ;

				// We authorize one CID map to contain the name of another CID map file, instead of the map itself
				if  ( isset ( $map )  &&  is_string ( $map ) )
				   {
					$file	=  PdfToText::$CIDTablesDirectory . "/$map" ;

					// Forget the redirection name, so that it can never be mistaken for the map itself
					unset ( $map ) ;

					if  ( file_exists ( $file ) )
						include ( $file ) ;
					else if  ( PdfToText::$DEBUG )
						warning ( new PdfToTextDecodingException ( "CID table \"$file\" referenced from another CID table could not be found." ) ) ;
				    }

				// Only real maps are worth caching ; anything else is discarded
				if  ( isset ( $map )  &&  is_array ( $map ) )
					self::$CachedMaps [ $map_index ]	=  array ( 'file' => $file, 'map' => $map ) ;
				else
				   {
					unset ( $map ) ;

					if  ( PdfToText::$DEBUG )
						warning ( new PdfToTextDecodingException ( "CID \"$file\" does not contain any definition." ) ) ;
				    }
			    }
		    }

		// Save map info for this CID font
		$this -> MapFile	=  $file ;
		$this -> Map		=  ( isset ( $map )  &&  is_array ( $map ) ) ?  $map :  array ( ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    __get_cid_file -
		Searches in the CIDTables directory for the CID map that best matches the specified map name (usually,
		IDENTITY-H) and the optional font variant.

		If a font variant has been specified, like "ABCD+Italic-Arial", then the CID tables directory will be
		searched for the following files, in the following order :
		- IDENTITY-H-ABCD+Italic-Arial.cid
		- IDENTITY-H-ABCD+Italic.cid
		- IDENTITY-H-ABCD.cid
		- If none found, then IDENTITY-H-empty.cid will be used and a warning will be issued in debug mode.

		When no font variant is given, or when none of the above files exists, "$map_name.cid" is tried last.

		Returns the path of the first existing candidate, or false if none of them exists.

	 *-------------------------------------------------------------------------------------------------------------*/
	private function  __get_cid_file ( $map_name, $font_variant )
	   {
		$files		=  array ( ) ;

		// Search for font variants, if any
		if  ( $font_variant )
		   {
			// The variant must be an identifier followed by at least one "+xxx" or "-xxx" part
			if  ( preg_match ( '/^ (?P<name> [a-z_][a-z_0-9]*) (?P<rest> [\-+] .*) $/imsx' , $font_variant, $match ) )
			   {
				$basename	=  '-' . $match [ 'name' ] ;

				if  ( preg_match_all ( '/ (?P<sep> [\-+]) (?P<name> [^\-+]+) /ix', $match [ 'rest' ], $other_matches ) )
				   {
					// Build the candidates from the most specific one (all the parts) down to the bare
					// identifier (no part at all) : $i is the number of leading parts kept for a candidate
					for  ( $i = count ( $other_matches [ 'name' ] ) ; $i  >=  0 ; $i -- )
					   {
						$new_file	=  $basename ;

						for  ( $j = 0 ; $j  <  $i ; $j ++ )
							$new_file	.=  $other_matches [ 'sep' ] [$j] . $other_matches [ 'name' ] [$j] ;

						$files []		=  array ( PdfToText::$CIDTablesDirectory . "/$map_name$new_file.cid", 'standard' ) ;
					    }
				    }
			    }

			// Last one will be the empty CID font
			$files []	=  array ( PdfToText::$CIDTablesDirectory . "/IDENTITY-H-empty.cid", 'empty' ) ;
		    }

		// Add the specified map file
		$files []	=  array ( PdfToText::$CIDTablesDirectory . "/$map_name.cid", 'default' ) ;

		// The first existing file in the list should be the appropriate one
		foreach  ( $files  as  $file )
		   {
			if  ( file_exists ( $file [0] ) )
			   {
				// Falling back on a generic map is worth a warning in debug mode
				if  ( PdfToText::$DEBUG )
				   {
					if  ( $file [1]  ===  'empty' )
						warning ( new PdfToTextDecodingException ( "Using empty IDENTITY-H definition for map \"$map_name\", variant \"$font_variant\"." ) ) ;
					else if  ( $file [1]  ===  'default' )
						warning ( new PdfToTextDecodingException ( "Using default IDENTITY-H definition for map \"$map_name\"." ) ) ;
				    }

				return ( $file [0] ) ;
			    }
		    }

		// No CID font found
		return ( false ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	        Interface implementations.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Returns the number of top-level entries of the loaded map.
	// NOTE(review): this counts the sections of the map ('plain', 'alt'...), not the CIDs of the 'plain'
	// section - confirm that this is what callers expect.
	public function  count ( )
	   { return ( count ( $this -> Map ) ) ; }


	// Tells whether the 'plain' section of the map has an entry for the specified CID.
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> Map [ 'plain' ] [ $offset ] ) ) ; }


	// Returns the text mapped to the specified CID :
	// - An empty string if the CID is not in the 'plain' section of the map
	// - For UNKNOWN_CID entries, an empty string or, in debug mode, a "[UID:xxxx]" placeholder which is also echoed
	// - For ALT_CID entries, an empty string ; the CID is remembered and changes the way the next call is resolved
	// - For other entries, the mapped value if the previous call was not an ALT_CID one ; otherwise, the value
	//   found in the 'alt' section for the remembered CID, or an empty string if there is none
	// Every call that is not an ALT_CID one forgets the remembered CID.
	public function  offsetGet ( $offset )
	   {
		// Unmapped CID : nothing to output, and a pending alternate prefix is no more relevant
		if  ( ! isset ( $this -> Map [ 'plain' ] [ $offset ] ) )
		   {
			$this -> LastAltOffset	=  false ;

			return ( '' ) ;
		    }

		$ch	=  $this -> Map [ 'plain' ] [ $offset ] ;

		switch  ( $ch )
		   {
			case	self::UNKNOWN_CID :
				$this -> LastAltOffset	=  false ;

				if  ( ! PdfToText::$DEBUG )
					return ( '' ) ;

				// Debug mode : the placeholder is both echoed and returned as the character contents
				$placeholder	=  '[UID:' . sprintf ( '%04x', $offset ) . "]" ;
				echo ( $placeholder ) ;

				return ( $placeholder ) ;

			case	self::ALT_CID :
				$this -> LastAltOffset		=  ( int ) $offset ;

				return ( '' ) ;

			default :
				// Regular character, not preceded by an alternate prefix
				if  ( $this -> LastAltOffset  ===  false )
					return ( $ch ) ;

				// Character preceded by an alternate prefix : only the 'alt' section is relevant
				$ch2	=  '' ;

				if  ( isset ( $this -> Map [ 'alt' ] [ $this -> LastAltOffset ] [ $offset ] ) )
				   {
					$ch2	=  $this -> Map [ 'alt' ] [ $this -> LastAltOffset ] [ $offset ] ;

					if  ( $ch2  ==  self::UNKNOWN_CID )
					   {
						if  ( PdfToText::$DEBUG )
						   {
							echo ( "[CID{$this -> LastAltOffset}:" . sprintf ( '%04x', $offset ) . "]" ) ;

							$ch2  =  "[CID{$this -> LastAltOffset}: $offset]" ;
						    }
						// Never let the UNKNOWN_CID marker (-1) leak into the extracted text
						else
							$ch2	=  '' ;
					    }
				    }

				$this -> LastAltOffset	=  false ;

				return ( $ch2 ) ;
		    }
	    }
    }


/*==============================================================================================================

    PdfTexterIdentityHCIDMap -
        A class for mapping IDENTITY-H CID fonts (or trying to...).

  ==============================================================================================================*/
class  PdfTexterIdentityHCIDMap		extends  PdfTexterCIDMap
   {
	// Builds a CID map for the font object $object_id, using 'IDENTITY-H' as the map name.
	// $font_variant is passed as is to the parent constructor, which uses it to locate the CID table to load.
	public function  __construct ( $object_id, $font_variant )
	   {
		parent::__construct ( $object_id, 'IDENTITY-H', $font_variant ) ;
	    }
    }


/*==============================================================================================================

    PdfTexterPageMap -
        A class for detecting page objects mappings and retrieving page number for a specified object.
	There is a quadruple level of indirection here :

	- The first level contains a /Type /Catalog parameter, with a /Pages one that references an object which
	  contains a /Count and /Kids. I don't know yet if the /Pages parameter can reference more than one
	  object using the array notation. However, the class is designed to handle such situations.
	- The object containing the /Kids parameter references objects which, in turn, list the objects contained
	  in one single page.
	- Each object referenced in /Kids has a /Type/Page parameter, together with /Contents, which lists the
	  objects of the current page.

	Object references are of the form : "x y R", where "x" is the object number.

	Of course, anything can be in any order, otherwise it would not be funny ! Consider the following
	example :

		(1) 5 0 obj
			<< ... /Pages 1 0 R ... >>
		    endobj

		(2) 1 0 obj
			<< ... /Count 1 /Kids[6 0 R] ... /Type/Pages ... >>
		    endobj

		(3)  6 0 obj
			<< ... /Type/Page ... /Parent 1 0 R ... /Contents [10 0 R 11 0 R ... x 0 R]
		     endobj

	Object #5 says that object #1 contains the list of page contents (in this example, there is only one page,
	referenced by object #6).
	Object #6 says that the objects #10, #11 through #x are contained into the same page.
	The quadruple indirection comes when you are handling one of the objects referenced in object #6 and you
	need to retrieve their page number...

	Of course, you cannot rely on the fact that all objects appear in logical order.

	And, of course #2, there may be no page catalog at all ! in such cases, objects containing drawing
	instructions will have to be considered as a single page, whose number will be sequential.

	And, of course #3, as this is the case with the official PDF 1.7 Reference from Adobe, there can be a
	reference to a non-existing object which was meant to contain the /Kids parameter (!). In this case,
	taking the ordinal number of objects of type (3) gives the page number minus one.

	One mystery is that the PDF 1.7 Reference file contains 1310 pages but only 1309 are recognized here...

  ==============================================================================================================*/
class  PdfTexterPageMap		extends  PdfObjectBase
   {
	// Page contents are (normally) first described by a catalog
	// Although there should be only one entry for that, this property is defined as an array, as you need to really
	// become paranoid when handling pdf contents...
	protected	$PageCatalogs		=  array ( ) ;
	// Entries that describe which page contains which text objects. Of course, these can be nested otherwise it would not be funny !
	protected	$PageKids		=  array ( ) ;
	// Terminal entries : they directly give the ids of the objects belonging to a page
	public		$PageContents		=  array ( ) ;
	// Note that all the above arrays are indexed by object id and filled with the data collected by calling the Peek() Method...

	// Objects that could be referenced from other text objects as XObjects, using the /TPLx notation
	protected	$TemplateObjects	=  array ( ) ;

	// Once the Peek() method has collected page contents & object information, the MapCatalog() method is called to create this array
	// which contains page numbers as keys, and the list of objects contained in this page as values
	public		$Pages			=  array ( ) ;
	// Holds page attributes
	public		$PageAttributes		=  array ( ) ;

	// Resource mappings can either refer to an object (/Resources 2 0 R) or to inline mappings (/Resources << ... >>)
	// The same object can be referenced by many /Resources parameters throughout the pdf file, so it's important to keep
	// the analyzed mappings in a cache, so that later references will reuse the results of the first one
	private		$ResourceMappingCache	=  array ( ) ;
	// List of XObject names - Used by the IsValidTemplate() function
	private		$XObjectNames		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR
		Creates a PdfTexterPageMap object. Actually, nothing significant is performed here, as this class' goal
		is to be used internally by PdfTexter.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( )
	   {
		// Nothing specific to set up here : only the parent (PdfObjectBase) constructor is run
		parent::__construct ( ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        AddTemplateObject - Adds an object that could be referenced as a template.

	    PROTOTYPE
	        $pagemap -> AddTemplateObject ( $object_id, $object_text_data ) ;

	    DESCRIPTION
	        Adds an object that may be referenced as a template from another text object, using the /TPLx notation.

	    PARAMETERS
	        $object_id (integer) -
	                Id of the object that may contain a resource mapping entry.

		$object_text_data (string) -
			Object contents.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  AddTemplateObject ( $object_id, $object_text_data )
	   {
		// Template objects are indexed by object id : adding the same id twice replaces the previous contents
		$this -> TemplateObjects [ $object_id ]		=  $object_text_data ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetResourceMappings - Gets resource mappings specified after a /Resources parameter.

	    PROTOTYPE
	        $result		=  $this -> GetResourceMappings ( $object_id, $object_data, $parameter, $pdf_object_list ) ;

	    DESCRIPTION
	        Most of the time, objects containing a page description (/Type /Page) also contain a /Resources parameter,
		which may be followed by one of the following constructs :
		- A reference to an object, such as :
			/Resources 2 0 R
		- Or an inline set of parameters, such as font or xobject mappings :
			/Resources << /Font<</F1 10 0 R ...>> /XObject <</Im0 27 0 R ...>>
		This method extracts alias/object mappings for the parameter specified by $parameter (it can be for
		example '/Font' or '/XObject') and returns these mappings as an associative array.

	    PARAMETERS
	        $object_id (integer) -
	                Id of the object that may contain a resource mapping entry.

		$object_data (string) -
			Object contents.

		$parameter (string) -
			Parameter defining resource mapping, for example /Font or /XObject.

		$pdf_object_list (associative array) -
			Array of object id/object data associations, for all objects defined in the pdf file.

	    RETURN VALUE
	        The list of resource mappings for the specified parameter, as an associative array, whose keys are the
		resource aliases and values are the corresponding object ids.
		The method returns an empty array if the specified object does not contain resource mappings or does
		not contain the specified parameter.

	 *-------------------------------------------------------------------------------------------------------------*/
	// Extracts the alias => object id mappings defined for $parameter ('/Font', '/XObject') by the /Resources
	// entry of $object_data, which either references another object or provides the mappings inline.
	// Returns an associative array (resource alias => object id), empty when nothing applicable is found.
	// Results coming from a referenced object are cached under the id of that object, so that every page
	// sharing the same resource object reuses them ; $object_id is kept for interface compatibility only.
	protected function  GetResourceMappings ( $object_id, $object_data, $parameter, $pdf_object_list )
	   {
		// Id of the object referenced by the /Resources parameter, or false for inline mappings
		$resource_id	=  false ;

		// The /Resources parameter refers to an existing PDF object
		if  ( preg_match ( '#/Resources \s* (?P<object_id> \d+) \s+ \d+ \s+ R#ix', $object_data, $match ) )
		   {
			$resource_id	=  $match [ 'object_id' ] ;

			// Return the cached result if the same object has previously been referenced by a /Resources parameter
			if  ( isset ( $this -> ResourceMappingCache [ $resource_id ] [ $parameter ] ) )
				return ( $this -> ResourceMappingCache [ $resource_id ] [ $parameter ] ) ;

			// Check that the object that is referred to exists
			if  ( ! isset ( $pdf_object_list [ $resource_id ] ) )
				return ( array ( ) ) ;

			$data	=  $pdf_object_list [ $resource_id ] ;
		    }
		// The /Resources parameter is followed by inline mappings : keep everything from the opening bracket
		else if  ( preg_match ( '#/Resources \s* <#ix', $object_data, $match, PREG_OFFSET_CAPTURE ) )
			$data	=  substr ( $object_data, $match [0] [1] + strlen ( $match [0] [0] ) - 1 ) ;
		else
			return ( array ( ) ) ;

		// Whatever we will be analyzing (an object contents or inline contents following the /Resources parameter),
		// the text will be enclosed within double angle brackets (<< ... >>)

		// A small kludge for /XObject which specify an object reference ("15 0 R") instead of XObjects mappings
		// ("<< ...>>" ) ; a reference to a missing object is ignored, which yields no mapping at all
		if  ( $parameter  ===  '/XObject'  &&
				preg_match ( '#/XObject \s+ (?P<obj> \d+) \s+ \d+ \s+ R#ix', $data, $match )  &&
				isset ( $pdf_object_list [ $match [ 'obj' ] ] ) )
			$data	=  '/XObject ' . $pdf_object_list [ $match [ 'obj' ] ] ;

		$mappings	=  array ( ) ;

		// The parameter name is quoted so that it is always searched literally
		$quoted		=  preg_quote ( $parameter, '#' ) ;

		if  ( preg_match ( "#$quoted \s* << \s* (?P<mappings> .*?) \s* >>#imsx", $data, $match ) )
		   {
			preg_match_all ( '# (?P<mapping> / [^\s]+) \s+ (?P<object_id> \d+) \s+ \d+ \s+ R#ix', $match [ 'mappings' ], $matches ) ;

			// Mapping extraction loop
			for  ( $i = 0, $count = count ( $matches [ 'object_id' ] ) ; $i  <  $count ; $i ++ )
				$mappings [ $matches [ 'mapping' ] [$i] ]	=  $matches [ 'object_id' ] [$i] ;
		    }

		// Put results for referenced objects in cache, including empty ones
		if  ( $resource_id  !==  false )
			$this -> ResourceMappingCache [ $resource_id ] [ $parameter ]	=  $mappings ;

		return ( $mappings ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Peek - Peeks page information from a pdf object.

	    PROTOTYPE
	        $pagemap -> Peek ( $object_id, $object_data, $pdf_objects ) ;

	    DESCRIPTION
	        Retrieves page information which can be of type (1), (2) or (3), as described in the class comments.

	    PARAMETERS
	        $object_id (integer) -
	                Id of the current pdf object.

		$object_data (string) -
			Pdf object contents.

		$pdf_objects (associative array) -
			Objects defined in the pdf file, as an associative array whose keys are object numbers and
			values object data.
			This parameter is used for /Type/Page objects which have a /Resource parameter that references
			an existing object instead of providing font mappings and other XObject mappings inline,
			enclosed within double angle brackets (<< /Font ... >>).

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  Peek ( $object_id, $object_data, $pdf_objects )
	   {
		// Page catalog (/Type/Catalog and /Pages x 0 R)
		if  ( preg_match ( '#/Type \s* /Catalog#ix', $object_data )  &&  $this -> GetObjectReferences ( $object_id, $object_data, '/Pages', $references ) )
			$this -> PageCatalogs	=  array_merge ( $this -> PageCatalogs, $references ) ;
		// Object listing the object numbers that give the list of objects contained in a single page (/Types/Pages and /Count x /Kids[x1 0 R ... xn 0 R]
		else if  ( preg_match ( '#/Type \s* /Pages#ix', $object_data ) )
		   {
			if  ( $this -> GetObjectReferences ( $object_id, $object_data, '/Kids', $references ) )
			   {
				// Sometimes, a reference can be the one of an object that contains the real reference ; in the following example,
				// the actual page contents are not in object 4, but in object 5
				//	/Kids 4 0 R
				//	...
				//	4 0 obj
				//	[5 0 R]
				//	endobj
				$new_references		=  array ( ) ;

				foreach  ( $references  as  $reference )
				   {
					if  ( ! isset ( $pdf_objects [ $reference ] )  ||
						! preg_match ( '/^ \s* (?P<ref> \[ [^]]+ \]) \s*$/imsx', $pdf_objects [ $reference ], $match ) )
					   {
						$new_references []	=  $reference ;
					    }
					else
					   {
						$this -> GetObjectReferences ( $reference, $pdf_objects [ $reference ], '', $sub_references ) ;
						$new_references		=  array_merge ( $new_references, $sub_references ) ;
					    }

				    }

				// Get kid count (knowing that sometimes, it is missing...)
				preg_match ( '#/Count \s+ (?P<count> \d+)#ix', $object_data, $match ) ;
				$page_count				=  ( isset ( $match [ 'count' ] ) ) ?  ( integer ) $match [ 'count' ] : false ;

				// Get parent object id
				preg_match ( '#/Parent \s+ (?P<parent> \d+)#ix', $object_data, $match ) ;
				$parent					=  ( isset ( $match [ 'parent' ] ) ) ?  ( integer ) $match [ 'parent' ] : false ;

				$this -> PageKids [ $object_id ]	=  array
				   (
					'object'	=>  $object_id,
					'parent'	=>  $parent,
					'count'		=>  $page_count,
					'kids'		=>  $new_references
				    ) ;
			    }
		    }
		// Object listing the other objects that are contained in this page (/Type/Page and /Contents[x1 0 R ... xn 0 R]
		else if  ( preg_match ( '#/Type \s* /Page\b#ix', $object_data ) )
		   {
			if  ( $this -> GetObjectReferences ( $object_id, $object_data, '/Contents', $references ) )
			   {
				preg_match ( '#/Parent \s+ (?P<parent> \d+)#ix', $object_data, $match ) ;
				$parent					=  ( isset ( $match [ 'parent' ] ) ) ?  (integer) $match [ 'parent' ] : false ;
				$fonts					=  $this -> GetResourceMappings ( $object_id, $object_data, '/Font', $pdf_objects ) ;
				$xobjects				=  $this -> GetResourceMappings ( $object_id, $object_data, '/XObject', $pdf_objects ) ;

				// Find the width and height of the page (/Mediabox parameter)
				if  ( preg_match ( '#/MediaBox \s* \[ \s* (?P<x1> \d+) \s+ (?P<y1> \d+) \s+ (?P<x2> \d+) \s+ (?P<y2> \d+) \s* \]#imsx', $object_data, $match ) )
				   {
					$width		=  ( double ) ( $match [ 'x2' ] - $match [ 'x1' ] + 1 ) ;
					$height		=  ( double ) ( $match [ 'y2' ] - $match [ 'y1' ] + 1 ) ;
				    }
				// Otherwise, fix an arbitrary width and length (but this should never happen, because all pdf files are correct, isn't it?)
				else
				   {
					$width		=  595 ;
					$height		=  850 ;
				    }

				// Yes ! some /Contents parameters may designate another object which contains references to the real text contents
				// in the form : [x 0 R y 0 R etc.], so we have to dig into it...
				$new_references				=  array ( ) ;

				foreach  ( $references  as  $reference )
				   {
					// We just need to check that the object contains something like :
					//	[x 0 R y 0 R ...]
					// and nothing more
					if  ( isset ( $pdf_objects [ $reference ] )  &&  preg_match ( '#^\s* \[ [^]]+ \]#x', $pdf_objects [ $reference ] )  &&
							$this -> GetObjectReferences ( $reference, $pdf_objects [ $reference ], '', $nested_references ) )
						$new_references		=  array_merge ( $new_references, $nested_references ) ;
					else
						$new_references []	=  $reference ;
				    }

				$this -> PageContents [ $object_id ]	=  array
				   (
					'object'	=>  $object_id,
					'parent'	=>  $parent,
					'contents'	=>  $new_references,
					'fonts'		=>  $fonts,
					'xobjects'	=>  $xobjects,
					'width'		=>  $width,
					'height'	=>  $height
				    ) ;
			    }
		    }
		// None of the above, but object contains /Xobject's and maybe more...
		else if  ( preg_match ( '#/Type \s* /XObject\b#ix', $object_data ) )
		   {
			preg_match ( '#/Parent \s+ (?P<parent> \d+)#ix', $object_data, $match ) ;
			$parent					=  ( isset ( $match [ 'parent' ] ) ) ?  (integer) $match [ 'parent' ] : false ;
			$fonts					=  $this -> GetResourceMappings ( $object_id, $object_data, '/Font', $pdf_objects ) ;
			$xobjects				=  $this -> GetResourceMappings ( $object_id, $object_data, '/XObject', $pdf_objects ) ;

			$this -> GetObjectReferences ( $object_id, $object_data, '/Contents', $references ) ;

			$this -> PageContents [ $object_id ]	=  array
			   (
				'object'	=>  $object_id,
				'parent'	=>  $parent,
				'contents'	=>  $references,
				'fonts'		=>  $fonts,
				'xobjects'	=>  $xobjects
			    ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        ProcessTemplateReferences - Replace template references with actual text contents.

	    PROTOTYPE
	        $text		=  $pagemap -> ProcessTemplateReferences ( $page_number, $text_data ) ;

	    DESCRIPTION
	        Replaces template references of the form "/TPLx Do" with the actual text contents.

	    PARAMETERS
	        $page_number (integer) -
	                Page number of the object that contains the supplied object data.

		$text_data (string)
			Text drawing instructions that are to be processed.

	    RETURN VALUE
	        Returns the original text, where all template references have been replaced with the contents of the
		object they refer to.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  ProcessTemplateReferences ( $page_number, $text_data )
	    {
		// Replaces template references ("/TPLx Do") found in $text_data with the contents of the template
		// objects declared by the page contents that belong to page $page_number.
		// Returns $text_data unchanged when the page is unknown or when nothing on it declares xobjects.
		if  ( ! isset ( $this -> Pages [ $page_number ] ) )
			return ( $text_data ) ;

		// Loop through the PageContents array to find which one(s) may be subject to template reference replacements
		foreach  ( $this -> PageContents  as  $page_contents )
		   {
			// Only entries that have been assigned to the requested page are of interest
			if  ( ! isset ( $page_contents [ 'page' ] )  ||  $page_contents [ 'page' ]  !=  $page_number )
				continue ;

			// MapKids() may create entries that hold nothing but a 'page' key : such entries have no 'xobjects'
			// item, so it must be tested with empty() instead of being handed directly to count()
			if  ( empty ( $page_contents [ 'xobjects' ] ) )
				continue ;

			// The supplied text data may contain template references of the form /TPLx : replace each of them
			// with the actual contents of the object it refers to
			$template_searches	=  array ( ) ;
			$template_replacements	=  array ( ) ;

			$this ->  __get_replacements ( $page_contents, $template_searches, $template_replacements ) ;
			$text_data	=  self::PregStrReplace ( $template_searches, $template_replacements, $text_data ) ;
		    }

		return ( $text_data ) ;
	     }


	// __get_replacements -
	//	Recursively gets the search/replacement strings for template references.
	private function  __get_replacements ( $page_contents, &$searches, &$replacements, $objects_seen = array ( ) )
	   {
		// Collects, for every xobject declared by $page_contents, a search pattern matching "<name> Do" and the
		// replacement text built from the corresponding entry of TemplateObjects. Results are appended to
		// $searches and $replacements ; $objects_seen lists the template objects already handled along the
		// current recursion path, so that circular references do not loop forever.

		// Some PageContents entries carry no 'xobjects' item at all : nothing to collect in that case
		if  ( empty ( $page_contents [ 'xobjects' ] ) )
			return ;

		foreach  ( $page_contents [ 'xobjects' ]  as  $template_name => $template_object )
		   {
			if  ( isset ( $this -> TemplateObjects [ $template_object ] )  &&  ! isset ( $objects_seen [ $template_object ] ) )
			   {
				$template				=  $this -> TemplateObjects [ $template_object ] ;

				// The name comes from the PDF file and may contain characters that are special in a regular
				// expression ('.', '+', or '#' which starts a comment in extended mode) : quote it
				$searches []				=  '#(' . preg_quote ( $template_name, '#' ) . ' \s+ Do\b )#msx' ;
				$replacements []			=  '!PDFTOTEXT_TEMPLATE_' . substr ( $template_name, 1 ) . ' ' . $template ;
				$objects_seen [ $template_object ]	=  $template_object ;

				// A template can itself reference other templates
				if  ( isset ( $this -> PageContents [ $template_object ] ) )
					$this -> __get_replacements ( $this -> PageContents [ $template_object ], $searches, $replacements, $objects_seen ) ;
			    }
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        MapObjects - Builds a correspondence between object and page numbers.

	    PROTOTYPE
	        $pagemap -> MapObjects ( ) ;

	    DESCRIPTION
	        Builds a correspondence between object and page numbers. The page number corresponding to an object id
		will be available afterwards using the array notation.

	    NOTES
		This method behaves as if there could be more than one page catalog in the same file, but I've not yet
		encountered this case.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  MapObjects ( $objects )
	   {
		// Fills the Pages array (page number => object ids) from the collected page catalogs and page kids.
		// $objects is the array of PDF objects, keyed by object id ; it is only used as a fallback, when no
		// page structure is available and everything has to be put onto page 1.

		// Page numbers start at 1. MapKids() receives this counter by reference and increments it for each
		// page it maps, so it must be initialized whatever the branch taken below.
		$current_page	=  1 ;

		// PDF files created short after the birth of Earth may have neither a page catalog nor page contents descriptions
		if  ( ! count ( $this -> PageCatalogs  ) )
		   {
			// Later, during Pleistocene, references to page kids started to appear...
			if  ( count ( $this -> PageKids ) )
			   {
				// Only start from the top-level nodes, ie those whose parent is not itself a known /Pages node :
				// nested nodes are reached recursively by MapKids(), and starting from them too would map their
				// pages a second time.
				$roots		=  array ( ) ;

				foreach  ( $this -> PageKids  as  $id => $entry )
				   {
					$parent		=  ( isset ( $entry [ 'parent' ] ) ) ?  $entry [ 'parent' ] : false ;

					if  ( $parent  ===  false  ||  ! isset ( $this -> PageKids [ $parent ] ) )
						$roots []	=  $id ;
				    }

				// No top-level node could be identified (circular parent references) : process every node
				if  ( ! count ( $roots ) )
					$roots		=  array_keys ( $this -> PageKids ) ;

				foreach  ( $roots  as  $catalog )
					$this -> MapKids ( $catalog, $current_page ) ;
			    }
			else
				$this -> Pages [1]	=  array_keys ( $objects ) ;
		    }
		// This is the ideal situation : there is a catalog that allows us to gather indirectly all page data
		else
		   {
			foreach  ( $this -> PageCatalogs  as  $catalog )
			   {
				if  ( isset ( $this -> PageKids [ $catalog ] ) )
					$this -> MapKids ( $catalog, $current_page ) ;
				// Well, almost ideal : it may happen that the page catalog refers to a non-existing object :
				// in this case, we behave the same as if there were no page catalog at all : group everything
				// onto one page
				else
					$this -> Pages [1]	=  array_keys ( $objects ) ;
			    }
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        MapKids - Establishes a correspondence between page kids and a current page number.

	    PROTOTYPE
	        $pagemap -> MapKids ( $catalog, &$page ) ;

	    DESCRIPTION
	  	Tries to assign a page number to all page description objects that have been collected by the Peek()
		method.
	  	Also creates the Pages associative array, whose keys are page numbers and whose values are the ids of
		the objects that the page contains.

	    EXAMPLE
	  	The following example gives an overview of a possible layout for page catalogs ; it describes which
		objects contain	what.
	  	Lines starting with "#x", where "x" is a number, stands for a PDF object definition, which will start
		with "x 0 obj" in the PDF file.
	  	Whenever numbers are referenced (other than those prefixed with a "#"), it means "reference to the
		specified object.
	  	For example, "54" will refer to object #54, and will be given as "54 0 R" in the PDF file.
	  	The numbers at the beginning of each line are just "step numbers", which will be referenced in the
		explanations after the example :

			(01) #1 : /Type/Catalog /Pages 54
			(02)    -> #54 : /Type/Pages /Kids[3 28 32 58] /Count 5
			(03)           -> #3 : /Type/Page /Parent 54 /Contents[26]
			(04)		     -> #26 : page contents
			(05)           -> #28 : /Type/Page /Parent 54 /Contents[30 100 101 102 103 104]
			(06)		     -> #30 : page contents
			(07)	       -> #32 : /Type/Page /Parent 54 /Contents[34]
			(08)		     -> #34 : page contents
			(09)	       -> #58 : /Type/Pages /Parent 54 /Count 2 /Kids[36 40]
			(10)		     -> #36 : /Type/Page /Parent 58 /Contents[38]
			(11)			    -> #38 : page contents
			(12)		     -> #40 : /Type/Page /Parent 58 /Contents[42]
			(13)			    -> #42 : page contents

		 Explanations :
			(01) Object #1 contains the page catalog ; it states that a further description of the page
			     contents is given by object #54.
			     Note that it could reference multiple page descriptions, such as : /Pages [54 68 99...]
			     (although I did not met the case so far)
			(02) Object #54 in turn says that it as "kids", described by objects #3, #28, #32 and #58. It
			     also says that it has 5 pages (/Count parameter) ; but wait... the /Kids parameter references
			     4 objects while the /Count parameter states that we have 5 pages : what happens ? we will
			     discover it in the explanations below.
			(03) Object #3 states that it is aimed for page description (/Type/Page) ; the page contents
			     will be found in object #26, specified after the /Contents parameter. Note that here again,
			     multiple objects could be referenced by the /Contents parameter but, in our case, there is
			     only one, 26. Object #3 also says that its parent object (in the page catalog) is object
			     #54, defined in (01).
			     Since this is the first page we met, it will have page number 1.
			(04) ... object #26 contains the Postscript instructions to draw page #1
			(05) Object #28 has the same type as #3 ; its page contents can be located in object #30 (06)
			     The same applies for object #32 (07), whose page contents are given by object #34 (08).
			     So, (05) and (07) will be pages 2 and 3, respectively.
			(09) Now, it starts to become interesting : object #58 does not directly lead to an object
			     containing Postscript instructions as did objects #3, #28 and #32 whose parent is #54, but
			     to yet another page catalog which contains 2 pages (/Count 2), described by objects #36 and
			     #40. It's not located at the same position as object #54 in the hierarchy, so it shows that
			     page content descriptions can be recursively nested.
			(10) Object #36 says that we will find the page contents in object #38 (which will be page 4)
			(12) ... and object #40 says that we will find the page contents in object #42 (and our final
			     page, 5)

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  MapKids ( $catalog, &$page )
	   {
		// Walks the kids of the /Pages node $catalog and assigns the page number $page to each page found,
		// incrementing $page (passed by reference) after each assignment. For every page, the Pages array
		// receives the ids of its contents objects and, when known, PageAttributes receives its dimensions.
		if  ( ! isset ( $this -> PageKids [ $catalog ] ) )
			return ;

		// The decision is taken separately for each kid. Relying on the first kid only would silently drop
		// pages when kids are of mixed kinds (a nested /Pages node followed by plain pages) or when the first
		// page has no /Contents ; it would also read an undefined offset when the kid list is empty.
		foreach  ( $this -> PageKids [ $catalog ] [ 'kids' ]  as  $item )
		   {
			// Some kids do not directly lead to an object describing page contents, but rather to an object
			// containing in turn a /Pages /Kids[] construct ; this adds a level of indirection, and we have
			// to recursively process it
			if  ( isset ( $this -> PageKids [ $item ] ) )
			   {
				// A node listing itself as a kid would recurse forever
				if  ( $item  !=  $catalog )
					$this -> MapKids ( $item, $page ) ;

				continue ;
			    }

			// The referenced object is a page : remember its page number. Note that this creates a PageContents
			// entry holding only the 'page' key when the object was not collected as page contents.
			$this -> PageContents [ $item ]	[ 'page' ]	=  $page ;
			$this -> Pages [ $page ]			=  ( isset ( $this -> PageContents [ $item ] [ 'contents' ] ) ) ?
										$this -> PageContents [ $item ] [ 'contents' ] : array ( ) ;

			if ( isset ( $this -> PageContents [ $item ] [ 'width' ] ) )
			   {
				$this -> PageAttributes [ $page ]		=  array
				   (
					'width'			=>  $this -> PageContents [ $item ] [ 'width' ],
					'height'		=>  $this -> PageContents [ $item ] [ 'height' ]
				    ) ;
			    }

			$page ++ ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetMappedFonts - Retrieves the mapped fonts per page

	    PROTOTYPE
	        $array	=  $pagemap -> GetMappedFonts ( ) ;

	    DESCRIPTION
	        Gets the mapped fonts, per page. XObjects are traversed, to retrieve the additional font aliases
		they define.
		This function is used by the PdfTexter class to add additional entries to the FontMap object,
		ensuring that each reference to a font remains local to a page.

	    RETURN VALUE
	        Returns an array of associative arrays which have the following entries :
		- 'page' :
			Page number.
		- 'xobject-name' :
			XObject name, that can define further font aliases. This entry is set to the empty string for
			global font aliases.
		- 'font-name' :
			Font name (eg, "/F1", "/C1_0", etc.).
		- 'object' :
			Object defining the font attributes, such as character map, etc.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetMappedFonts ( )
	   {
		// Returns one array entry ('page', 'xobject-name', 'font-name', 'object') for each font alias declared
		// by a page or by the XObjects reachable from it. Pages are numbered from 1, in the order they are
		// met when walking the page tree of each page catalog.
		$mapped_fonts	=  array ( ) ;
		$current_page	=  0 ;

		foreach  ( $this -> PageCatalogs  as  $catalog )
			$this -> __map_page_tree_fonts ( $catalog, $current_page, $mapped_fonts, array ( ) ) ;

		return ( $mapped_fonts ) ;
	    }


	// __map_page_tree_fonts -
	//	Collects the font aliases of every page located under the /Pages node $node, descending into nested
	//	/Pages nodes. $current_page holds the number of the last page met, and $nodes_seen the nodes already
	//	entered along the current path.
	private function  __map_page_tree_fonts ( $node, &$current_page, &$mapped_fonts, $nodes_seen )
	   {
		if  ( ! isset ( $this -> PageKids [ $node ] )  ||  isset ( $nodes_seen [ $node ] ) )
			return ;

		$nodes_seen [ $node ]	=  true ;

		foreach  ( $this -> PageKids [ $node ] [ 'kids' ]  as  $page_object )
		   {
			// A kid that is itself a /Pages node does not stand for a single page : its own kids must be
			// visited, otherwise the fonts of its pages are never mapped and the page numbering goes wrong
			if  ( isset ( $this -> PageKids [ $page_object ] ) )
			   {
				$this -> __map_page_tree_fonts ( $page_object, $current_page, $mapped_fonts, $nodes_seen ) ;
				continue ;
			    }

			$current_page ++ ;

			if  ( ! isset ( $this -> PageContents [ $page_object ] ) )
				continue ;

			$page_contents		=  $this -> PageContents [ $page_object ] ;
			$associations		=  array ( ) ;
			$xobjects_mapped	=  false ;

			// Entries may lack the 'xobjects' item ; treat this case as "no xobject"
			$xobjects		=  ( isset ( $page_contents [ 'xobjects' ] )  &&  is_array ( $page_contents [ 'xobjects' ] ) ) ?
							$page_contents [ 'xobjects' ] : array ( ) ;

			if  ( isset ( $page_contents [ 'fonts' ] )  &&  is_array ( $page_contents [ 'fonts' ] ) )
			   {
				foreach  ( $page_contents [ 'fonts' ]  as  $font_name => $font_object )
				   {
					$mapped_fonts []		=  array
					   (
						'page'		=>  $current_page,
						'xobject-name'	=>  '',
						'font-name'	=>  $font_name,
						'object'	=>  $font_object
					    ) ;

					$associations [ ":$font_name" ]	=  $font_object ;

					// The XObject aliases only need to be collected once per page ; doing it right after the
					// first font keeps the entries in the order callers have always received them
					if  ( ! $xobjects_mapped )
					   {
						$this -> __map_recursive ( $current_page, $xobjects, $mapped_fonts, $associations ) ;
						$xobjects_mapped	=  true ;
					    }
				    }
			    }

			// A page without any font of its own may still use XObjects that define some
			if  ( ! $xobjects_mapped )
				$this -> __map_recursive ( $current_page, $xobjects, $mapped_fonts, $associations ) ;
		    }
	    }


	// __map_recursive -
	//	Recursively collects font aliases for XObjects.
	private function  __map_recursive ( $page_number, $xobjects, &$mapped_fonts, &$associations, $objects_seen = array ( ) )
	   {
		// For each XObject of $xobjects (name => object id) that has a PageContents entry, appends its font
		// aliases to $mapped_fonts, records the "xobject:font" pair in $associations so that it is only added
		// once, flags the XObject name in XObjectNames, then processes the XObjects it references in turn.
		// $objects_seen holds the object ids already entered along the current path : an XObject that
		// (directly or not) references itself would otherwise cause an endless recursion.
		if  ( ! is_array ( $xobjects ) )
			return ;

		foreach  ( $xobjects  as  $xobject_name => $xobject_value )
		   {
			if  ( ! isset ( $this -> PageContents [ $xobject_value ] )  ||  isset ( $objects_seen [ $xobject_value ] ) )
				continue ;

			// PageContents entries do not always carry the 'fonts' and 'xobjects' items
			$xobject_contents	=  $this -> PageContents [ $xobject_value ] ;
			$fonts			=  ( isset ( $xobject_contents [ 'fonts' ] )  &&  is_array ( $xobject_contents [ 'fonts' ] ) ) ?
							$xobject_contents [ 'fonts' ] : array ( ) ;
			$nested_xobjects	=  ( isset ( $xobject_contents [ 'xobjects' ] )  &&  is_array ( $xobject_contents [ 'xobjects' ] ) ) ?
							$xobject_contents [ 'xobjects' ] : array ( ) ;

			foreach  ( $fonts  as  $font_name => $font_object )
			   {
				if  ( ! isset ( $associations [ "$xobject_name:$font_name" ] ) )
				   {
					$mapped_fonts []		=  array
					   (
						'page'		=>  $page_number,
						'xobject-name'	=>  $xobject_name,
						'font-name'	=>  $font_name,
						'object'	=>  $font_object
					    ) ;

					$associations [ "$xobject_name:$font_name" ]	=  $font_object ;
				    }
			    }

			$this -> XObjectNames [ $xobject_name ]		=  1 ;

			// The guard is path-based : the same object may still be processed again when it is reachable
			// under another XObject name
			$path				=  $objects_seen ;
			$path [ $xobject_value ]	=  true ;

			$this -> __map_recursive ( $page_number, $nested_xobjects, $mapped_fonts, $associations, $path ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        IsValidXObjectName - Checks if the specified name is the one of a valid XObject.

	    PROTOTYPE
	        $status		=  $pagemap -> IsValidXObjectName ( $name ) ;

	    DESCRIPTION
	        Checks if the specified name is a valid XObject defining its own set of font aliases.

	    PARAMETERS
	        $name (string) -
	                Name of the XObject to be checked.

	    RETURN VALUE
	        Returns true if the specified XObject exists and defines its own set of font aliases, false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  IsValidXObjectName ( $name )
	   {
		// A name is known once it has been recorded as a key of the XObjectNames array
		$is_known	=  isset ( $this -> XObjectNames [ $name ] ) ;

		return ( $is_known ) ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                         IMAGE MANAGEMENT                                         ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    class PdfImage -
        Holds image data coming from pdf.

  ==============================================================================================================*/
abstract class  PdfImage			extends  PdfObjectBase
   {
	// Image resource that can be used to process image data, using the php imagexxx() functions
	public		$ImageResource		=  false ;
	// Original image data
	protected	$ImageData ;
	// True when the constructor was asked not to create an image resource ; in this case, only the original
	// image data is available
	protected	$NoResourceCreated ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR
	        Creates a PdfImage object with a resource that can be used with imagexxx() php functions.

	    PARAMETERS
	        $image_data (string) -
	                Image data, kept as is in the ImageData property.

		$no_resource_created (boolean) -
			When true, CreateImageResource() is not called and the ImageResource property remains false.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $image_data, $no_resource_created = false )
	   {
		$this -> ImageData		=  $image_data ;
		$this -> NoResourceCreated	=  $no_resource_created ;

		if  ( ! $no_resource_created )
			$this -> ImageResource		=  $this -> CreateImageResource ( $image_data ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    DESTRUCTOR
	        Destroys the associated image resource.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __destruct ( )
	   {
		$this -> DestroyImageResource ( ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        CreateImageResource - creates an image resource from the supplied image data.

	    PROTOTYPE
	        $resource	=  $this -> CreateImageResource ( $data ) ;

	    DESCRIPTION
	        Creates an image resource from the supplied image data.
		Whatever the input format, the internal format will be the one used by the gd library.

	    PARAMETERS
	        $data (string) -
	                Image data.

	 *-------------------------------------------------------------------------------------------------------------*/
	abstract protected function  CreateImageResource ( $image_data ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        DestroyImageResource - Destroys the allocated image resource.

	    PROTOTYPE
	        $this -> DestroyImageResource ( ) ;

	    DESCRIPTION
	        Destroys the allocated image resource, using the libgd imagedestroy() function. This method can be
		overridden by derived class if the underlying image resource does not come from the gd lib.
		The ImageResource property is reset to false afterwards, so that the method can safely be called
		more than once (for example explicitly, then again by the destructor).

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  DestroyImageResource ( )
	   {
		if  ( ! $this -> ImageResource )
			return ;

		// Only legacy gd resources need an explicit imagedestroy() ; since PHP 8.0 images are GdImage objects,
		// for which imagedestroy() has no effect : dropping the reference below is enough to release them
		if  ( is_resource ( $this -> ImageResource ) )
			imagedestroy ( $this -> ImageResource ) ;

		$this -> ImageResource	=  false ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        SaveAs - Saves the current image to a file.

	    PROTOTYPE
	        $pdfimage -> SaveAs ( $output_file, $image_type = IMG_JPEG ) ;

	    DESCRIPTION
	        Saves the current image resource to the specified output file, in the specified format.
		When no image resource has been created on purpose (see the constructor) and the requested format
		is JPEG, the original image data is written as is.

	    PARAMETERS
	        $output_file (string) -
	                Output filename. A null value sends the image to the standard output (see the Output()
			method).

		$image_type (integer) -
			Output format. Can be any of the predefined php constants IMG_*.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SaveAs ( $output_file, $image_type = IMG_JPEG )
	   {
		if  ( ! $this -> ImageResource )
		   {
			if  ( $this -> NoResourceCreated  &&  $image_type  ==  IMG_JPEG )
			   {
				// The gd functions below write to the standard output when the filename is null ; do the same
				// here, since file_put_contents() does not accept a null filename
				if  ( $output_file  ===  null )
					echo $this -> ImageData ;
				else
					file_put_contents ( $output_file, $this -> ImageData ) ;
			    }
			else if  ( PdfToText::$DEBUG )
				warning ( new PdfToTextDecodingException ( "No image resource allocated." ) ) ;

			return ;
		    }

		// Bitmask of the image formats supported by the current gd build
		$image_types		=  imagetypes ( ) ;

		switch  ( $image_type )
		   {
			case	IMG_JPEG :
			case	IMG_JPG :
				if  ( ! ( $image_types & IMG_JPEG )  &&  ! ( $image_types & IMG_JPG ) )
					error ( new PdfToTextDecodingException ( "Your current PHP version does not support JPG images." ) ) ;

				// 100 = best quality
				imagejpeg ( $this -> ImageResource, $output_file, 100 ) ;
				break ;

			case	IMG_GIF :
				if  ( ! ( $image_types & IMG_GIF ) )
					error ( new PdfToTextDecodingException ( "Your current PHP version does not support GIF images." ) ) ;

				imagegif ( $this -> ImageResource, $output_file ) ;
				break ;

			case	IMG_PNG :
				if  ( ! ( $image_types & IMG_PNG ) )
					error ( new PdfToTextDecodingException ( "Your current PHP version does not support PNG images." ) ) ;

				// 0 = no compression
				imagepng ( $this -> ImageResource, $output_file, 0 ) ;
				break ;

			case	IMG_WBMP :
				if  ( ! ( $image_types & IMG_WBMP ) )
					error ( new PdfToTextDecodingException ( "Your current PHP version does not support WBMP images." ) ) ;

				imagewbmp ( $this -> ImageResource, $output_file ) ;
				break ;

			// NOTE(review): the IMG_XPM format is written with imagexbm(), which produces XBM data - confirm
			// that this is intended
			case	IMG_XPM :
				if  ( ! ( $image_types & IMG_XPM ) )
					error ( new PdfToTextDecodingException ( "Your current PHP version does not support XPM images." ) ) ;

				imagexbm ( $this -> ImageResource, $output_file ) ;
				break ;

			default :
				error ( new PdfToTextDecodingException ( "Unknown image type #$image_type." ) ) ;
		    }
	    }


	// Output -
	//	Sends the image, in JPEG format, to the standard output.
	public function  Output ( )
	   {
		$this -> SaveAs ( null ) ;
	    }
    }


/*==============================================================================================================

    class PdfJpegImage -
        Handles encoded JPG images.

  ==============================================================================================================*/
class  PdfJpegImage		extends  PdfImage
   {
	// Constructor -
	//	Hands the JPEG data over to the parent class ; the $autosave flag is passed as the parent's
	//	$no_resource_created argument, so no image resource is built when it is true.
	public function  __construct ( $image_data, $autosave )
	   {
		parent::__construct ( $image_data, $autosave ) ;
	    }


	// CreateImageResource -
	//	Lets the gd library build the image from the supplied data.
	protected function  CreateImageResource ( $image_data )
	   {
		$resource	=  imagecreatefromstring ( $image_data ) ;

		return ( $resource ) ;
	    }
    }


/*==============================================================================================================

    class PdfInlinedImage -
        Decodes raw image data in objects having the /FlateDecode flag.

  ==============================================================================================================*/
class  PdfInlinedImage		extends  PdfImage
   {
	// Supported color schemes
	const		COLOR_SCHEME_RGB		=  1 ;
	const		COLOR_SCHEME_CMYK		=  2 ;
	const		COLOR_SCHEME_GRAY		=  3 ;

	// Color scheme names, for debugging only
	private static	$DecoderNames		=  array
	   (
		self::COLOR_SCHEME_RGB		=>  'RGB',
		self::COLOR_SCHEME_CMYK		=>  'CMYK',
		self::COLOR_SCHEME_GRAY		=>  'Gray'
	    ) ;

	// Currently implemented image decoders
	private static	$Decoders		=  array
	   (
		self::COLOR_SCHEME_RGB		=>  array
		   (
			8	=>  '__decode_rgb8'
		    ),
		self::COLOR_SCHEME_GRAY		=>  array
		   (
			8	=>  '__decode_gray8'
		    ),
		self::COLOR_SCHEME_CMYK		=>  array
		   (
			8	=>  '__decode_cmyk8'
		    ),
	    ) ;

	// Image width and height
	public		$Width,
			$Height ;
	// Color scheme
	public		$ColorScheme ;
	// Number of bits per color component
	public		$BitsPerComponent ;
	// Decoding function, varying upon the supplied image type
	public		$DecodingFunction	=  false ;


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Constructor - Builds an image from the supplied data.

	    PROTOTYPE
	        $image	=  new  PdfInlinedImage ( $image_data, $width, $height, $bits_per_component, $color_scheme ) ;

	    DESCRIPTION
	        Builds an image from the supplied data. Checks that the image flags are supported.

	    PARAMETERS
	        $image_data (string) -
	                Uncompressed image data.

		$width (integer) -
			Image width, in pixels.

		$height (integer) -
			Image height, in pixels.

		$bits_per_components (integer) -
			Number of bits per color component.

		$color_scheme (integer) -
			One of the COLOR_SCHEME_* constants, specifying the initial data format.

	    NOTES
	        Processed images are always converted to JPEG format.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $image_data, $width, $height, $bits_per_component, $color_scheme )
	   {
		// Remember the image characteristics before anything else
		$this -> ColorScheme		=  $color_scheme ;
		$this -> BitsPerComponent	=  $bits_per_component ;
		$this -> Height			=  $height ;
		$this -> Width			=  $width ;

		// Look for a decoding function matching the color scheme, then the number of bits per component
		$scheme_known			=  isset ( self::$Decoders [ $color_scheme ] ) ;

		if  ( ! $scheme_known )
			error ( new PdfToTextDecodingException ( "Unknown color scheme $color_scheme." ) ) ;
		else if  ( ! isset ( self::$Decoders [ $color_scheme ] [ $bits_per_component ] ) )
		   {
			$scheme_name	=  self::$DecoderNames [ $color_scheme ] ;

			error ( new PdfToTextDecodingException ( "No decoding function has been implemented for image objects having the " .
					$scheme_name . " color scheme with $bits_per_component bits per color component." ) ) ;
		    }
		else
			$this -> DecodingFunction	=  self::$Decoders [ $color_scheme ] [ $bits_per_component ] ;

		// The parent constructor stores the image data and asks for the image resource to be created
		parent::__construct ( $image_data ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        CreateInstance - Creates an appropriate instance of a PdfImage class.

	    PROTOTYPE
	        $image	=  PdfInlinedImage::CreateInstance ( $stream_data, $object_data, $autosave ) ;

	    DESCRIPTION
	        Creates an instance of either :
		- A PdfJpegImage class, if the image specifications in $object_data indicate that the compressed stream
		  contents are only JPEG data
		- A PdfInlinedImage class, if the image specifications state that the compressed stream data contain
		  only color values.

		The class currently supports (in $stream_data) :
		- Pure JPEG contents
		- RGB values
		- CMYK values
		- Gray scale values (in the current version, the resulting image does not correctly reproduce the
		  initial colors, if interpolation is to be used).

	    PARAMETERS
	        $stream_data (string) -
	                Compressed image data.

		$object_data (string) -
			Object containing the stream data.

	    RETURN VALUE
	        Returns :
		- A PdfJpegImage object, if the stream data contains only pure JPEG contents
		- A PdfInlinedImage object, in other cases.
		- False if the supplied image data is not currently supported.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  CreateInstance ( $stream_data, $object_data, $autosave )
	   {
		// Builds the image object that suits the supplied stream : a PdfJpegImage for JPEG contents
		// (/DCTDecode), a PdfInlinedImage for raw 8-bits RGB, CMYK or gray values. Returns false when the
		// image cannot be handled (undecodable data, missing dimensions, unsupported color space or number of
		// bits per component). $autosave is only used for JPEG contents, where it is passed to PdfJpegImage.

		// Remove stream data from the supplied object data, to speed up the searches below
		$index		=  strpos ( $object_data, 'stream' ) ;

		if  ( $index  !==   false )
			$object_data	=  substr ( $object_data, 0, $index ) ;

		// Uncompress stream data
		$image_data	=  gzuncompress ( $stream_data ) ;

		// The /DCTDecode flag indicates JPEG contents - returns a PdfJpegImage object
		// (stripos() returns a position, which may legitimately be zero : compare against false)
		if  ( stripos ( $object_data, '/DCTDecode' )  !==  false )
		   {
			// Stream data that could not be inflated is used as is rather than replaced with "false"
			if  ( $image_data  ===  false )
				$image_data	=  $stream_data ;

			return ( new PdfJpegImage ( $image_data, $autosave ) ) ;
		    }

		// Raw color values cannot be decoded without the uncompressed data
		if  ( $image_data  ===  false )
		   {
			if  ( PdfToText::$DEBUG )
				warning ( new PdfToTextDecodingException ( "Unable to uncompress image data." ) ) ;

			return ( false ) ;
		    }

		// Get the image width & height ; both are required to build the image
		$width_match	=  null ;
		$height_match	=  null ;

		if  ( ! preg_match ( '#/Width \s+ (?P<value> \d+)#ix', $object_data, $width_match )  ||
		      ! preg_match ( '#/Height \s+ (?P<value> \d+)#ix', $object_data, $height_match ) )
		   {
			if  ( PdfToText::$DEBUG )
				warning ( new PdfToTextDecodingException ( "Missing image width or height." ) ) ;

			return ( false ) ;
		    }

		$width		=  ( int ) $width_match [ 'value' ] ;
		$height		=  ( int ) $height_match [ 'value' ] ;

		// Get the number of bits per color component ; a missing value gives 0, which is rejected below
		$match		=  null ;
		preg_match ( '#/BitsPerComponent \s+ (?P<value> \d+)#ix', $object_data, $match ) ;
		$bits_per_component	=  ( isset ( $match [ 'value' ] ) ) ?  ( int ) $match [ 'value' ] : 0 ;

		// Get the target color space
		// Sometimes, this refers to an object in the PDF file, which can also be embedded in a compound object
		// We don't handle such cases for now
		$match		=  null ;
		preg_match ( '#/ColorSpace \s* / (?P<value> \w+)#ix', $object_data, $match ) ;

		if  ( ! isset ( $match [ 'value' ] ) )
			return ( false ) ;

		$color_space_name	=  $match [ 'value' ] ;

		// Check that we are able to handle the specified color space
		switch ( strtolower ( $color_space_name ) )
		   {
			case	'devicergb' :
				$color_space	=  self::COLOR_SCHEME_RGB ;
				break ;

			case	'devicegray' :
				$color_space	=  self::COLOR_SCHEME_GRAY ;
				break ;

			case	'devicecmyk' :
				$color_space	=  self::COLOR_SCHEME_CMYK ;
				break ;

			default :
				if  ( PdfToText::$DEBUG )
					warning ( new PdfToTextDecodingException ( "Unsupported color space \"$color_space_name\"." ) ) ;

				return ( false ) ;
		    }

		// Also check that we can handle the specified number of bits per component
		switch ( $bits_per_component )
		   {
			case	8 :
				break ;

			default :
				if  ( PdfToText::$DEBUG )
					warning ( new PdfToTextDecodingException ( "Unsupported bits per component : $bits_per_component." ) ) ;

				return ( false ) ;
		    }

		// All done, return a PdfInlinedImage object
		return ( new PdfInlinedImage ( $image_data, $width, $height, $bits_per_component, $color_space ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        CreateImageResource - Creates the image resource.

	    PROTOTYPE
	        $resource	=  $image -> CreateImageResource ( $image_data ) ;

	    DESCRIPTION
	        Creates a GD image according to the supplied image data, and the parameters supplied to the class
		constructor.

	    PARAMETERS
	        $image_data (string) -
	                Image to be decoded.

	    RETURN VALUE
	        Returns a GD graphics resource in true color, or false if there is currently no implemented decoding
		function for this kind of images.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  CreateImageResource ( $image_data )
	   {
		// The DecodingFunction property holds the name of the method to call on this object
		$method		=  $this -> DecodingFunction ;

		// No decoding method available : nothing can be built
		if  ( ! $method )
			return ( false ) ;

		return ( $this -> $method ( $image_data ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Decoding functions.

	 *-------------------------------------------------------------------------------------------------------------*/

	// __decode_rgb8 -
	//	Decodes image data consisting of 8-bits RGB values (one byte for each color component).
	private function  __decode_rgb8 ( $data )
	   {
		$columns	=  $this -> Width ;
		$rows		=  $this -> Height ;
		$image		=  imagecreatetruecolor ( $columns, $rows ) ;
		$palette	=  array ( ) ;			// Colors already allocated, indexed by their packed RGB value
		$last_offset	=  strlen ( $data ) - 3 ;	// Offset of the last complete 3-bytes sample
		$x		=  0 ;
		$y		=  0 ;

		for  ( $offset = 0 ; $offset  <=  $last_offset ; $offset += 3 )
		   {
			$r	=  ord ( $data [ $offset ] ) ;
			$g	=  ord ( $data [ $offset + 1 ] ) ;
			$b	=  ord ( $data [ $offset + 2 ] ) ;
			$key	=  ( $r  <<  16 ) | ( $g  <<  8 ) | $b ;

			// Allocate each distinct color only once
			if  ( ! isset ( $palette [ $key ] ) )
				$palette [ $key ]	=  imagecolorallocate ( $image, $r, $g, $b ) ;

			// Pixels are written row by row : go to the start of the next row once the current one is full
			if  ( $x  >=  $columns )
			   {
				$x	=  0 ;
				$y ++ ;
			    }

			imagesetpixel ( $image, $x, $y, $palette [ $key ] ) ;
			$x ++ ;
		    }

		return ( $image ) ;
	    }


	// __decode_cmyk8 -
	//	Decodes image data consisting of 8-bits CMYK values (one byte for each color component).
	private function  __decode_cmyk8 ( $data )
	   {
		$data_length	=  strlen ( $data ) ;
		$colors		=  array ( ) ;			// Colors already allocated, indexed by their packed CMYK value
		$width		=  $this -> Width ;
		$height		=  $this -> Height ;
		$image		=  imagecreatetruecolor ( $width, $height ) ;

		for  ( $i = 0, $pixel_x = 0, $pixel_y = 0 ; $i + 4  <=  $data_length ; $i += 4, $pixel_x ++ )
		   {
			$cyan		=  ord ( $data [$i] ) ;
			$magenta	=  ord ( $data [$i+1] ) ;
			$yellow		=  ord ( $data [$i+2] ) ;
			$black		=  ord ( $data [$i+3] ) ;

			$color	=  ( $cyan  <<  24 ) | ( $magenta  <<  16 ) | ( $yellow  << 8 ) | ( $black ) ;

			if  ( isset ( $colors [ $color ] ) )
				$pixel_color	=  $colors [ $color ] ;
			else
			   {
				// Samples are bytes (0..255) : scale them to 0..1 fractions before conversion, because
				// __convert_cmyk_to_rgb() takes any component greater than 1 for a percentage
				$rgb			=  $this -> __convert_cmyk_to_rgb
				   (
					$cyan    / 255.0,
					$magenta / 255.0,
					$yellow  / 255.0,
					$black   / 255.0
				    ) ;

				// Keep every channel within the 0..255 range accepted by imagecolorallocate()
				$red			=  max ( 0, min ( 255, ( int ) $rgb [0] ) ) ;
				$green			=  max ( 0, min ( 255, ( int ) $rgb [1] ) ) ;
				$blue			=  max ( 0, min ( 255, ( int ) $rgb [2] ) ) ;

				$pixel_color		=  imagecolorallocate ( $image, $red, $green, $blue ) ;
				$colors [ $color ]	=  $pixel_color ;
			    }

			// Pixels are written row by row : go to the start of the next row once the current one is full
			if  ( $pixel_x  >=  $width )
			   {
				$pixel_x	=  0 ;
				$pixel_y ++ ;
			    }

			imagesetpixel ( $image, $pixel_x, $pixel_y, $pixel_color ) ;
		    }

		return ( $image ) ;
	    }


	// __decode_gray8 -
	//	Decodes image data consisting of 8-bits gray values.
	private function  __decode_gray8 ( $data )
	   {
		$columns	=  $this -> Width ;
		$rows		=  $this -> Height ;
		$image		=  imagecreatetruecolor ( $columns, $rows ) ;
		$palette	=  array ( ) ;			// Gray levels already allocated, indexed by their value
		$sample_count	=  strlen ( $data ) ;
		$x		=  0 ;
		$y		=  0 ;

		for  ( $offset = 0 ; $offset  <  $sample_count ; $offset ++ )
		   {
			$level	=  ord ( $data [ $offset ] ) ;

			// Allocate each distinct gray level only once ; the same value is used for the three channels
			if  ( ! isset ( $palette [ $level ] ) )
				$palette [ $level ]	=  imagecolorallocate ( $image, $level, $level, $level ) ;

			// Pixels are written row by row : go to the start of the next row once the current one is full
			if  ( $x  >=  $columns )
			   {
				$x	=  0 ;
				$y ++ ;
			    }

			imagesetpixel ( $image, $x, $y, $palette [ $level ] ) ;
			$x ++ ;
		    }

		return ( $image ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Support functions.

	 *-------------------------------------------------------------------------------------------------------------*/

	// __convert_cmyk_to_rgb -
	//	Converts CMYK color value to RGB.
	private function  __convert_cmyk_to_rgb ( $C, $M, $Y, $K )
	   {
		// Components are expected as fractions (0..1) ; if any of them is greater than 1, all four are taken
		// to be percentages (0..100)
		if  ( $C  >  1  ||  $M  >  1  ||  $Y  >  1  ||  $K  >  1 )
		   {
			$C /= 100.0 ;
			$M /= 100.0 ;
			$Y /= 100.0 ;
			$K /= 100.0 ;
		    }

		$result		=  array ( ) ;

		foreach  ( array ( $C, $M, $Y )  as  $ink )
		   {
			// Same as (1 - ink) * (1 - K), scaled to the 0..255 range of an 8-bits channel
			// (a factor of 256 would give 256 for "no ink at all", which is not a valid channel value)
			$channel	=  ( int ) round ( ( 1 - $ink * ( 1 - $K ) - $K ) * 255 ) ;

			// Out-of-range input must not produce out-of-range channels
			$result []	=  max ( 0, min ( 255, $channel ) ) ;
		    }

		// Array of three integers : red, green, blue
		return ( $result ) ;
	    }
    }


/*==============================================================================================================

    class PdfFaxImage -
        Handles encoded CCITT Fax images.

  ==============================================================================================================*/
class  PdfFaxImage		extends  PdfImage
   {
	// Constructor -
	//	Hands the supplied image data over to the parent constructor.
	public function  __construct ( $image_data )
	   {
		parent::__construct ( $image_data ) ;
	    }


	// CreateImageResource -
	//	Decoding of CCITT Fax data is not implemented : a warning is issued and false is returned explicitly
	//	(instead of an implicit null), so that callers get a definite "no image resource" value.
	protected function  CreateImageResource ( $image_data )
	   {
		warning ( new PdfToTextDecodingException ( "Decoding of CCITT Fax image format is not yet implemented." ) ) ;

		return ( false ) ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                      ENCRYPTION MANAGEMENT                                       ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    class PdfEncryptionData -
        Holds encryption data and allows for decryption.

  ==============================================================================================================*/
class  PdfEncryptionData		extends  PdfObjectBase
   {
	// Encryption modes
	const		PDFMODE_UNKNOWN				=  0 ;
	const		PDFMODE_STANDARD			=  1 ;

	// Encryption algorithms
	const		PDFCRYPT_ALGORITHM_RC4			=  0 ;
	const		PDFCRYPT_ALGORITHM_AES			=  1 ;
	const		PDFCRYPT_ALGORITHM_AES256		=  2 ;

	// A 32-bytes hardcoded padding used when computing encryption keys
	const		PDF_ENCRYPTION_PADDING			=  "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A" ;

	// Permission bits for encrypted files. Comments come from the PDF specification
	const		PDFPERM_PRINT				=  0x0004 ;		// bit 3 :
											//	(Revision 2) Print the document.
											//	(Revision 3 or greater) Print the document (possibly not at the highest quality level,
											//	depending on whether bit 12 is also set).
	const		PDFPERM_MODIFY				=  0x0008 ;		// bit 4 :
											//	Modify the contents of the document by operations other than those controlled by bits 6, 9, and 11.
	const		PDFPERM_COPY				=  0x0010 ;		// bit 5 :
											//	(Revision 2) Copy or otherwise extract text and graphics from the document, including extracting text
											//	and graphics (in support of accessibility to users with disabilities or for other purposes).
											//	(Revision 3 or greater) Copy or otherwise extract text and graphics from the document by operations
											//	other than that controlled by bit 10.
	const		PDFPERM_MODIFY_EXTRA			=  0x0020 ;		// bit 6 :
											//	Add or modify text annotations, fill in interactive form fields, and, if bit 4 is also set,
											//	create or modify interactive form fields (including signature fields).
	const		PDFPERM_FILL_FORM			=  0x0100 ;		// bit 9 :
											//	(Revision 3 or greater) Fill in existing interactive form fields (including signature fields),
											//	even if bit 6 is clear.
	const		PDFPERM_EXTRACT				=  0x0200 ;		// bit 10 :
											//	(Revision 3 or greater) Extract text and graphics (in support of accessibility to users with
											//	disabilities or for other purposes).
	const		PDFPERM_ASSEMBLE			=  0x0400 ;		// bit 11 :
											//	(Revision 3 or greater) Assemble the document (insert, rotate, or delete pages and create bookmarks
											//	or thumbnail images), even if bit 4 is clear.
	const		PDFPERM_HIGH_QUALITY_PRINT		=  0x0800 ;		// bit 12 :
											//	(Revision 3 or greater) Print the document to a representation from which a faithful digital copy of
											//	the PDF content could be generated. When this bit is clear (and bit 3 is set), printing is limited to
											//	a low-level representation of the appearance, possibly of degraded quality.

	public		$FileId ;							// File ID, as specified by the /ID flag
	public		$ObjectId ;							// Object id and text contents
	private		$ObjectData ;
	public		$Mode ;								// Encryption mode - currently, only the "Standard" keyword is accepted
	public		$EncryptionAlgorithm ;						// Encryption algorithm - one of the PDFCRYPT_* constants
	public		$AlgorithmVersion,						// Encryption algorithm version & revision (/V and /R entries)
			$AlgorithmRevision ;
	public		$Flags ;							// Protection flags (/P entry) - a combination of the PDFPERM_* bits
	public		$KeyLength ;							// Encryption key length (/Length entry, 40 if not specified)
	public		$UserKey,							// User and owner password keys (/U and /O entries)
			$OwnerKey ;
	public		$UserEncryptionString,						// /UE and /OE entries - not sure yet of the real usage of these ones
			$OwnerEncryptionString ;
	public		$EncryptMetadata ;						// True if metadata is also encrypted
	public		$FileKeyLength ;						// File key length ; set to 5 for version 1 revision 2 (RC4)

	protected	$Decrypter ;							// Decrypter object

	private		$UnsupportedEncryptionAlgorithm		=  false ;		// True if the encryption algorithm used in the PDF file is not yet supported


	/**************************************************************************************************************

	    NAME
	        Constructor

	    PROTOTYPE
	        obj	=  new  PdfEncryptionData ( $file_id, $mode, $object_id, $object_data ) ;

	    DESCRIPTION
		Creates an instance of a PdfEncryptionData class, using the information parsed from the supplied object
		data.
		When the algorithm version/revision is not handled, or when no decrypter is available for it, the
		object is flagged as unsupported and Decrypt() will return false.

	    PARAMETERS
		$file_id (string) -
			File id, as specified by the /ID flag.

		$mode (integer) -
			One of the PDFMODE_* constants.

		$object_id (integer) -
			Id of the object containing encryption parameters.

		$object_data (string) -
			Encryption parameters.

	    AUTHOR
	        Christian Vigh, 03/2017.

	    HISTORY
	    [Version : 1.0]	[Date : 2017-03-14]     [Author : CV]
	        Initial version.

	 **************************************************************************************************************/
	public function  __construct ( $file_id, $mode, $object_id, $object_data )
	   {
		$this -> FileId			=  $file_id ;
		$this -> ObjectId		=  $object_id ;
		$this -> ObjectData		=  $object_data ;
		$this -> Mode			=  $mode ;

		// Encryption algorithm version & revision ; a missing entry gives 0, which is reported as unsupported below
		$this -> AlgorithmVersion	=  preg_match ( '#/V \s+ (?P<value> \d+)#ix', $object_data, $algorithm_match )  ?
							( int ) $algorithm_match [ 'value' ]  :  0 ;

		$this -> AlgorithmRevision	=  preg_match ( '#/R \s+ (?P<value> \d+)#ix', $object_data, $algorithm_revision_match )  ?
							( int ) $algorithm_revision_match [ 'value' ]  :  0 ;

		// Encryption flags (0 if not specified)
		$this -> Flags			=  preg_match ( '#/P \s+ (?P<value> \-? \d+)#ix', $object_data, $flags_match )  ?
							( int ) $flags_match [ 'value' ]  :  0 ;

		// Key length (40 bits, if not specified)
		if  ( preg_match ( '#/Length \s+ (?P<value> \d+)#ix', $object_data, $key_length_match ) )
			$this -> KeyLength	=  ( int ) $key_length_match [ 'value' ] ;
		else
			$this -> KeyLength	=  40 ;

		// Owner and user passwords
		$this -> UserKey		=  $this -> GetStringParameter ( '/U', $object_data ) ;
		$this -> OwnerKey		=  $this -> GetStringParameter ( '/O', $object_data ) ;

		// Owner and user encryption strings
		$this -> UserEncryptionString	=  $this -> GetStringParameter ( '/UE', $object_data ) ;
		$this -> OwnerEncryptionString	=  $this -> GetStringParameter ( '/OE', $object_data ) ;

		// EncryptMetadata flag
		// The pattern uses the "x" modifier, so the whitespace between the key and its value must be matched explicitly
		if  ( preg_match ( '# /EncryptMetadata \s+ (?P<value> (true) | (1) | (false) | (0) )#imsx', $object_data, $encryption_match ) )
		   {
			if  ( ! strcasecmp ( $encryption_match [ 'value' ], 'true' )  ||  $encryption_match [ 'value' ]  ===  '1' )
				$this -> EncryptMetadata		=  true ;
			else
				$this -> EncryptMetadata		=  false ;
		    }
		else
			$this -> EncryptMetadata	=  false ;

		// Now, try to determine the encryption algorithm to be used
		$user_key_length		=  strlen ( $this -> UserKey ) ;
		$owner_key_length		=  strlen ( $this -> OwnerKey ) ;

		$error_unhandled_version	=  false ;
		$error_unhandled_revision	=  false ;

		switch  ( $this -> AlgorithmVersion )
		   {
			case	1 :
				switch  ( $this -> AlgorithmRevision )
				   {
					case	2 :
						// Both keys must be 32 bytes long : report the error as soon as one of them is not
						if  ( $user_key_length  !=  32  ||  $owner_key_length  !=  32 )
						   {
							if  ( PdfToText::$DEBUG )
								error ( new PdfToTextDecryptionException ( "Invalid user and/or owner key length ($user_key_length/$owner_key_length)", $object_id ) ) ;
						    }

						$this -> EncryptionAlgorithm	=  self::PDFCRYPT_ALGORITHM_RC4 ;
						$this -> FileKeyLength		=  5 ;
						break ;

					default :
						$error_unhandled_revision	=  true ;
				    }
				break ;

			default :
				$error_unhandled_version	=  true ;
		    }

		// Report unsupported versions/revisions
		if  ( $error_unhandled_version  ||  $error_unhandled_revision )
		   {
			if  ( PdfToText::$DEBUG )
				error ( new PdfToTextDecryptionException ( "Unsupported encryption algorithm version {$this -> AlgorithmVersion} revision {$this -> AlgorithmRevision}.",
						$object_id ) ) ;

			$this -> UnsupportedEncryptionAlgorithm		=  true ;

			return ;
		    }

		// Build the object key
		$this -> Decrypter		=  PdfDecryptionAlgorithm::GetInstance ( $this ) ;

		if  ( $this -> Decrypter  ===  false )
		   {
			if  ( PdfToText::$DEBUG )
				warning ( new PdfToTextDecryptionException ( "Unsupported encryption algorithm #{$this -> EncryptionAlgorithm}, " .
						"version {$this -> AlgorithmVersion} revision {$this -> AlgorithmRevision}.",
						$object_id ) ) ;

			$this -> UnsupportedEncryptionAlgorithm		=  true ;

			return ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetInstance - Creates an instance of a PdfEncryptionData object.

	    PROTOTYPE
	        $obj		=  PdfEncryptionData::GetInstance ( $file_id, $object_id, $object_data ) ;

	    DESCRIPTION
	        Returns an instance of encryption data, or false if the object data has no /Filter entry or if the
		encryption mode it specifies is not "Standard".

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  GetInstance ( $file_id, $object_id, $object_data )
	   {
		// Encryption mode
		if  ( ! preg_match ( '#/Filter \s* / (?P<mode> \w+)#ix', $object_data, $object_data_match ) )
			return  (false ) ;

		switch ( strtolower ( $object_data_match [ 'mode' ] ) )
		   {
			case	'standard' :
				$mode		=  self::PDFMODE_STANDARD ;
				break ;

			default :
				// The mode name comes from the match array ($object_data itself is a string)
				if  ( self::$DEBUG  >  1 )
					error ( new PdfToTextDecodingException ( "Unhandled encryption mode '{$object_data_match [ 'mode' ]}'", $object_id ) ) ;

				return ( false ) ;

		    }

		// Basic checks have been performed, return an instance of encryption data
		return ( new PdfEncryptionData ( $file_id, $mode, $object_id, $object_data ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Decrypt - Decrypts object data.

	    PROTOTYPE
	        $data		=  $this -> Decrypt ( $object_id, $object_data ) ;

	    DESCRIPTION
	        Decrypts object data, when the PDF file is password-protected.
		Decryption is currently disabled : the method returns false in all cases.

	    PARAMETERS
	        $object_id (integer) -
	                Pdf object number.

		$object_data (string) -
			Object data.

	    RETURN VALUE
	        Returns the decrypted object data, or false if the encrypted object could not be decrypted.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  Decrypt ( $object_id, $object_data )
	   {
		if  ( $this -> UnsupportedEncryptionAlgorithm )
			return ( false ) ;

		// The call to the decrypter is not enabled yet :
		//return ( $this -> Decrypter -> Decrypt ( $object_data ) ) ;
		return ( false ) ;
	    }
    }


/*==============================================================================================================

    class PdfDecryptionAlgorithm -
        Base class for algorithm decrypters.

  ==============================================================================================================*/
abstract class	PdfDecryptionAlgorithm		//extends  Object
   {
	// Encryption data object supplied to the constructor
	protected		$EncryptionData ;
	// Object key (binary string), its bytes as an array of integers, and its length in bytes
	protected		$ObjectKey ;
	protected		$ObjectKeyBytes ;
	protected		$ObjectKeyLength ;


	// Constructor -
	//	Computes the object key from the FileKeyLength, FileId and ObjectId properties of the supplied
	//	encryption data : the first FileKeyLength elements of FileId, followed by the three low-order bytes of
	//	the object id (least significant first) and two zero bytes for the generation number, are hashed with MD5.
	//	The key is made of the first (FileKeyLength + 5) bytes of the digest, up to a maximum of 16.
	public function  __construct ( $encryption_data )
	   {
		$this -> EncryptionData		=  $encryption_data ;

		$file_id		=  $encryption_data -> FileId ;
		$file_key_length	=  ( int ) $encryption_data -> FileKeyLength ;
		$object_id		=  ( int ) $encryption_data -> ObjectId ;

		$objkey			=  '' ;

		// A file id shorter than the file key length simply contributes fewer bytes
		for  ( $i = 0 ; $i  <  $file_key_length ; $i ++ )
		   {
			if  ( isset ( $file_id [$i] ) )
				$objkey 	.=  $file_id [$i] ;
		    }

		$objkey 		.=  chr ( $object_id  &  0xFF ) ;
		$objkey 		.=  chr ( ( $object_id  >>   8 )  &  0xFF ) ;
		$objkey 		.=  chr ( ( $object_id  >>  16 )  &  0xFF ) ;
		$objkey 		.=  chr ( 0 ) ;		// obj generation number & 0xFF
		$objkey 		.=  chr ( 0 ) ;		// obj generation number >> 8  &  0xFF

		// Only the first n + 5 bytes of the digest (16 at most) make up the key
		$this -> ObjectKeyLength	=  min ( $file_key_length + 5, 16 ) ;
		$this -> ObjectKey		=  substr ( md5 ( $objkey, true ), 0, $this -> ObjectKeyLength ) ;

		$this -> ObjectKeyBytes		=  array ( ) ;

		for  ( $i = 0 ; $i  <  $this -> ObjectKeyLength ; $i ++ )
			$this -> ObjectKeyBytes  []	=  ord ( $this -> ObjectKey [$i] ) ;
	    }


	// GetInstance -
	//	Returns a decrypter object for the algorithm given by the EncryptionAlgorithm property of the supplied
	//	encryption data, or false if there is none (only RC4 is handled).
	public static function  GetInstance  ( $encryption_data )
	   {
		switch  ( $encryption_data -> EncryptionAlgorithm )
		   {
			case	PdfEncryptionData::PDFCRYPT_ALGORITHM_RC4 :
				return ( new PdfRC4DecryptionAlgorithm ( $encryption_data ) ) ;

			default :
				return ( false ) ;
		    }
	    }


	// To be implemented by derived classes
	abstract public function  Reset		( ) ;
	abstract public function  Decrypt	( $data ) ;

    }


/*==============================================================================================================

    class PdfRC4DecryptionAlgorithm -
        A decrypter class for RC4 encoding.

  ==============================================================================================================*/
class	PdfRC4DecryptionAlgorithm		extends  PdfDecryptionAlgorithm
   {
	// Identity permutation (0..255), built once and shared by all instances
	private	static		$InitialState		=  false ;
	// Current state of the 256-entries permutation
	protected		$State ;


	// Constructor -
	//	Lets the parent class build the object key, then creates the shared initial state if needed.
	public function  __construct ( $encryption_data )
	   {
		parent::__construct ( $encryption_data ) ;

		if  ( self::$InitialState  ===  false )
			self::$InitialState	=  range ( 0, 255 ) ;
	    }


	// Reset -
	//	Key scheduling : restarts from the identity permutation and shuffles it using the object key bytes.
	public function  Reset ( )
	   {
		$this -> State		=  self::$InitialState ;
		$state			=  &$this -> State ;
		$key_position		=  0 ;
		$swap_with		=  0 ;

		for  ( $current = 0 ; $current  <  256 ; $current ++ )
		   {
			$swap_with	=  ( $this -> ObjectKeyBytes [ $key_position ] + $state [ $current ] + $swap_with )  &  0xFF ;

			// Exchange the entries at positions $current and $swap_with
			$saved			=  $state [ $current ] ;
			$state [ $current ]	=  $state [ $swap_with ] ;
			$state [ $swap_with ]	=  $saved ;

			// Key bytes are used cyclically
			$key_position	=  ( $key_position + 1 ) % $this -> ObjectKeyLength ;
		    }
	    }


	// Decrypt -
	//	Resets the state, then returns the supplied data with each byte xor'ed with the next keystream byte.
	public function  Decrypt ( $data )
	   {
		$this -> Reset ( ) ;

		$state		=  &$this -> State ;
		$byte_count	=  strlen ( $data ) ;
		$output		=  '' ;
		$x		=  0 ;
		$y		=  0 ;

		for  ( $position = 0 ; $position  <  $byte_count ; $position ++ )
		   {
			$x	=  ( $x + 1 ) & 0xFF ;
			$y	=  ( $state [$x] + $y ) & 0xFF ;

			// Exchange the entries at positions $x and $y
			$tx		=  $state [$x] ;
			$ty		=  $state [$y] ;
			$state [$x]	=  $ty ;
			$state [$y]	=  $tx ;

			// The keystream byte is read after the exchange
			$output		.=  chr ( ord ( $data [ $position ] ) ^ $state [ ( $tx + $ty ) & 0xFF ] ) ;
		    }

		return ( $output ) ;
	    }
    }

    /*
static Guchar rc4DecryptByte(Guchar *state, Guchar *x, Guchar *y, Guchar c) {
  Guchar x1, y1, tx, ty;

  x1 = *x = (*x + 1) % 256;
  y1 = *y = (state[*x] + *y) % 256;
  tx = state[x1];
  ty = state[y1];
  state[x1] = ty;
  state[y1] = tx;
  return c ^ state[(tx + ty) % 256];
}
*/


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                       FORM DATA MANAGEMENT                                       ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/


/*==============================================================================================================

    class PdfToTextFormDefinitions -
        Analyzes a template XML file that describes PDF form data and maps PDF field names to human-readable
	names.
	The GetFormData() method returns an object containing the mapped properties with their respective values.

  ==============================================================================================================*/
class  PdftoTextFormDefinitions		// extends		Object
					implements	ArrayAccess, Countable, IteratorAggregate
   {
	static private	$ClassDefinitionCount	=  0 ;

	// Class name, as specified in the XML template
	protected	$ClassName ;
	// Form definitions (a template may contain several versions of the same form definition)
	protected	$Definitions ;
	// Form definitions coming from the PDF file, indexed by field name
	protected	$PdfDefinitions		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Parses the supplied XML template.
		$xml_data is the XML template ; when empty, a default template is built from the field definitions
		found in $pdf_xml_data (the XML form data coming from the PDF file).

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $xml_data, $pdf_xml_data )
	   {
		// Get PDF XML form data definitions
		$this -> __get_pdf_form_definitions ( $pdf_xml_data ) ;

		// Create XML data from scratch, if none specified
		if  ( ! $xml_data  )
			$xml_data	=  $this -> __create_default_xml_data ( $this -> PdfDefinitions ) ;

		// Decode XML the hard way, without XSD
		$xml		=  simplexml_load_string ( $xml_data ) ;

		// simplexml_load_string() returns false when the template is not well-formed
		if  ( $xml  ===  false )
			error ( new PdfToTextFormException ( "The XML template could not be parsed." ) ) ;

		$root_entry	=  $xml -> getName ( ) ;
		$definitions	=  array ( ) ;
		$class_name	=  "PdfFormData" ;

		if  ( strcasecmp ( $root_entry, "forms" ) )
			error ( new PdfToTextFormException ( "Root entry must be <forms>, <$root_entry> was found." ) ) ;

		// Get the attribute values of the <forms> tag
		foreach  ( $xml -> attributes ( )  as  $attribute_name => $attribute_value )
		   {
			switch  ( strtolower ( $attribute_name ) )
			   {
				case	'class' :
					$class_name	=  ( string ) $attribute_value ;

					if  ( class_exists ( $class_name, false ) )
						error ( new PdfToTextFormException ( "Class \"$class_name\" specified in XML template already exists." ) ) ;

					break ;

				default :
					error ( new PdfToTextFormException ( "Invalid attribute \"$attribute_name\" in <forms> tag." ) ) ;
			    }
		    }

		// Don't know if it will be useful, but try to avoid class name collisions by appending a sequential number if necessary
		if  ( class_exists ( $class_name, false ) )
		   {
			self::$ClassDefinitionCount ++ ;
			$class_name	.=  '_' . self::$ClassDefinitionCount ;
		    }

		// Loop through each child <form> entry
		foreach  ( $xml -> children ( )  as  $child )
		   {
			$child_name	=  $child -> getName ( ) ;

			switch ( strtolower ( $child_name ) )
			   {
				case	'form' :
					$definitions []		=  new PdfToTextFormDefinition ( $class_name, $child, $this -> PdfDefinitions ) ;
					break ;

				default :
					error ( new PdfToTextFormException ( "Invalid tag <$child_name>." ) ) ;
			    }
		    }

		// Ensure that there is at least one form definition
		if  ( ! count ( $definitions ) )
			error ( new PdfToTextFormException ( "No <form> definition found." ) ) ;

		// Save to properties
		$this -> ClassName		=  $class_name ;
		$this -> Definitions		=  $definitions ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Internal methods.

	 *-------------------------------------------------------------------------------------------------------------*/

	// __get_pdf_form_definitions -
	//	Retrieves the form field definitions coming from the PDF file.
	//	Each <field> element having a "name" attribute gives an entry in the PdfDefinitions property, holding
	//	the field name and the number of times it was encountered.
	private function  __get_pdf_form_definitions ( $pdf_data )
	   {
		// Start from an empty list, so that a PDF file without fields gives an empty array
		$this -> PdfDefinitions		=  array ( ) ;

		preg_match_all ( '#(?P<field> <field .*? </field \s* >)#imsx', $pdf_data, $matches ) ;

		foreach  ( $matches [ 'field' ]  as  $field )
		   {
			$xml_field	=  simplexml_load_string ( $field ) ;

			// Ignore fields that are not well-formed XML
			if  ( $xml_field  ===  false )
				continue ;

			foreach  ( $xml_field -> attributes ( )  as  $attribute_name => $attribute_value )
			   {
				switch  ( strtolower ( $attribute_name ) )
				   {
					case	'name' :
						$field_name		=  ( string ) $attribute_value ;

						if  ( isset ( $this -> PdfDefinitions [ $field_name ] ) )
							$this -> PdfDefinitions [ $field_name ] [ 'occurrences' ] ++ ;
						else
						   {
							$this -> PdfDefinitions [ $field_name ]		=  array
							   (
								'name'		=>  $field_name,
								'occurrences'	=>  1
							    ) ;
						    }

						break ;
				    }
			    }
		    }
	    }


	// __create_default_xml_data -
	//	When no XML template has been specified, creates a default one based of the form definitions located in the PDF file.
	//	Dashes in field names are replaced with underscores, and the names are escaped so that the generated
	//	template remains well-formed whatever characters they contain.
	private function  __create_default_xml_data ( $pdf_definitions )
	   {
		$result		=  "<forms>" . PHP_EOL .
				   "\t<form version=\"1.0\">" . PHP_EOL ;

		foreach  ( $pdf_definitions  as  $name => $field )
		   {
			$name		 =  str_replace ( '-', '_', $name ) ;		// Just in case of
			$name		 =  htmlspecialchars ( $name, ENT_QUOTES ) ;
			$result		.=  "\t\t<field name=\"$name\" form-field=\"$name\" type=\"string\"/>" . PHP_EOL ;
		    }

		$result		.=  "\t</form>" . PHP_EOL .
				    "</forms>" . PHP_EOL ;

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations to retrieve form definitions.
		The #[\ReturnTypeWillChange] lines are attributes for PHP 8.1 and later (they avoid the deprecation
		notices about missing return types) and plain comments for earlier versions.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Number of form definitions
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> Definitions ) ) ; }


	// Iterator over the form definitions
	#[\ReturnTypeWillChange]
	public function  getIterator ( )
	   { return ( new ArrayIterator ( $this -> Definitions ) ) ; }


	// True if a form definition exists at the specified index
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> Definitions [ $offset ] ) ) ; }


	// Form definition located at the specified index
	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   { return ( $this -> Definitions [ $offset ] ) ; }


	// Form definitions cannot be modified through array access
	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextException ( "Unsupported operation." ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextException ( "Unsupported operation." ) ) ; }
    }


/*==============================================================================================================

    class PdfToTextFormDefinition -
        Holds the description of a form inside a form XML template.

  ==============================================================================================================*/
class  PdfToTextFormDefinition		// extends  Object
   {
	// Class of the object returned by GetFormData( )
	public		$ClassName ;

	// Form version
	public		$Version ;

	// Field definitions
	public		$FieldDefinitions		=  array ( ) ;

	// Field groups (ie, fields that are the results of the concatenation of several form fields)
	public		$Groups				=  array ( ) ;

	// Pdf field definitions
	public		$PdfDefinitions ;

	// Class definition in PHP, whose instance will be returned by GetFormData()
	private		$ClassDefinition		=  false ;

	// Direct access to field definitions either through their template name or PDF name
	private		$FieldDefinitionsByName		=  array ( ) ;
	private		$FieldDefinitionsByPdfName	=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Analyze the contents of an XML template form definition.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $class_name, $form_definition, $pdf_definitions )
	   {
		$this -> ClassName		=  $class_name ;
		$this -> PdfDefinitions		=  $pdf_definitions ;
		$field_count			=  0 ;

		// Get <form> tag attributes ; "version" is the only one recognized
		foreach  ( $form_definition -> attributes ( )  as  $attribute_name => $attribute_value )
		   {
			switch ( strtolower ( $attribute_name ) )
			   {
				case	'version' :
					$this -> Version	=  ( string ) $attribute_value ;
					break ;

				default :
					error ( new PdfToTextFormException ( "Invalid attribute \"$attribute_name\" in <form> tag." ) ) ;
			    }
		    }

		// Loop through subtags
		foreach  ( $form_definition -> children ( )  as  $child )
		   {
			$tag_name	=  $child -> getName ( ) ;

			// Check subtags
			switch  ( strtolower ( $tag_name ) )
			   {
				// <group> :
				//	A group is used to create a property that is the concatenation of several existing properties.
				case	'group' :
					$fields		=  array ( ) ;
					$separator	=  '' ;
					$name		=  false ;

					// Loop through attribute names ; they are matched case-insensitively, like the tag names
					// and the attributes of the <form> tag
					foreach  ( $child -> attributes ( )  as  $attribute_name => $attribute_value )
					   {
						switch  ( strtolower ( $attribute_name ) )
						   {
							// "name" attribute :
							//	The name of the property, as it will appear in the output object.
							case	'name' :
								$name		=  PdfToTextFormDefinition::ValidatePhpName ( ( string ) $attribute_value ) ;
								break ;

							// "separator" attribute :
							//	Separator to be used when concatenating the underlying properties.
							case	'separator' :
								$separator	=  ( string ) $attribute_value ;
								break ;

							// "fields" :
							//	A list of comma-separated field names, whose values will be concatenated together
							//	using the specified separator.
							//	explode() always returns at least one item : an empty attribute value gives a
							//	single empty name, which does not pass the ValidatePhpName() check.
							case	'fields' :
								foreach  ( explode ( ',', ( string ) $attribute_value )  as  $item )
									$fields []	=  PdfToTextFormDefinition::ValidatePhpName ( $item ) ;

								break ;

							// Other attribute names : not allowed
							default :
								error ( new PdfToTextFormException ( "Invalid attribute \"$attribute_name\" in <group> tag." ) ) ;
						    }
					    }

					// Check that at least one field has been specified (ie, that the "fields" attribute is present)
					if  ( ! count ( $fields ) )
						error ( new PdfToTextFormException ( "Empty \"fields\" attribute in <group> tag." ) ) ;

					// Check that the mandatory property name has been specified
					if  ( ! $name )
						error ( new PdfToTextFormException ( "The \"name\" attribute is mandatory in <group> tag." ) ) ;

					// Add this new grouped property to the list of existing groups
					$this -> Groups []	=  array
					   (
						'name'		=>  $name,
						'separator'	=>  $separator,
						'fields'	=>  $fields
					    ) ;

					break ;

				// <field> :
				//	Field definition. Both lookup tables receive the index of the new definition in the
				//	FieldDefinitions array.
				case	'field' :
					$field_def							=  new PdfToTextFormFieldDefinition ( $child ) ;
					$this -> FieldDefinitions []					=  $field_def ;
					$this -> FieldDefinitionsByName [ $field_def -> Name ]		=  $field_count ;
					$this -> FieldDefinitionsByPdfName [ $field_def -> PdfName ]	=  $field_count ;
					$field_count ++ ;
					break ;

				// Don't allow other tag names
				default :
					error ( new PdfToTextFormException ( "Invalid tag <$tag_name> in <form> definition." ) ) ;
			    }
		    }

		// Check that everything is ok (ie, that there is no duplicate fields)
		$this -> __paranoid_checks ( ) ;
	    }


	    // ValidatePhpName -
	    //	Trims $name and checks that the result starts with a letter or an underscore, followed only by letters,
	    //	digits or underscores (case-insensitive). When the check fails, a PdfToTextFormException is handed over
	    //	to error(). Returns the trimmed name.
	    //	The method is static because it is called as PdfToTextFormDefinition::ValidatePhpName() without any
	    //	instance, including from the PdfToTextFormFieldDefinition class.
	    public static function  ValidatePhpName ( $name )
	   {
		$name	=  trim ( $name ) ;

		if  ( ! preg_match ( '/^ [a-z_][a-z0-9_]* $/ix', $name ) )
			error ( new PdfToTextFormException ( "Invalid PHP name \"$name\"." ) ) ;

		return ( $name ) ;
	    }

	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetClassDefinition - Returns the class definition for the current form.

	    PROTOTYPE
	        $def	=   $form_def -> GetClassDefinition ( ) ;

	    DESCRIPTION
	        Returns a string containing the PHP class definition that will contain the properties defined in the XML
		form template.

	    RETURN VALUE
	        Returns a string containing the PHP class definition for the current form.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetClassDefinition ( )
	   {
		// Return the existing definition, if this method has been called more than once
		if  ( $this -> ClassDefinition )
			return ( $this -> ClassDefinition ) ;

		$class_def	=  "// Class " . $this -> ClassName . " : " . $this -> Version . PHP_EOL .
				   "class {$this -> ClassName}\t\textends PdfToTextFormData" . PHP_EOL .
				   "   {" . PHP_EOL ;

		// Get the maximum width of field, PDF field and constant names ; it is used below to pad constant names
		$max_width	=  0 ;

		foreach  ( $this -> FieldDefinitions  as  $def )
		   {
			$max_width	=  max ( $max_width, strlen ( $def -> Name ), strlen ( $def -> PdfName ) ) ;

			foreach  ( $def -> Constants  as  $constant )
				$max_width	=  max ( $max_width, strlen ( $constant [ 'name' ] ) ) ;
		    }

		// First, write out the constant definitions ; a constant shared by several fields is written only once
		$all_constants	=  array ( ) ;

		foreach  ( $this -> FieldDefinitions  as  $def )
		   {
			foreach  ( $def -> Constants  as  $constant )
			   {
				$name	=  $constant [ 'name' ] ;
				$value	=  $constant [ 'value' ] ;

				if  ( isset ( $all_constants [ $name ] ) )
				   {
					if  ( $all_constants [ $name ]  !=  $value )
						error ( new PdfToTextFormException ( "Constant \"$name\" is defined more than once with different values." ) ) ;
				    }
				else
				   {
					$all_constants [ $name ]	 =  $value ;

					// var_export() always produces a valid PHP literal :
					// - Numeric values are converted to a number first, so that strings such as "08" or "010"
					//   are neither rejected nor read as octal literals by the PHP parser
					// - Other values are written as single-quoted strings, so that a "$" sign in the value
					//   cannot start a variable interpolation in the generated code
					if  ( is_numeric ( $value ) )
						$literal	=  var_export ( $value + 0, true ) ;
					else
						$literal	=  var_export ( ( string ) $value, true ) ;

					$class_def			.=  "\tconst\t" . str_pad ( $name, $max_width, " ", STR_PAD_RIGHT ) . "\t = $literal ; " . PHP_EOL ;
				    }
			    }
		    }

		$class_def	.=  PHP_EOL . PHP_EOL ;

		// Then write property definitions ; the @formdata doc comment tag marks them as form data fields
		foreach  ( $this -> FieldDefinitions  as  $def )
		   {
			$class_def	.=  "\t/** @formdata */" . PHP_EOL .
					     "\tprotected\t\t\${$def -> Name} ;" . PHP_EOL ;
		    }

		$class_def	.=  PHP_EOL . PHP_EOL ;

		// And finally, grouped properties ; the field list and the separator are stored in the doc comment,
		// closing parentheses in the separator being escaped with a backslash
		foreach  ( $this -> Groups  as  $group )
		   {
			$class_def	.=  "\t/**" . PHP_EOL .
					    "\t\t@formdata" . PHP_EOL .
					    "\t\t@group(" . implode ( ',', $group [ 'fields' ] ) . ')' . PHP_EOL .
					    "\t\t@separator(" . str_replace ( ')', '\)', $group [ 'separator' ] ) . ')' . PHP_EOL .
					    "\t */" . PHP_EOL .
					    "\tprotected\t\t\${$group [ 'name' ]} ;" . PHP_EOL .PHP_EOL ;
		    }

		// Constructor
		$class_def	.=  PHP_EOL . PHP_EOL .
				    "\t// Class constructor" . PHP_EOL .
				    "\tpublic function  __construct ( )" . PHP_EOL .
				    "\t   {" . PHP_EOL .
				    "\t\tparent::__construct ( ) ;" . PHP_EOL .
				    "\t    }" . PHP_EOL ;

		$class_def	.=  "    }" . PHP_EOL ;

		// Save the definition, if a second call occurs
		$this -> ClassDefinition	=  $class_def ;

		// All done, return
		return ( $class_def ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetFormData - Returns a form data object containing properties mapped to the form data.

	    PROTOTYPE
	        $object		=  $form_def -> GetFormData ( $fields ) ;

	    DESCRIPTION
	        Returns an object containing properties mapped to actual form data.

	    PARAMETERS
	        $fields (array) -
	                An associative array whose keys are the PDF form field names, and values their values as stored
			in the PDF file.

	    RETURN VALUE
	        Returns an object of the class, as defined by the template specified to PdfToTextFormDefinitions
		class constructor.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetFormData ( $fields = array ( ) )
	   {
		$target_class	=  $this -> ClassName ;

		// The form data class is declared on first use, from the source code built by GetClassDefinition()
		// (autoloading is not attempted)
		if  ( ! class_exists ( $target_class, false ) )
			eval ( $this -> GetClassDefinition ( ) ) ;

		$form_data	=  new  $target_class ( ) ;

		// Only the values whose key is a PDF field name referenced by the template are copied ; the others
		// are silently ignored
		foreach  ( $fields  as  $pdf_name => $raw_value )
		   {
			if  ( ! isset ( $this -> FieldDefinitionsByPdfName [ $pdf_name ] ) )
				continue ;

			$definition_index		=  $this -> FieldDefinitionsByPdfName [ $pdf_name ] ;
			$property_name			=  $this -> FieldDefinitions [ $definition_index ] -> Name ;
			$form_data -> $property_name	=  $this -> __process_field_value ( $raw_value ) ;
		    }

		return ( $form_data ) ;
	    }


	// __process_field_value -
	//	Translates html entities and removes carriage returns (which are apparently used for multiline field) to
	//	replace them with newlines.
	private function  __process_field_value ( $value )
	   {
		$decoded	=  html_entity_decode ( $value ) ;

		// The replacements are applied one after the other :
		// - First, "\r\n" pairs lose their carriage return
		// - Then every remaining carriage return, which is necessarily a lone one, becomes a newline
		$line_endings	=  array ( "\r\n", "\r" ) ;

		return ( str_replace ( $line_endings, "\n", $decoded ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetFormDataFromPdfObject - Same as GetFormData(), except that it operates on XML data.

	    PROTOTYPE
	        $object		=  $pdf -> GetFormDataFromPdfObject ( $pdf_data ) ;

	    DESCRIPTION
	        Behaves the same as GetFormData(), except that it takes as input the XML contents of a PDF object.

	    PARAMETERS
	        $pdf_data (string) -
	                XML data coming from the PDF file.

	    RETURN VALUE
	        Returns an object of the class, as defined by the template specified to PdfToTextFormDefinitions
		class constructor.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetFormDataFromPdfObject ( $pdf_data )
	   {
		// simplexml_ functions do not like tags that contain a colon - replace them with a dash
		// NOTE(review): the pattern is not limited to tag names ; it rewrites the first colon found after any "<",
		// which may be located in an attribute value or in text content - confirm that this is acceptable.
		$pdf_data	=  preg_replace ( '/(<[^:]+?)(:)/', '$1-', $pdf_data ) ;

		// Load the xml data
		$xml		=  simplexml_load_string ( $pdf_data ) ;

		// Get the form field values
		$fields		=  array ( ) ;

		// simplexml_load_string() returns false when the data is not well-formed XML ; report it explicitly
		// instead of failing later on a method call on a boolean value
		if  ( $xml  ===  false )
			error ( new PdfToTextFormException ( "The PDF form data is not valid XML." ) ) ;
		else
			$this -> __get_pdfform_data ( $fields, $xml ) ;

		// Return the object
		return ( $this -> GetFormData ( $fields ) ) ;
	    }


	// __get_pdfform_data -
	//	Retrieve the form field values from the specified PDF object, specified as XML
	private function  __get_pdfform_data ( &$fields, $xml )
	   {
		$node_name	=  $xml -> getName ( ) ;

		// A tag named after one of the PDF form fields : its text is the field value, and its children are
		// not explored
		if  ( isset ( $this -> PdfDefinitions [ $node_name ] ) )
		   {
			$fields [ $node_name ]	=  ( string )  $xml ;

			return ;
		    }

		// Any other tag : look for form fields in its children
		foreach  ( $xml -> children ( )  as  $sub_node )
			$this -> __get_pdfform_data ( $fields, $sub_node ) ;
	    }


	// __paranoid_checks -
	//	Checks for several kinds of inconsistencies in the supplied XML template.
	private function  __paranoid_checks ( )
	   {
		// Check that field names, PDF field names and constant names are unique
		$names			=  array ( ) ;
		$pdf_names		=  array ( ) ;
		$constant_names		=  array ( ) ;

		foreach  ( $this -> FieldDefinitions  as  $def )
		   {
			// Every field of the template must reference a field that exists in the PDF file
			if  ( ! isset ( $this -> PdfDefinitions [ $def -> PdfName ] ) )
				error ( new PdfToTextFormException ( "Field \"{$def -> PdfName}\" is not defined in the PDF file." ) ) ;

			if  ( isset ( $names [ $def -> Name ] ) )
				error ( new PdfToTextFormException ( "Field \"{$def -> Name}\" is defined more than once." ) ) ;

			$names [ $def -> Name ]		=  true ;

			if  ( isset ( $pdf_names [ $def -> PdfName ] ) )
				error ( new PdfToTextFormException ( "PDF Field \"{$def -> PdfName}\" is referenced more than once." ) ) ;

			$pdf_names [ $def -> PdfName ]	=  true ;

			// A constant may be defined by several fields, provided that its value is the same everywhere
			foreach  ( $def -> Constants  as  $constant )
			   {
				$constant_name		=  $constant [ 'name' ] ;

				if  ( isset ( $constant_names [ $constant_name ] )  &&  $constant_names [ $constant_name ]  !=  $constant [ 'value' ] )
					error ( new PdfToTextFormException ( "Constant \"$constant_name\" is defined more than once with different values." ) ) ;

				$constant_names [ $constant_name ]	=  $constant [ 'value' ] ;
			    }
		    }

		// Check that group names are unique and that the fields they are referencing exist
		$group_names	=  array ( ) ;

		foreach ( $this -> Groups  as  $group )
		   {
			$group_name	=  $group [ 'name' ] ;

			if  ( isset ( $group_names [ $group_name ] ) )
				error ( new PdfToTextFormException ( "Group \"{$group [ 'name' ]}\" is defined more than once." ) ) ;

			// Remember this group name, so that a later group having the same name can be detected
			$group_names [ $group_name ]	=  true ;

			if  ( isset ( $names [ $group_name ] ) )
				error ( new PdfToTextFormException ( "Group \"{$group [ 'name' ]}\" has the same name as an existing field." ) ) ;

			foreach ( $group [ 'fields' ]  as  $field_name )
			   {
				if  ( ! isset ( $names [ $field_name ] ) )
					error ( new PdfToTextFormException ( "Field \"$field_name\" of group \"{$group [ 'name' ]}\" does not exist." ) ) ;
			    }
		    }
	    }
    }


/*==============================================================================================================

    class PdfToTextFormFieldDefinition -
        Contains an XML template form field definition.

  ==============================================================================================================*/
class  PdfToTextFormFieldDefinition	// extends  Object
   {
	// Supported field types
	const		TYPE_STRING		=  1 ;			// String
	const		TYPE_CHOICE		=  2 ;			// Choice (must have <constant> subtags)

	// Official name (as it will appear in the class based on the XML template)
	public		$Name			=  false ;
	// Field name, as specified in the input PDF file
	public		$PdfName		=  false ;
	// Field type
	public		$Type			=  self::TYPE_STRING ;
	// Available constant values for this field when the "type" attribute has the value "choice".
	// Each entry is an associative array with the following keys :
	// - 'name'    : constant name
	// - 'value'   : PDF form field value mapped to the constant (empty string when not specified in <default>)
	// - 'default' : true for an entry coming from a <default> tag, false for a <case> tag
	public		$Constants		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Builds the field definition object from a <field> node of the XML template.
		Errors are reported by handing over a PdfToTextFormException object to error().

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $field_node )
	   {
		// Loop through attributes ; attributes other than "name", "form-field" and "type" are ignored
		foreach  ( $field_node -> attributes ( )  as  $attribute_name => $attribute_value )
		   {
			switch  ( strtolower ( $attribute_name ) )
			   {
				// "name" attribute :
				//	Specifies the field name as it will appear in the output class. Must be a valid PHP name.
				case	'name' :
					$this -> Name		=  PdfToTextFormDefinition::ValidatePhpName ( ( string ) $attribute_value ) ;
					break ;

				// "form-field" attribute :
				//	Corresponding field name in the input PDF form.
				case	'form-field' :
					$this -> PdfName	=  ( string ) $attribute_value ;
					break ;

				// "type" :
				//	Field type. Can be either :
				//	- "string" :
				//		The field value can be any type of string.
				//	- "choice" :
				//		The field value has one of the values defined by the <case> or <default> subtags.
				case	'type' :
					switch ( strtolower ( ( string ) $attribute_value ) )
					   {
						case	'string' :
							$this -> Type	=  self::TYPE_STRING ;
							break ;

						case	'choice' :
							$this -> Type	=  self::TYPE_CHOICE ;
							break ;

						default :
							error ( new PdfToTextFormException ( "Invalid value \"$attribute_value\" for the \"$attribute_name\" attribute of the <field> tag." ) ) ;
					    }

					break ;
			    }
		    }

		// The "name" and "form-field" attributes are mandatory
		if  ( ! $this -> Name )
			error ( new PdfToTextFormException ( "The \"name\" attribute is mandatory for the <field> tag." ) ) ;

		if  ( ! $this -> PdfName )
			error ( new PdfToTextFormException ( "The \"form-field\" attribute is mandatory for the <field> tag." ) ) ;

		// For "type=choice" entries, we have to look for <case> or <default> subtags ; subtags of other field
		// types are not examined
		if  ( $this -> Type  ===  self::TYPE_CHOICE )
		   {
			foreach  ( $field_node -> children ( )  as  $child )
			   {
				$tag_name	=  $child -> getName ( ) ;
				$lcname		=  strtolower ( $tag_name ) ;
				$is_default	=  false ;

				switch ( $lcname )
				   {
					// Default value to be used when no PDF field value matches the defined constants
					// (intentionally falls through the processing of the <case> tag)
					case	'default' :
						$is_default		=  true ;

					// <case> tag :
					//	Maps a value to a constant name that will be defined in the generated class.
					case	'case' :
						// Both start as false, so that a missing attribute can be told apart from an empty one
						$constant_value		=  false ;
						$constant_name		=  false ;

						// Retrieve attributes
						foreach  ( $child -> attributes ( )  as  $attribute_name => $attribute_value )
						   {
							switch  ( strtolower ( $attribute_name ) )
							   {
								// "value" attribute :
								//	PDF form field value.
								case	'value'	:
									$constant_value		=  ( string ) $attribute_value ;
									break ;

								// "constant" attribute :
								//	Associated constant.
								case	'constant' :
									$constant_name		=  PdfToTextFormDefinition::ValidatePhpName ( ( string ) $attribute_value ) ;
									break ;

								// Bail out if any unrecognized attribute has been specified
								default :
									error ( new PdfToTextFormException ( "Invalid attribute \"$attribute_name\" in <$tag_name> tag." ) ) ;
							    }
						    }

						// Each <case> entry must have a "value" attribute ; it is optional for <default>, where
						// it defaults to an empty string
						if  ( $constant_value  ===  false )
						   {
							if  ( ! $is_default )
								error ( new PdfToTextFormException ( "Missing constant value in <case> tag." ) ) ;

							$constant_value		=  "" ;
						    }

						// The "constant" attribute is mandatory for both tags
						if  ( $constant_name  ===  false )
							error ( new PdfToTextFormException ( "Attribute \"constant\" is required for <$tag_name> tag." ) ) ;

						// Add this to the list of existing constants
						$this -> Constants []	=  array
						   (
							'name'		=>  $constant_name,
							'value'		=>  $constant_value,
							'default'	=>  $is_default
						    ) ;

						break ;

					// Check for unrecognized tags
					default :
						error ( new PdfToTextFormException ( "Invalid tag <$tag_name> in <field> definition." ) ) ;
				    }
			    }
		    }
	    }
    }


/*==============================================================================================================

    class PdfToTextFormData -
        Base class for all Pdf form templates data.

  ==============================================================================================================*/
class  PdfToTextFormData		// extends  Object
   {
	// Doc comments provide information about form data fields (mainly to handle grouped field values)
	// The $__Properties array gives information about the form data fields themselves ; it is indexed by
	// property name, each entry being an associative array with the following keys :
	// - 'name'      : property name
	// - 'fields'    : array of the field names listed in the @group() tag, or false for a simple field
	// - 'separator' : string specified in the @separator() tag, or false when the tag is absent
	private		$__Properties	=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Retrieve information about the derived class properties, which are specified by the derived class
		generated on the fly.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( )
	   {
		// Get class properties
		$reflection	=  new ReflectionClass ( $this ) ;
		$properties	=  $reflection -> getProperties ( ) ;

		// Loop through class properties
		foreach  ( $properties  as  $property )
		   {
			$propname	=  $property -> getName ( ) ;
			$doc_comment	=  $property -> getDocComment ( ) ;

			// A doc comment may indicate either :
			// - A form data field (@formdata)
			// - A grouped field ; in this case, we will have the following tags :
			//	. @formdata
			//	. @group(field_list) : list of fields grouped for this property
			//	. @separator(string) : a separator used when catenating grouped fields
			// Properties without a doc comment, or whose doc comment has no @formdata tag, are ignored
			if  ( ! $doc_comment  ||  strpos ( $doc_comment, '@formdata' )  ===  false )
				continue ;

			$fields		=  false ;
			$separator	=  false ;

			// @group(fields) pattern
			if  ( preg_match ( '/group \s* \( \s* (?P<fields> [^)]+) \)/imsx', $doc_comment, $match ) )
				$fields		=  explode ( ',', $match [ 'fields' ] ) ;

			// @separator(string) pattern :
			// - The whole text up to the first unescaped closing parenthesis is captured ; the repetition is
			//   located inside the named group, otherwise only the last repeated character would be captured
			// - A backslash escapes the character that follows it (this is how a closing parenthesis can be
			//   part of the separator) ; stripslashes() then removes the escaping backslashes
			// - Whitespace located after the opening parenthesis is part of the separator
			if  ( preg_match ( '/separator \s* \( (?P<separator> (?: \\\\ . | [^)\\\\] )* ) \) /isx', $doc_comment, $match ) )
				$separator	=  stripslashes ( $match [ 'separator' ] ) ;

			// Property belongs to the form - add it to the list of available properties
			$this -> __Properties [ $propname ]	=  array
			   (
				'name'		=>  $propname,
				'fields'	=>  $fields,
				'separator'	=>  $separator
			    ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    __get -
		Returns the underlying property value for this PDF data field.
		A PdfToTextFormException is handed over to warning() when the property is not a form data field.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __get ( $member )
	   {
		if  ( ! isset ( $this -> __Properties [ $member ] ) )
			warning ( new PdfToTextFormException ( "Undefined property \"$member\"." ) ) ;

		return ( $this -> $member ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    __set -
		Sets the underlying property value for this PDF data field.
		When the property is a compound one, sets individual members as well.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __set  ( $member, $value )
	   {
		// Property exists : some special processing will be needed
		if  ( isset ( $this -> __Properties [ $member ] ) )
		   {
			$prop_entry	=  $this -> __Properties [ $member ] ;

			// Non-compound property
			if  ( ! $prop_entry [ 'fields' ] )
			   {
				$this -> $member	=  $value ;

				// However, we have to check that this property belongs to a compound property and change
				// the compound property value accordingly
				foreach  ( $this -> __Properties  as  $name => $property )
				   {
					if  ( ! $property [ 'fields' ]  ||  ! in_array ( $member, $property [ 'fields' ], true ) )
						continue ;

					// Collect the current values of all the fields that are part of the compound property
					$values		=  array ( ) ;

					foreach  ( $property [ 'fields' ]  as  $field_name )
						$values	[]	=  $this -> $field_name ;

					// Change compound property value accordingly, using the specified separator
					$this -> $name	=  implode ( $property [ 'separator' ], $values ) ;
				    }
			    }
			// Compound property : we will have to explode it in separate parts, using the compound property separator,
			// then set individual property values
			else
			   {
				$values		=  explode ( $prop_entry [ 'separator' ], $value ) ;
				$value_count	=  count ( $values ) ;
				$field_count	=  count ( $prop_entry [ 'fields' ] ) ;

				// There must be exactly one value part per grouped field
				if  ( $value_count  <  $field_count )
					error ( new PdfToTextFormException ( "Not enough value parts specified for the \"$member\" property ($value)." ) ) ;
				else if  ( $value_count  >  $field_count )
					error ( new PdfToTextFormException ( "Too much value parts specified for the \"$member\" property ($value)." ) ) ;

				$this -> $member	=  $value ;

				for  ( $i = 0 ; $i  <  $value_count ; $i ++ )
				   {
					$sub_member		=  $prop_entry [ 'fields' ] [$i] ;
					$this -> $sub_member	=  $values [$i] ;
				    }
			    }
		    }
		// Property does not exist : let PHP act as the default way
		else
			$this -> $member	=  $value ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                  CAPTURE DEFINITION MANAGEMENT                                   ******
 ******         (none of the classes listed here are meant to be instantiated outside this file)         ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    class PdfToTextCaptureDefinitions -
        Holds text capture definitions, whose XML data has been supplied to the PdfToText::SetCapture() method.

  ==============================================================================================================*/
class  PdfToTextCaptureDefinitions	// extends		Object
					implements	ArrayAccess, Countable, Iterator
   {
	// Shape definitions, indexed by shape name - The actual objects populating this array depend on the
	// definitions supplied (<rectangle> or <lines>)
	protected	$ShapeDefinitions		=  array ( ) ;

	// Shape names (the keys of $ShapeDefinitions) - used for iteration
	private		$ShapeNames ;

	// Page count - remains false until SetPageCount() is called
	private		$PageCount			=  false ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR -
		Analyzes the XML data defining the areas to be captured.

	    PARAMETERS
		$xml_data (string) -
			XML capture definitions. The root tag must be <captures> ; its children can be <rectangle> or
			<lines> tags (tag names are case-insensitive).

	    NOTES
		A PdfToTextCaptureException is passed to error() if the data is not valid XML, if the root tag is
		not <captures>, if an unknown child tag is found or if two shapes share the same name.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $xml_data )
	   {
		$xml		=  simplexml_load_string ( $xml_data ) ;

		// simplexml_load_string() returns false when the supplied data is not well-formed XML ;
		// complain now rather than calling a method on a boolean
		if  ( $xml  ===  false )
			error ( new PdfToTextCaptureException ( "The supplied capture definitions are not valid XML." ) ) ;

		$root_entry	=  $xml -> getName ( ) ;

		// Root tag must be <captures>
		if  ( strcasecmp ( $root_entry, "captures" ) )
			error ( new PdfToTextCaptureException ( "Root entry must be <captures>, <$root_entry> was found." ) ) ;

		// Process the child nodes
		foreach  ( $xml -> children ( )   as  $child )
		   {
			$tag_name		=  $child -> getName ( ) ;

			switch  ( strtolower ( $tag_name ) )
			   {
				// <rectangle> :
				//	A rectangle whose dimensions are given in the <page> subtags.
				case	'rectangle' :
					$shape_object	=  new PdfToTextCaptureRectangleDefinition ( $child ) ;
					break ;

				// <lines> :
				//	A definition of lines split into columns, and their applicable pages.
				case	'lines' :
					$shape_object	=  new PdfToTextCaptureLinesDefinition ( $child ) ;
					break ;

				// Complain if an unknown tag is found
				default :
					error ( new PdfToTextCaptureException ( "Invalid tag <$tag_name> found in root tag <captures>." ) ) ;
			    }

			// Shape names must be unique within the definitions
			if  ( isset ( $this -> ShapeDefinitions [ $shape_object -> Name ] ) )
				error ( new PdfToTextCaptureException ( "The shape named \"{$shape_object -> Name}\" has been defined more than once." ) ) ;
			else
				$this -> ShapeDefinitions [ $shape_object -> Name ]	=  $shape_object ;
		    }

		// Build an array of shape names for the iterator interface
		$this -> ShapeNames	=  array_keys ( $this -> ShapeDefinitions ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetCapturedObject - Creates an object reflecting the captured data.

	    PROTOTYPE
	        $captures	=  $capture_definitions -> GetCapturedObject ( $document_fragments ) ;

	    DESCRIPTION
	        Asks each shape definition to extract its areas from the document fragments, groups the results per
		page, and wraps them into a PdfToTextCaptures object.

	    PARAMETERS
	        $document_fragments (array) -
	                Document text fragments collected during the text layout rendering process. They are passed
			as is to the ExtractAreas() method of each shape definition.

	    RETURN VALUE
	        An object of type PdfToTextCaptures, containing the captured data.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetCapturedObject ( $document_fragments )
	   {
		$captures	=  array ( ) ;

		foreach  ( $this -> ShapeDefinitions  as  $shape )
		   {
			$capture	=  $shape -> ExtractAreas ( $document_fragments ) ;

			// ExtractAreas() results are indexed by page ; collect them per page, all shapes together
			foreach  ( $capture  as  $page => $items )
			   {
				$captures [ $page ] []	=  $items ;
			    }
		    }

		$captured_object	=  new PdfToTextCaptures ( $captures ) ;

		return ( $captured_object ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        SetPageCount - Defines the total number of pages in the document.

	    PROTOTYPE
	        $capture_definitions -> SetPageCount ( $count ) ;

	    DESCRIPTION
	        At the time when XML definitions are processed, the total number of pages in the document is not yet
		known. Moreover, page ranges or page numbers can be expressed relative to the last page of the
		document (for example : 1..$-1, which means "from the first page to the last page - 1").
		Setting the page count once it is known allows the expressions specified in the "number" attribute
		of the <page> tags to be transformed into actual page numbers.
		The count is forwarded to every shape definition.

	    PARAMETERS
	        $count (integer) -
	                Number of pages in the document.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetPageCount ( $count )
	   {
		$this -> PageCount	=  $count ;

		foreach  ( $this -> ShapeDefinitions  as  $def )
		   {
			$def ->  SetPageCount ( $count ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetNodeAttributes - Retrieves an XML node's attributes.

	    PROTOTYPE
	        $result		=  PdfToTextCaptureDefinitions::GetNodeAttributes ( $node, $attributes ) ;

	    DESCRIPTION
	        Retrieves the attributes defined for the specified XML node.

	    PARAMETERS
	        $node (SimpleXMLElement) -
	                Node whose attributes are to be extracted.

		$attributes (associative array) -
			Associative array whose keys are the (lowercase) attribute names and whose values define a
			boolean indicating whether the attribute is mandatory or not.

	    RETURN VALUE
	        Returns an associative array whose keys are the attribute names and whose values are the attribute
		values, specified as a string.
		For optional unspecified attributes, the value will be boolean false.

	    NOTES
	        A PdfToTextCaptureException is passed to error() if the node contains an unknown attribute, or if a
		mandatory attribute is missing.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  GetNodeAttributes  ( $node, $attributes )
	   {
		$tag_name	=  $node -> getName ( ) ;

		// Build the initial value for the resulting array : every known attribute starts as "unspecified"
		$result		=  array ( ) ;

		foreach  ( array_keys ( $attributes )  as  $name )
			$result	[ $name ]	=  false ;

		// Loop through node attributes
		foreach  ( $node -> attributes ( )  as  $attribute_name => $attribute_value )
		   {
			// Attribute names are matched case-insensitively
			$attribute_name		=  strtolower ( $attribute_name ) ;

			// Check that the attribute exists ; if yes, add it to the resulting array
			if  ( isset ( $attributes [ $attribute_name ] ) )
				$result [ $attribute_name ]	=  ( string ) $attribute_value ;
			// Otherwise, throw an exception
			else
				error ( new PdfToTextCaptureException ( "Undefined attribute \"$attribute_name\" for node <$tag_name>." ) ) ;
		    }

		// Check that all mandatory attributes have been specified
		foreach  ( $attributes  as  $attribute_name => $mandatory )
		   {
			if  ( $mandatory  &&  $result [ $attribute_name ]  ===  false )
				error ( new PdfToTextCaptureException ( "Missing mandatory attribute \"$attribute_name\" for node <$tag_name>." ) ) ;
		    }

		// All done, return
		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        GetBooleanAttribute - Returns a boolean value associated to a string.

	    PROTOTYPE
	        $bool	=  PdfToTextCaptureDefinitions::GetBooleanAttribute ( $value ) ;

	    DESCRIPTION
	        Returns a boolean value corresponding to a boolean specified as a string.

	    PARAMETERS
	        $value (string or boolean) -
	                A boolean value represented as a string (case-insensitive).
			The strings 'true', 'yes', 'on' and '1' will be interpreted as boolean true.
			The strings 'false', 'no', 'off' and '0' will be interpreted as boolean false.
			Real boolean values are returned as is.

	    RETURN VALUE
	        The boolean value corresponding to the specified string.

	    NOTES
	        A PdfToTextCaptureException is passed to error() if the supplied string is incorrect.

	 *-------------------------------------------------------------------------------------------------------------*/
	public static function  GetBooleanAttribute ( $value )
	   {
		// Real booleans need no interpretation
		if  ( is_bool ( $value ) )
			return ( $value ) ;

		$lcvalue	=  strtolower ( $value ) ;

		if  ( in_array ( $lcvalue, array ( 'true', 'on', 'yes', '1' ), true ) )
			return ( true ) ;
		else if  ( in_array ( $lcvalue, array ( 'false', 'off', 'no', '0' ), true ) )
			return ( false ) ;
		else
			error ( new PdfToTextCaptureException ( "Invalid boolean value \"$value\"." ) ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations.
		The #[\ReturnTypeWillChange] lines are seen as plain comments before PHP 8 ; starting with PHP 8.1,
		they prevent deprecation notices about the missing return types on built-in interface methods.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Countable interface - number of shape definitions
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> ShapeDefinitions ) ) ; }


	// ArrayAccess interface - read-only access to shape definitions, by shape name
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> ShapeDefinitions [ $offset ] ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   { return ( $this -> ShapeDefinitions [ $offset ] ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextException ( "Unsupported operation" ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextException ( "Unsupported operation" ) ) ; }


	// Iterator interface -
	//	Iteration is made through shape names, which are supplied by the $ShapeNames property
	private		$__iterator_index	=  0 ;

	#[\ReturnTypeWillChange]
	public function  rewind ( )
	   { $this -> __iterator_index = 0 ; }

	#[\ReturnTypeWillChange]
	public function  valid ( )
	   { return ( $this -> __iterator_index  >=  0  &&  $this -> __iterator_index  <  count ( $this -> ShapeNames ) ) ; }

	#[\ReturnTypeWillChange]
	public function  key ( )
	   { return ( $this -> ShapeNames [ $this -> __iterator_index ] ) ; }

	#[\ReturnTypeWillChange]
	public function  next ( )
	   { $this -> __iterator_index ++ ; }

	#[\ReturnTypeWillChange]
	public function  current ( )
	   { return ( $this -> ShapeDefinitions [ $this -> ShapeNames [ $this -> __iterator_index ] ] ) ; }
    }


/*==============================================================================================================

    class PdfToTextCaptureShapeDefinition -
        Base class for capturing shapes.

  ==============================================================================================================*/
abstract class  PdfToTextCaptureShapeDefinition		//extends  Object
   {
	// Shape types - one of them is supplied to the constructor by derived classes
	const	SHAPE_RECTANGLE		=  1 ;
	const	SHAPE_COLUMN		=  2 ;
	const	SHAPE_LINE		=  3 ;

	// Name of the capture, taken from the "name" attribute of the shape tag
	public		$Name ;
	// Type of the capture (a SHAPE_* constant)
	public		$Type ;
	// Pages to which this capture applies
	public		$ApplicablePages ;
	// Areas of this shape, per page
	public		$Areas		=  array ( ) ;
	// String inserted between elements when several of them are covered by the same shape
	public		$Separator	=  " " ;


	/*--------------------------------------------------------------------------------------------------------------

	     Constructor -
		Stores the shape type and creates an empty list of applicable pages.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $type )
	   {
		$this -> ApplicablePages	=  new PdfToTextCaptureApplicablePages ( ) ;
		$this -> Type			=  $type ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     SetPageCount -
		Forwards the document page count to the list of applicable pages.
		Derived classes can override this method when additional work is needed.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetPageCount ( $count )
	   {
		$applicable_pages	=  $this -> ApplicablePages ;

		$applicable_pages -> SetPageCount ( $count ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     GetFragmentData -
		Splits a text fragment into its text and its coordinates, which are returned through the reference
		parameters.
		The right coordinate is computed as "x + width - 1", and the bottom one as "y - font-height".

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetFragmentData ( $fragment, &$text, &$left, &$top, &$right, &$bottom )
	   {
		$x		=  ( double ) $fragment [ 'x' ] ;
		$y		=  ( double ) $fragment [ 'y' ] ;
		$width		=  ( double ) $fragment [ 'width' ] ;
		$height		=  ( double ) $fragment [ 'font-height' ] ;

		$left		=  $x ;
		$top		=  $y ;
		$right		=  $x + $width - 1 ;
		$bottom		=  $y - $height ;
		$text		=  $fragment [ 'text' ] ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     GetAttributes -
		Reads the attributes of the given XML node, adding to the caller-supplied attribute definitions the
		ones that are common to all shapes :
		- "name" (mandatory), which is stored in the Name property
		- "separator" (optional), which is unescaped then stored in the Separator property
		Returns the attributes, as returned by PdfToTextCaptureDefinitions::GetNodeAttributes().

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetAttributes ( $node, $attributes  =  array ( ) )
	   {
		$common_attributes	=  array ( 'name' => true, 'separator' => false ) ;
		$all_attributes		=  array_merge ( $attributes, $common_attributes ) ;
		$values			=  PdfToTextCaptureDefinitions::GetNodeAttributes ( $node, $all_attributes ) ;
		$separator		=  $values [ 'separator' ] ;

		$this -> Name		=  $values [ 'name' ] ;

		// Only override the default separator when one was specified
		if  ( $separator  !==  false )
			$this -> Separator	=  PdfToText::Unescape ( $separator ) ;

		return ( $values ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     ExtractAreas -
		Extracts text contents from the document fragments. To be implemented by derived classes.

	 *-------------------------------------------------------------------------------------------------------------*/
	public abstract function  ExtractAreas ( $document_fragments ) ;
    }


/*==============================================================================================================

    class PdfToTextCaptureRectangleDefinition -
        A shape for capturing text in rectangle areas.

  ==============================================================================================================*/
class	PdfToTextCaptureRectangleDefinition		extends  PdfToTextCaptureShapeDefinition
   {
	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR -
		Processes a <rectangle> XML node. Its only allowed children are <page> nodes, which give the
		applicable pages ("number" attribute) together with the rectangle dimensions.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct  ( $node )
	   {
		parent::__construct ( self::SHAPE_RECTANGLE ) ;

		$this -> GetAttributes ( $node ) ;

		// Attributes accepted by the <page> tag ; a value of true means "mandatory"
		$page_attribute_definitions	=  array
		   (
			'number'	=>  true,
			'left'		=>  true,
			'right'		=>  false,
			'top'		=>  true,
			'bottom'	=>  false,
			'width'		=>  false,
			'height'	=>  false
		    ) ;

		foreach  ( $node -> children ( )  as  $child )
		   {
			$tag_name	=  $child -> getName ( ) ;

			// Anything other than a <page> tag is rejected
			if  ( strtolower ( $tag_name )  !==  'page' )
			   {
				error ( new PdfToTextCaptureException ( "Invalid tag <$tag_name> found in root tag <rectangle>." ) ) ;
				continue ;
			    }

			// <page> tag : register the page(s), keeping the attributes as extra data
			$page_attributes	=  PdfToTextCaptureDefinitions::GetNodeAttributes ( $child, $page_attribute_definitions ) ;

			$this -> ApplicablePages -> Add ( $page_attributes [ 'number' ], $page_attributes ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     ExtractAreas -
		Extracts text contents from the document fragments.
		Returns an array of PdfToTextCapturedRectangle objects indexed by page number ; pages listed by the
		ApplicablePages iterator for which nothing was captured receive an empty rectangle.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  ExtractAreas ( $document_fragments )
	   {
		$captured	=  array ( ) ;

		foreach  ( $document_fragments  as  $page_number  =>  $page_data )
		   {
			$page_fragments		=  $page_data [ 'fragments' ] ;

			// Pages outside the scope of this shape are skipped
			if  ( ! isset ( $this -> ApplicablePages -> PageMap [ $page_number ] ) )
				continue ;

			foreach  ( $page_fragments  as  $page_fragment )
			   {
				$this -> GetFragmentData ( $page_fragment, $text, $x1, $y1, $x2, $y2 ) ;

				// Fragments that are not within the area defined for this page are ignored
				if  ( ! $this -> Areas [ $page_number ] -> Contains ( $x1, $y1, $x2, $y2 ) )
					continue ;

				// Something has already been captured on this page : extend the captured rectangle and
				// append the text, using the "separator" attribute of the <rectangle> tag
				if  ( isset ( $captured [ $page_number ] ) )
				   {
					$rectangle		=  $captured [ $page_number ] ;

					$rectangle -> Top	=  max ( $rectangle -> Top   , $y1 ) ;
					$rectangle -> Bottom	=  min ( $rectangle -> Bottom, $y2 ) ;
					$rectangle -> Left	=  min ( $rectangle -> Left  , $x1 ) ;
					$rectangle -> Right	=  max ( $rectangle -> Right , $x2 ) ;
					$rectangle -> Text	=  $rectangle -> Text . $this -> Separator . $text ;

					continue ;
				    }

				// First fragment captured on this page (the usual case, where a rectangle captures a single line)
				$captured [ $page_number ]	=  new PdfToTextCapturedRectangle ( $page_number, $this -> Name, $text, $x1, $y1, $x2, $y2, $this ) ;
			    }
		    }

		// Supply an empty rectangle for each page that captured nothing
		$captured_count		=  count ( $captured ) ;

		foreach  ( $this -> ApplicablePages  as  $page_number => $applicable )
		   {
			if  ( isset ( $captured [ $page_number ] ) )
				continue ;

			$captured [ $page_number ]	=  new PdfToTextCapturedRectangle ( $page_number, $this -> Name, '', 0, 0, 0, 0, $this ) ;
		    }

		// Entries were appended out of order : put them back in page number order
		if  ( count ( $captured )  !==  $captured_count )
			ksort ( $captured ) ;

		return ( $captured ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     SetPageCount -
		Creates a capture area for each page concerned. This can only be done once the number of pages is
		known, and the ApplicablePages object has been updated accordingly.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetPageCount ( $count )
	   {
		parent::SetPageCount ( $count ) ;

		$extra_data	=  $this -> ApplicablePages -> ExtraPageMapData ;

		foreach  ( $extra_data  as  $page_number => $page_attributes )
		   {
			$this -> Areas [ $page_number ]	=  new PdfToTextCaptureArea ( $page_attributes ) ;
		    }
	    }
    }


/*==============================================================================================================

    class PdfToTextCaptureLinesDefinition -
        A shape for capturing lines of text, each line being split into named columns.

  ==============================================================================================================*/
class	PdfToTextCaptureLinesDefinition		extends  PdfToTextCaptureShapeDefinition
   {
	// Column definitions (attributes of the <column> tags), indexed by column name
	public		$Columns		=  array ( ) ;
	// Top and bottom y-coordinates given by the <page> tags, per page - set by SetPageCount()
	public		$Tops			=  array ( ) ;
	public		$Bottoms		=  array ( ) ;
	// Column names, in their order of definition
	private		$ColumnNames		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR -
		Analyzes the contents of a <lines> XML node, which contains <page> nodes giving a part of the column
		dimensions, and <column> nodes which specify the name of the column and the remaining coordinates,
		such as "left" or "width".

		The optional "default" attribute, on the <lines> tag or on individual <column> tags, gives the text
		to be used for columns that captured nothing ; "%c" is replaced by the column name and "%n" by the
		column index (starting from zero).

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct  ( $node )
	   {
		parent::__construct ( self::SHAPE_COLUMN ) ;

		$shape_attributes	=  $this -> GetAttributes ( $node, array ( 'default' => false ) ) ;

		// Compare with false instead of testing truthiness, so that default="0" is not discarded
		$column_default		=  ( $shape_attributes [ 'default' ]  !==  false )  ?  $shape_attributes [ 'default' ] : '' ;

		// Loop through node's children
		foreach  ( $node -> children ( )  as  $child )
		   {
			$tag_name	=  $child -> getName ( ) ;

			switch  ( strtolower ( $tag_name ) )
			   {
				// <page> tag
				case	'page' :
					// Retrieve the specified attributes
					$page_attributes	=  PdfToTextCaptureDefinitions::GetNodeAttributes
					   (
						$child,
						array
						   (
							'number'	=>  true,
							'top'		=>  true,
							'height'	=>  true,
							'bottom'	=>  false
						    )
					    ) ;

					// We have to store the y-coordinate of the first and last lines, to determine until which
					// position we have to check for column contents.
					// The "top" and "bottom" attributes of the <page> tag actually determine the top and bottom
					// y-coordinates where to search for columns. However, we will have to rename the "bottom"
					// attribute to "column-bottom", in order for it not to be mistaken with actual column rectangle
					// (only the "height" attribute of the <page> tag gives the height of a line)
					$page_attributes [ 'column-top' ]	=  $page_attributes [ 'top' ] ;
					$page_attributes [ 'column-bottom' ]	=  ( double ) $page_attributes [ 'bottom' ] ;
					unset ( $page_attributes [ 'bottom' ] ) ;

					// Add this page to the list of applicable pages for this shape
					$this -> ApplicablePages -> Add ( $page_attributes [ 'number' ], $page_attributes ) ;

					break ;

				// <column> tag :
				case	'column' :
					$column_attributes	=  PdfToTextCaptureDefinitions::GetNodeAttributes
					   (
						$child,
						array
						   (
							'name'		=>  true,
							'left'		=>  false,
							'right'		=>  false,
							'width'		=>  false,
							'default'	=>  false
						    )
					    ) ;

					$column_name		=  $column_attributes [ 'name' ] ;

					// Build the final default value, if any one is specified ; the following special constructs are processed :
					// - "%c" :
					//	Replaced by the column name.
					// - "%n" :
					//	Replaced by the column index (starting from zero).
					// An unspecified or empty "default" attribute falls back to the one of the <lines> tag ; explicit
					// comparisons are used so that default="0" is kept
					if  ( $column_attributes [ 'default' ]  ===  false  ||  $column_attributes [ 'default' ]  ===  '' )
						$column_attributes [ 'default' ]	=  $column_default ;

					$substitutes		=  array
					   (
						'%c'		=>  $column_name,
						'%n'		=>  count ( $this -> Columns )
					    ) ;

					$column_attributes [ 'default' ]	=  str_replace
					   (
						array_keys ( $substitutes ),
						array_values ( $substitutes ),
						$column_attributes [ 'default' ]
					    ) ;

					// Add the column definition to this object
					if  ( ! isset ( $this -> Columns [ $column_name ] ) )
					   {
						$this -> Columns [ $column_name ]	=  $column_attributes ;
						$this -> ColumnNames []			=  $column_name ;
					    }
					else
						error ( new PdfToTextCaptureException ( "Column \"$column_name\" is defined more than once." ) ) ;

					break ;

				// Other tag : throw an exception
				default :
					error ( new PdfToTextCaptureException ( "Invalid tag <$tag_name> found in root tag <lines>." ) ) ;
			    }
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     ExtractAreas -
		Extracts text contents from the document fragments.

		For each applicable page, the column areas of the first line are replicated downwards, then every
		text fragment is matched against them. Returns an array of PdfToTextCapturedLines objects indexed by
		page number ; each of them holds one PdfToTextCapturedLine per line where at least one column was
		captured, and columns that captured nothing receive their default value.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  ExtractAreas ( $document_fragments )
	   {
		$result		=  array ( ) ;

		// Loop through each page of document fragments
		foreach  ( $document_fragments  as  $page  =>  $page_contents )
		   {
			$fragments		=  $page_contents [ 'fragments' ] ;

			// Ignore this page if not included in the <lines> definition
			if  ( ! isset ( $this -> ApplicablePages -> PageMap [ $page ] ) )
				continue ;

			// The <lines> definition only gives the location of the first line of each column, together
			// with its height. We will build as many new column areas as can fit on one page.
			// No area exists for the page when no <column> tag was defined.
			$this_page_areas	=  ( isset ( $this -> Areas [ $page ] ) ) ?  $this -> Areas [ $page ] : array ( ) ;
			$column_areas		=  $this -> BuildLineAreas ( $this_page_areas ) ;

			// Now extract the columns, line per line, from the current page's text fragments
			$found_lines		=  array ( ) ;

			foreach  ( $fragments  as  $fragment )
			   {
				$this -> GetFragmentData ( $fragment, $text, $left, $top, $right, $bottom ) ;

				// Loop through each line of column areas, built from the above step
				foreach ( $column_areas  as  $line => $column_areas_per_name )
				    {
					// Process each column area
					foreach  ( $column_areas_per_name  as  $column_area )
					   {
						// ... but only handle fragments accepted by the Contains() test of the column area
						if  ( ! $column_area -> Contains ( $left, $top, $right, $bottom ) )
							continue ;

						$area_name	=  $column_area -> Name ;

						// The normal usage will be to capture one-line columns...
						if  ( ! isset ( $found_lines [ $line ] [ $area_name ] ) )
						   {
							$found_lines [ $line ] [ $area_name ]	=
								new PdfToTextCapturedColumn ( $page, $area_name, $text,
									$left, $top, $right, $bottom, $this ) ;
						    }
						// ... but you can also use them to capture multiple items ; in this case, the "separator" attribute
						// of the <lines> tag will be used to separate items, and the captured box is extended to the
						// coordinates of the column area itself
						else
						   {
							$existing_area	=  $found_lines [ $line ] [ $area_name ] ;

							$existing_area -> Top			=  max ( $existing_area -> Top   , $column_area -> Top    ) ;
							$existing_area -> Bottom		=  min ( $existing_area -> Bottom, $column_area -> Bottom ) ;
							$existing_area -> Left			=  min ( $existing_area -> Left  , $column_area -> Left   ) ;
							$existing_area -> Right			=  max ( $existing_area -> Right , $column_area -> Right  ) ;
							$existing_area -> Text		       .=  $this -> Separator . $text ;
						    }
					    }
				     }
			    }

			// A final pass to provide default values for empty columns (usually, column values that are not represented in the PDF file)
			// Also get the surrounding box for the whole line
			$final_lines		=  array ( ) ;

			foreach  ( $found_lines  as  $line => $columns_line )
			   {
				// Names of the columns really captured on this line, in their order of definition
				$captured_names		=  array ( ) ;

				foreach  ( $this -> ColumnNames  as  $column_name )
				   {
					if  ( isset ( $columns_line [ $column_name ] ) )
						$captured_names []		=  $column_name ;
					else
					   {
						$columns_line [ $column_name ]	=
							new PdfToTextCapturedColumn ( $page, $column_name, $this -> Columns [ $column_name ] [ 'default' ], 0, 0, 0, 0, $this ) ;
					    }
				    }

				// The surrounding box goes from the (left,top) coordinates of the first captured column to the
				// (right,bottom) coordinates of the last captured one ; columns holding a default value have
				// null coordinates and cannot be used for that
				$line_left	=  0 ;
				$line_top	=  0 ;
				$line_right	=  0 ;
				$line_bottom	=  0 ;
				$captured_count	=  count ( $captured_names ) ;

				if  ( $captured_count  >  0 )
				   {
					$first_column	=  $columns_line [ $captured_names [0] ] ;
					$last_column	=  $columns_line [ $captured_names [ $captured_count - 1 ] ] ;

					$line_left	=  $first_column -> Left ;
					$line_top	=  $first_column -> Top ;
					$line_right	=  $last_column  -> Right ;
					$line_bottom	=  $last_column  -> Bottom ;
				    }

				// Create a CaptureLine entry
				$final_lines []	=  new PdfToTextCapturedLine ( $page, $this -> Name, $columns_line, $line_left, $line_top, $line_right, $line_bottom, $this ) ;
			    }

			// The result for this page will be a CapturedLines object
			$result [ $page ]	=  new  PdfToTextCapturedLines ( $this -> Name, $page, $final_lines ) ;
		    }

		// All done, return
		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	     SetPageCount -
		Once the page count is known, builds the column areas of the first line for each applicable page,
		and records the top and bottom y-coordinates given by the <page> tags.
		Previously built data is discarded, so that the method can safely be called more than once.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetPageCount ( $count )
	   {
		parent::SetPageCount ( $count ) ;

		// Start from scratch : appending to areas built by a previous call would duplicate every column,
		// and the text of each column would then be captured twice
		$this -> Areas		=  array ( ) ;
		$this -> Tops		=  array ( ) ;
		$this -> Bottoms	=  array ( ) ;

		foreach  ( $this -> ApplicablePages  as $page => $applicable )
		   {
			if  ( ! $applicable )
				continue ;

			// Extra data recorded for this page ; the 'column-top' and 'column-bottom' entries are the ones
			// set by the constructor when processing <page> tags
			$page_data			=  $this -> ApplicablePages -> ExtraPageMapData [ $page ] ;

			$this -> Tops    [ $page ]	=  ( double ) $page_data [ 'column-top' ] ;
			$this -> Bottoms [ $page ]	=  ( double ) $page_data [ 'column-bottom' ] ;

			foreach  ( $this -> Columns  as  $column )
				$this -> Areas [ $page ] []	=  new PdfToTextCaptureArea ( $column, $page_data, $column [ 'name' ] ) ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Support functions.

	 *-------------------------------------------------------------------------------------------------------------*/

	// BuildLineAreas -
	//	Replicates the column areas of the first line downwards, one line height at a time, as long as the
	//	y-coordinate remains positive. Returns an array indexed by line number (starting from zero), each
	//	entry being the array of column areas for that line.
	private function  BuildLineAreas ( $page_areas )
	   {
		$line_areas	=  array ( ) ;

		foreach  ( $page_areas  as  $page_area )
		   {
			// Duplicate the existing column area - it represents the 1st line of the column
			$new_area		=  clone ( $page_area ) ;
			$line_areas [0] []	=  $new_area ;
			$line_height		=  $new_area -> Height ;

			// With a null or negative height, the loop below would never end : keep the first line only
			if  ( $line_height  <=  0 )
				continue ;

			$current_top		=  $new_area -> Top - $line_height ;
			$current_line		=  0 ;

			// Then build a new column area for each successive line
			while  ( $current_top - $line_height  >=  0 )
			   {
				$current_line ++ ;
				$new_area				 =  clone ( $new_area ) ;
				$new_area -> Top			-=  $line_height ;
				$new_area -> Bottom			-=  $line_height ;

				$line_areas [ $current_line ] []	 =  $new_area ;
				$current_top				-=  $line_height ;
			    }
		    }

		return ( $line_areas ) ;
	    }
    }


/*==============================================================================================================

    class PdfToTextCaptureApplicablePages -
        Holds a list of applicable pages given by the "number" attribute of <page> tags.

  ==============================================================================================================*/
class  PdfToTextCaptureApplicablePages		//extends		Object
						implements	ArrayAccess, Countable, Iterator
   {
	// Ranges of pages, as given by the "number" attribute of the <page> tag. Since a page number expression
	// can refer to the last page ("$"), and the total number of pages in the document is not yet known at the
	// time of object instantiation, we have to store all the page ranges as is.
	// Each entry is an array of three items : low value, high value (both either an integer or an expression
	// string) and the extra data supplied to the Add() method.
	protected	$PageRanges		=  array ( ) ;

	// Once the SetPageCount() method has been called (ie, once the total number of pages in the document is
	// known), then a PageMap is built ; each key is an applicable page number and each value is true.
	public		$PageMap		=  array ( ) ;

	// Extra data associated, this time, with each page in PageMap
	public		$ExtraPageMapData	=  array ( ) ;

	// Page count - set by the SetPageCount() method
	public		$PageCount		=  false ;

	// Current page number, used by the Iterator interface implementation
	private		$__iterator_value	=  1 ;


	/*--------------------------------------------------------------------------------------------------------------

	    CONSTRUCTOR
	        Initializes the object.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( )
	   {
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Add - Add a page number(s) definition.

	    PROTOTYPE
	        $applicable_pages -> Add ( $page_number, $extra_data = false ) ;

	    DESCRIPTION
	        Add the page number(s) specified by the "number" attribute of the <page> tag to the list of applicable
		pages.

	    PARAMETERS
	        $page_number (string) -
	                A string defining which pages are applicable. This can be a single page number :

				<page number="1" .../>

			or a comma-separated list of pages :

				<page number="1, 2, 10" .../>

			or range(s) of pages :

				<page number="1..10, 12..20" .../>

			The special "$" character means "last page" ; thus the following example :

				<page number="1, $-9..$" .../>

			means : "applicable pages are 1, plus the last ten pages of the document".

		$extra_data (mixed) -
			Data stored along with each range, then copied to the ExtraPageMapData entry of every page
			covered by that range when SetPageCount() is called.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  Add ( $page_number, $extra_data = false )
	   {
		$this -> __parse_page_numbers ( $page_number, $extra_data ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        SetPageCount - Sets the total number of pages in the document.

	    PROTOTYPE
	        $applicable_pages -> SetPageCount ( $count ) ;

	    DESCRIPTION
	        Sets the total number of pages in the document and builds a map of which pages are applicable or not.

	    PARAMETERS
	        $count (integer) -
	                Total number of pages in the document.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  SetPageCount ( $count )
	   {
		$this -> PageCount		=  $count ;
		$this -> PageMap		=  array ( ) ;

		// Loop through the page ranges - every single value in the ranges has been converted to an integer ;
		// the other ones, built as expressions (using "$" for example) are processed here to give the actual
		// page number
		foreach  ( $this -> PageRanges  as  $range )
		   {
			$low		=  $range [0] ;
			$high		=  $range [1] ;

			// Translate expression to an actual value for the low and high parts of the range, if not already integers
			if  ( ! is_integer ( $low ) )
				$low	=  $this -> __check_expression ( $low, $count ) ;

			if  ( ! is_integer ( $high ) )
				$high	=  $this -> __check_expression ( $high, $count ) ;

			// An expression that was accepted when parsed (with a page count of 1) may still fail with the
			// real page count ; report it instead of silently treating the failure as page 1
			if  ( $low  ===  false  ||  $high  ===  false )
				error ( new PdfToTextCaptureException ( "Invalid expression in page range specification " .
						"\"{$range [0]}..{$range [1]}\"." ) ) ;

			// Expressions using "$" may lead to negative values - adjust them
			if  ( $low  <  1 )
			   {
				if  ( $high  <  1 )
					$high	=  1 ;

				$low	=  1 ;
			    }

			// Check that the range is consistent
			if  ( $low  >  $high )
				error ( new PdfToTextCaptureException ( "Low value ($low) must be less or equal to high value ($high) " .
						"in page range specification \"{$range [0]}..{$range [1]}\"." ) ) ;

			// Ignore ranges where the 'low' value is higher than the number of pages in the document
			if  ( $low  >  $count )
			    {
				warning ( new PdfToTextCaptureException ( "Low value ($low) is greater than page count ($count) " .
						"in page range specification \"{$range [0]}..{$range [1]}\"." ) ) ;
				continue ;
			     }

			// Normalize the 'high' value, so that it's not bigger than the number of pages in the document
			if  ( $high  >  $count )
				$high	=  $count ;

			// Complement the page map using this range
			for  ( $i = $low ; $i  <=  $high ; $i ++ )
			   {
				$this -> PageMap [$i]		=  true ;
				$this -> ExtraPageMapData [$i]	=  $range [2] ;
			    }
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations.
		The #[\ReturnTypeWillChange] attribute silences the PHP 8.1+ deprecation notice about return types
		of methods implementing built-in interfaces ; older PHP versions see it as a plain comment.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Countable interface : number of applicable pages
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> PageMap ) ) ; }


	// Array access interface : $object [ $page ] tells whether page $page is applicable
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> PageMap [ $offset ] ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   { return ( isset ( $this -> PageMap [ $offset ] ) ) ; }


	// The page map is read-only from the caller's point of view
	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextException ( "Unsupported operation" ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextException ( "Unsupported operation" ) ) ; }


	// Iterator interface : loops through ALL the page numbers, from 1 to PageCount ; the current value is a
	// boolean telling whether the page is applicable or not
	#[\ReturnTypeWillChange]
	public function   rewind ( )
	   { $this -> __iterator_value = 1 ; }


	#[\ReturnTypeWillChange]
	public function  valid ( )
	   { return ( $this -> __iterator_value  >=  1  &&  $this -> __iterator_value  <=  $this -> PageCount ) ; }


	#[\ReturnTypeWillChange]
	public function  key ( )
	   { return ( $this -> __iterator_value ) ; }


	#[\ReturnTypeWillChange]
	public function  next ( )
	   { $this -> __iterator_value ++ ; }


	#[\ReturnTypeWillChange]
	public function  current ( )
	   { return ( isset ( $this -> PageMap [ $this -> __iterator_value ] ) ) ; }


	/*--------------------------------------------------------------------------------------------------------------

		Helper functions.

	 *-------------------------------------------------------------------------------------------------------------*/

	// __parse_page_numbers -
	//	Performs a first pass on the value of the "number" attribute of the <page> tag. Transforms range expressions
	//	when possible to integers ; keep the expression string intact when either the low or high value of a range
	//	is itself an expression, probably using the "$" (page count) character.
	private function  __parse_page_numbers ( $text, $extra_data )
	   {
		$ranges		=  explode ( ',', $text ) ;

		// Loop through comma-separated ranges
		foreach  ( $ranges  as  $range )
		   {
			$items		=  explode ( '..', $range ) ;

			// Check if current item is a range
			switch  ( count ( $items ) )
			   {
				// If not a range (ie, a single value) then make a range using that value
				// (low and high range values will be the same)
				case	1 :
					if  ( is_numeric ( $items [0] ) )
						$low	=  $high	=  ( integer ) $items [0] ;
					else
						$low	=  $high	=  trim ( $items [0] ) ;

					break ;

				// If range, store the low and high values
				case	2 :
					$low	=  ( is_numeric ( $items [0] ) ) ?  ( integer ) $items [0] : trim ( $items [0] ) ;
					$high	=  ( is_numeric ( $items [1] ) ) ?  ( integer ) $items [1] : trim ( $items [1] ) ;
					break ;

				// Other cases : throw an exception
				default :
					error ( new PdfToTextCaptureException ( "Invalid page range specification \"$range\"." ) ) ;
			    }

			// If the low or high range value is an expression, check at this stage that it is correct
			// (the expression is evaluated with a page count of 1)
			if  ( is_string ( $low )  &&  $this -> __check_expression ( $low )  ===  false )
				error ( new PdfToTextCaptureException ( "Invalid expression \"$low\" in page range specification \"$range\"." ) ) ;

			if  ( is_string ( $high )  &&  $this -> __check_expression ( $high )  ===  false )
				error ( new PdfToTextCaptureException ( "Invalid expression \"$high\" in page range specification \"$range\"." ) ) ;

			// Add the page range and the extra data
			$this -> PageRanges []	=  array ( $low, $high, $extra_data ) ;
		    }
	    }


	// __check_expression -
	//	Evaluates a page number expression after replacing every "$" character with $count, and returns the
	//	resulting value, or false if the expression cannot be evaluated.
	//	Since PHP 7, eval() throws a ParseError on invalid code instead of returning false (and PHP 8 throws a
	//	DivisionByZeroError) ; these are caught here so that callers can keep testing the result against false.
	//	NOTE(review) : the expression comes from the capture definition and is run through eval() ; it must
	//	never be taken from an untrusted source.
	private function  __check_expression ( $str, $count = 1 )
	    {
		$new_str	=  str_replace ( '$', $count, $str ) ;

		try
		   {
			$value		=  @eval ( "return ( $new_str ) ;" ) ;
		    }
		// Regular exceptions (the only kind that exists before PHP 7)
		catch  ( Exception  $e )
		   {
			$value		=  false ;
		    }
		// Engine errors (PHP 7 and later) ; this clause is simply never matched on older versions
		catch  ( Throwable  $e )
		   {
			$value		=  false ;
		    }

		return ( $value ) ;
	     }
    }


/*==============================================================================================================

    class PdfToTextCaptureArea -
        A capture area describes a rectangle, either by its top, left, right and bottom coordinates, or by
	its top/left coordinates, and its width and height.

  ==============================================================================================================*/
class  PdfToTextCaptureArea	//extends  Object
   {
	// Keywords accepted in the $area and $default_area arrays given to the constructor
	static private		$Keys		=  array ( 'left', 'top', 'right', 'bottom', 'width', 'height' ) ;

	// Rectangle coordinates ; exposed through the __get/__set magic methods
	private			$Left		=  false,
				$Top		=  false,
				$Right		=  false,
				$Bottom		=  false ;

	// Area name (for internal purposes)
	public			$Name ;


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Constructor

	    PROTOTYPE
	        $area	=  new PdfToTextCaptureArea ( $area, $default_area = null, $name = '' ) ;

	    DESCRIPTION
	        Builds a rectangle from the supplied coordinates.

	    PARAMETERS
	        $area (array) -
	                An associative array that may contain the following entries :

			- 'left' (double) :
				Left x-coordinate (mandatory).

			- 'top' (double) :
				Top y-coordinate (mandatory).

			- 'right' (double) :
				Right x-coordinate.

			- 'bottom' (double) :
				Bottom y-coordinate.

			- 'width' (double) :
				Width of the rectangle, starting from 'left'.

			- 'height' (double) :
				Height of the rectangle, starting from 'top'.

			Either the 'right' or 'width' entries must be specified. This is the same for the 'bottom' and
			'height' entries.
			An entry that is missing, or whose value is false, is taken from $default_area.

		$default_area (array) -
			An array that can be used to supply default values when absent from $area.

		$name (string) -
			An optional name for this area. This information is not used by the class.

	    NOTES
	        Coordinate (0,0) is located at the left bottom of the page.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $area, $default_area = null, $name = '' )
	   {
		$values		=  array ( ) ;

		// Collect a value for each keyword : the one from $area when present and not false, otherwise the
		// one from $default_area, otherwise false (meaning "not specified")
		foreach  ( self::$Keys  as  $key )
		   {
			if  ( isset ( $area [ $key ] )  &&  $area [ $key ]  !==  false )
				$values [ $key ]	=  $area [ $key ] ;
			else if  ( isset ( $default_area [ $key ] ) )
				$values [ $key ]	=  $default_area [ $key ] ;
			else
				$values [ $key ]	=  false ;
		    }

		// The 'left' and 'top' coordinates are mandatory
		if  ( $values [ 'left' ]  ===  false )
			error ( new PdfToTextCaptureException ( "Attribute \"left\" is mandatory." ) );
		else
			$values [ 'left' ]	=  ( float ) $values [ 'left' ] ;

		if  ( $values [ 'top' ]  ===  false )
			error ( new PdfToTextCaptureException ( "Attribute \"top\" is mandatory." ) ) ;
		else
			$values [ 'top' ]	=  ( float ) $values [ 'top' ] ;

		// The right coordinate is either given as is, or derived from the width
		if  ( $values [ 'right' ]  !==  false )
			$values [ 'right' ]	=  ( float ) $values [ 'right' ] ;
		else if  ( $values [ 'width' ]  !==  false )
			$values [ 'right' ]	=  $values [ 'left' ] + ( float ) $values [ 'width' ] - 1 ;
		else
			error ( new PdfToTextCaptureException ( "Either the \"right\" or the \"width\" attribute must be specified." ) ) ;

		// The bottom coordinate is either given as is, or derived from the height
		if  ( $values [ 'bottom' ]  !==  false )
			$values [ 'bottom' ]	=  ( float ) $values [ 'bottom' ] ;
		else if  ( $values [ 'height' ]  !==  false )
			$values [ 'bottom' ]	=  $values [ 'top' ] - ( float ) $values [ 'height' ] + 1 ;
		else
			error ( new PdfToTextCaptureException ( "Either the \"bottom\" or the \"height\" attribute must be specified." ) ) ;

		// Store the resulting rectangle
		$this -> Left		=  $values [ 'left'   ] ;
		$this -> Right		=  $values [ 'right'  ] ;
		$this -> Top		=  $values [ 'top'    ] ;
		$this -> Bottom		=  $values [ 'bottom' ] ;

		$this -> Name		=  $name ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        __get, __set - Give access to the Left, Top, Right and Bottom coordinates, and implement the
			       computed Width and Height properties.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __get ( $member )
	   {
		// Plain coordinates
		if  ( in_array ( $member, array ( 'Left', 'Top', 'Right', 'Bottom' ), true ) )
			return ( $this -> $member ) ;

		// Computed dimensions
		if  ( $member  ===  'Width' )
			return ( $this -> Right - $this -> Left + 1 ) ;

		if  ( $member  ===  'Height' )
			return ( $this -> Top - $this -> Bottom + 1 ) ;

		trigger_error ( "Undefined property \"$member\"." ) ;
	    }


	public function  __set ( $member, $value )
	   {
		// Every property handled here is a floating-point value
		$number		=  ( float ) $value ;

		if  ( in_array ( $member, array ( 'Top', 'Left', 'Right', 'Bottom' ), true ) )
			$this -> $member	=  $number ;
		// Changing the width moves the right coordinate
		else if  ( $member  ===  'Width' )
			$this -> Right		=  $this -> Left + $number - 1 ;
		// Changing the height moves the bottom coordinate
		else if  ( $member  ===  'Height' )
			$this -> Bottom		=  $this -> Top - $number + 1 ;
		else
			trigger_error ( "Undefined property \"$member\"." ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    NAME
	        Contains - Check if this area contains the specified rectangle.

	    DESCRIPTION
		Returns true when the supplied rectangle lies entirely within this area (boundaries included),
		false otherwise.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  Contains ( $left, $top, $right, $bottom )
	   {
		return ( $left  >=  $this -> Left  &&  $right  <=  $this -> Right  &&
				$top  <=  $this -> Top  &&  $bottom  >=  $this -> Bottom ) ;
	    }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                                     CAPTURED TEXT MANAGEMENT                                     ******
 ******         (none of the classes listed here are meant to be instantiated outside this file)         ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

 /*==============================================================================================================

     class PdfToTextCapturedText -
         Base class for captured text enclosed by shapes.

   ==============================================================================================================*/
 abstract class  PdfToTextCapturedText		//extends  Object
    {
	// Name of the shape that captured the text (the "name" attribute of the <rectangle> or <lines> tags,
	// for example)
	public		$Name ;

	// Page number where the text was found ; page numbers start from 1
	public		$Page ;

	// Type of the capturing shape, copied from the Type property of the shape definition
	// (one of the PdfToTextCaptureShapeDefinition::SHAPE_* constants)
	public		$Type ;

	// The shape definition itself ; kept for reference only
	private		$ShapeDefinition ;

	// The text that has been captured
	public		$Text ;

	// Coordinates, in the PDF file, of the rectangle surrounding the captured text
	public		$Left,
			$Top,
			$Right,
			$Bottom ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Stores the location, contents and origin of a captured text, whatever the shape that captured it.
		The shape type is taken from the Type property of $definition.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct  ( $page, $name, $text, $left, $top, $right, $bottom, $definition )
	   {
		// Where the text comes from
		$this -> Page			=  $page ;
		$this -> Name			=  $name ;
		$this -> ShapeDefinition	=  $definition ;
		$this -> Type			=  $definition -> Type ;

		// What has been captured
		$this -> Text			=  $text ;

		// Surrounding rectangle
		$this -> Left			=  $left ;
		$this -> Top			=  $top ;
		$this -> Right			=  $right ;
		$this -> Bottom			=  $bottom ;
	    }
     }


 /*==============================================================================================================

     class PdfToTextCapturedRectangle -
         Implements a text captured by a rectangle shape.

   ==============================================================================================================*/
class  PdfToTextCapturedRectangle		extends  PdfToTextCapturedText
   {
	// Constructor -
	//	Nothing specific to rectangles here : all the arguments are handed over to the parent constructor.
	public function  __construct ( $page, $name, $text, $left, $top, $right, $bottom, $definition )
	   {
		parent::__construct ( $page, $name, $text, $left, $top, $right, $bottom, $definition ) ;
	    }


	// __toString -
	//	Converting the object to a string gives the captured text.
	public function  __toString ( )
	   {
		return ( $this -> Text ) ;
	    }
    }


 /*==============================================================================================================

     class PdfToTextCapturedColumn -
         Implements a text captured by a lines/column shape.
	 Actually behaves like the PdfToTextCapturedRectangle class

   ==============================================================================================================*/
class  PdfToTextCapturedColumn			extends  PdfToTextCapturedText
   {
	// Constructor -
	//	Nothing specific to columns here : all the arguments are handed over to the parent constructor.
	public function  __construct ( $page, $name, $text, $left, $top, $right, $bottom, $definition )
	   {
		parent::__construct ( $page, $name, $text, $left, $top, $right, $bottom, $definition ) ;
	    }


	// __toString -
	//	Converting the object to a string gives the captured text.
	public function  __toString ( )
	   {
		return ( $this -> Text ) ;
	    }
    }


 /*==============================================================================================================

     class PdfToTextCapturedLine -
         Implements a text captured by a lines shape.

   ==============================================================================================================*/
class  PdfToTextCapturedLine			extends		PdfToTextCapturedText
 						implements	ArrayAccess, Countable, IteratorAggregate
  {
	// Column objects
	public		$Columns ;
	// Maps each column name to its position in the $columns list given to the constructor, to allow access
	// by either index or column name
	private		$ColumnsByNames		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Builds a Line object based on the supplied columns.
		Also builds the Text property, which contains the columns text joined with the Separator property
		of the shape definition.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $page, $name, $columns, $left, $top, $right, $bottom, $definition )
	   {
		// Although the Columns property is most likely to be used, build a text representation of the whole line
		$text			=  array ( ) ;
		$count			=  0 ;

		foreach  ( $columns  as  $column )
		   {
			$text []					=  $column -> Text ;
			$this -> ColumnsByNames [ $column -> Name ]	=  $count ++ ;
		    }

		// Provide this information to the parent constructor
		parent::__construct ( $page, $name, implode ( $definition -> Separator, $text ), $left, $top, $right, $bottom, $definition ) ;

		// Store the column definitions
		$this -> Columns	=  $columns ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    __get -
		Returns access to a column by its name.
		An E_USER_NOTICE is triggered when no column has the requested name.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __get ( $member )
	   {
		// The column index must be looked up using $member, the requested property name
		if  ( isset ( $this -> ColumnsByNames [ $member ] ) )
			return ( $this -> Columns [ $this -> ColumnsByNames [ $member ] ] ) ;
		else
			trigger_error ( "Undefined property \"$member\"." ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations.
		The #[\ReturnTypeWillChange] attribute silences the PHP 8.1+ deprecation notice about return types
		of methods implementing built-in interfaces ; older PHP versions see it as a plain comment.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Countable interface : number of columns in this line
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> Columns ) ) ; }


	// IteratorAggregate interface : iterates over the column objects
	#[\ReturnTypeWillChange]
	public function  getIterator ( )
	   { return ( new ArrayIterator ( $this -> Columns ) ) ; }


	// ArrayAccess interface : columns can be accessed either by numeric index or by name
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   {
		if  ( is_numeric ( $offset ) )
			return ( $offset  >=  0  &&  $offset  <  count ( $this -> Columns ) ) ;
		else
			return ( isset ( $this -> ColumnsByNames [ $offset ] ) ) ;
	    }


	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   {
		if  ( is_numeric ( $offset ) )
			return ( $this -> Columns [ $offset ] ) ;
		else
			return ( $this -> Columns [ $this -> ColumnsByNames [ $offset ] ] ) ;
	    }


	// Captured lines are read-only
	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }
    }


 /*==============================================================================================================

     class PdfToTextCapturedLines -
         Implements a set of lines.

   ==============================================================================================================*/
class  PdfToTextCapturedLines			//extends		Object
						implements	ArrayAccess, Countable, IteratorAggregate
   {
	// Capture name, as specified by the "name" attribute of the <lines> tag
	public		$Name ;
	// Page number of the capture
	public		$Page ;
	// Captured lines
	public		$Lines ;
	// Content type (mimics a little bit the PdfToTextCapturedText class)
	public		$Type			=  PdfToTextCaptureShapeDefinition::SHAPE_LINE ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Instantiates a PdfToTextCapturedLines object from a capture name, a page number and the list of
		lines captured on that page.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $name, $page, $lines )
	   {
		$this -> Name		=  $name ;
		$this -> Page		=  $page ;
		$this -> Lines		=  $lines ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations.
		The #[\ReturnTypeWillChange] attribute silences the PHP 8.1+ deprecation notice about return types
		of methods implementing built-in interfaces ; older PHP versions see it as a plain comment.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Countable interface : number of captured lines
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> Lines ) ) ; }


	// IteratorAggregate interface : iterates over the captured lines
	#[\ReturnTypeWillChange]
	public function  getIterator ( )
	   { return ( new ArrayIterator ( $this -> Lines ) ) ; }


	// ArrayAccess interface : lines are accessed by their index
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   { return ( $offset  >=  0  &&  $offset  <  count ( $this -> Lines ) ) ; }


	// The lines are stored in the Lines property
	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   { return ( $this -> Lines [ $offset ] ) ; }


	// Captured lines are read-only
	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }
    }


/**************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************
 ******                                                                                                  ******
 ******                                                                                                  ******
 ******                               CAPTURE INTERFACE FOR THE DEVELOPER                                ******
 ******         (none of the classes listed here are meant to be instantiated outside this file)         ******
 ******                                                                                                  ******
 ******                                                                                                  ******
 **************************************************************************************************************
 **************************************************************************************************************
 **************************************************************************************************************/

/*==============================================================================================================

    class PdfToTextCaptures -
        Represents all the areas in a PDF file captured by the supplied XML definitions.

  ==============================================================================================================*/
class  PdfToTextCaptures			//extends  Object
   {
	// Captured objects - May not exactly reflect the PdfToTextCapture*Shape classes
	// The array is indexed by page number ; each entry is the list of objects captured on that page
	private		$CapturedObjects ;
	// Allows faster access by capture name : each entry is the list of captured objects having that name
	private		$ObjectsByName			=  array ( ) ;
	// Capture wrappers already built by the __get() method, indexed by capture name.
	// A declared array is used rather than properties created on the fly, since the creation of dynamic
	// properties is deprecated as of PHP 8.2.
	private		$CaptureInstances		=  array ( ) ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Instantiates a PdfToTextCaptures object.
		$captures is an array indexed by page number, whose values are lists of captured objects ; each
		captured object must at least expose a Name property.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $captures )
	   {
		$this -> CapturedObjects	=  $captures ;

		// Build an array of objects indexed by their names
		foreach  ( $captures  as  $page => $shapes )
		   {
			foreach  ( $shapes  as  $shape )
				$this -> ObjectsByName [ $shape -> Name ] []	=  $shape ;
		    }
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    ToCaptures -
		Returns a simplified view of captured objects, with only name/value pairs.
		The result is a stdClass object having one property per capture name :
		- For rectangle captures, the property is an array of captured texts indexed by page number
		- For lines captures, the property is a list with one object per captured line, each object having
		  one property per column (column name => column text)
		Captures of any other type are ignored.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  ToCaptures ( )
	   {
		$result		=  new stdClass ( ) ;

		foreach  ( $this -> CapturedObjects  as  $page => $captures )
		   {
			foreach  ( $captures  as  $capture )
			    {
				switch  ( $capture -> Type )
				   {
					case	PdfToTextCaptureShapeDefinition::SHAPE_RECTANGLE :
						$name				=  $capture -> Name ;
						$value				=  $capture -> Text ;
						$result -> {$name} [ $page ]	=  $value ;
						break ;

					case	PdfToTextCaptureShapeDefinition::SHAPE_LINE :
						$name				=  $capture -> Name ;

						if  ( ! isset ( $result -> {$name} ) )
							$result -> {$name}		=  array ( ) ;

						// Lines coming from different pages are appended to the same list
						foreach  ( $capture  as  $line )
						   {
							$columns	=  new  stdClass ;

							foreach  ( $line  as  $column )
							   {
								$column_name			=  $column -> Name ;
								$column_value			=  $column -> Text ;
								$columns -> {$column_name}	=  $column_value ;
							    }

							$result -> {$name} []	=  $columns ;
						    }

						break ;
				    }
			    }
		    }

		return ( $result ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    __get -
		Retrieves the captured objects by their name, as specified in the XML definition.
		The wrapper object returned by GetCaptureInstance() is built on first access, then reused for the
		subsequent ones.
		An error is reported when no capture has the requested name.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __get ( $member )
	   {
		if  ( ! isset ( $this -> CaptureInstances [ $member ] ) )
		   {
			if  ( ! isset ( $this -> ObjectsByName [ $member ] ) )
				error ( new PdfToTextException ( "Undefined property \"$member\"." ) ) ;

			$this -> CaptureInstances [ $member ]	=  $this -> GetCaptureInstance ( $member ) ;
		    }

		return ( $this -> CaptureInstances [ $member ] ) ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

	    GetCapturedObjectsByName -
		Returns an associative array of the captured shapes, indexed by their name.
		Each entry is the list of objects captured under that name.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  GetCapturedObjectsByName ( )
	   { return ( $this -> ObjectsByName ) ; }


	/*--------------------------------------------------------------------------------------------------------------

	    GetCaptureInstance -
		Returns an object inheriting from the PdfToTextCapture class, that wraps the capture results.
		The wrapper class is chosen from the type of the first object captured under the name $fieldname.

	 *-------------------------------------------------------------------------------------------------------------*/
	protected function  GetCaptureInstance ( $fieldname )
	   {
		switch ( $this -> ObjectsByName [ $fieldname ] [0] -> Type )
		   {
			case	PdfToTextCaptureShapeDefinition::SHAPE_RECTANGLE :
				return ( new PdfToTextRectangleCapture ( $this -> ObjectsByName [ $fieldname ] ) ) ;

			case	PdfToTextCaptureShapeDefinition::SHAPE_LINE :
				return ( new PdfToTextLinesCapture ( $this -> ObjectsByName [ $fieldname ] ) ) ;

			default :
				error ( new PdfToTextCaptureException ( "Unhandled shape type " . $this -> ObjectsByName [ $fieldname ] [0] -> Type . "." ) ) ;
		    }
	    }


    }


/*==============================================================================================================

    class PdfToTextCapture -
        Base class for all capture classes accessible to the caller.

  ==============================================================================================================*/
class  PdfToTextCapture				//extends		Object
						implements	ArrayAccess, Countable, IteratorAggregate
   {
	// Captured objects wrapped by this instance ; the way the array is indexed is decided by derived classes
	protected	$Captures ;


	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Instantiates a PdfToTextCapture object wrapping the supplied array of captured objects.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $objects )
	   {
		//parent::__construct ( ) ;

		$this -> Captures		=  $objects ;
	    }


	/*--------------------------------------------------------------------------------------------------------------

		Interfaces implementations.
		The #[\ReturnTypeWillChange] attribute silences the PHP 8.1+ deprecation notice about return types
		of methods implementing built-in interfaces ; older PHP versions see it as a plain comment.

	 *-------------------------------------------------------------------------------------------------------------*/

	// Countable interface : number of captured objects
	#[\ReturnTypeWillChange]
	public function  count ( )
	   { return ( count ( $this -> Captures ) ) ; }


	// IteratorAggregate interface : iterates over the captured objects, preserving their keys
	#[\ReturnTypeWillChange]
	public function  getIterator ( )
	   { return ( new ArrayIterator ( $this -> Captures ) ) ; }


	// ArrayAccess interface -
	//	The existence check is made on the actual key rather than on a 0..count-1 range, because derived
	//	classes do not always store a zero-based list (PdfToTextRectangleCapture uses page numbers as keys).
	#[\ReturnTypeWillChange]
	public function  offsetExists ( $offset )
	   { return ( isset ( $this -> Captures [ $offset ] ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetGet ( $offset )
	   { return ( $this -> Captures [ $offset ] ) ; }


	// Captures are read-only
	#[\ReturnTypeWillChange]
	public function  offsetSet ( $offset, $value )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }


	#[\ReturnTypeWillChange]
	public function  offsetUnset ( $offset )
	   { error ( new PdfToTextCaptureException ( "Unsupported operation." ) ) ; }

    }


/*==============================================================================================================

    class PdfToTextLinesCapture -
        Represents a lines capture, without indexation to their page number.

  ==============================================================================================================*/
class  PdfToTextLinesCapture			extends  PdfToTextCapture
   {
	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Each item of $objects is itself a collection of lines ; all these lines are gathered into one single
		list, in the order they are encountered, which is then given to the parent constructor. Lines can
		thus be iterated without taking care of the collection they come from.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $objects )
	   {
		$all_lines	=  array ( ) ;

		foreach  ( $objects  as  $line_collection )
		   {
			foreach  ( $line_collection  as  $captured_line )
			   {
				$all_lines []	=  $captured_line ;
			    }
		    }

		parent::__construct ( $all_lines ) ;
	    }
    }


/*==============================================================================================================

    class PdfToTextRectangleCapture -
        Implements a rectangle capture, from the caller point of view.

  ==============================================================================================================*/
class  PdfToTextRectangleCapture		extends  PdfToTextCapture
   {
	/*--------------------------------------------------------------------------------------------------------------

	    Constructor -
		Re-indexes the supplied objects using their Page property as the key, then hands the resulting
		array over to the parent constructor. When several objects share the same page, the last one wins.

	 *-------------------------------------------------------------------------------------------------------------*/
	public function  __construct ( $objects )
	   {
		$objects_by_page	=  array ( ) ;

		foreach  ( $objects  as  $captured_rectangle )
		   {
			$page_number				=  $captured_rectangle -> Page ;
			$objects_by_page [ $page_number ]	=  $captured_rectangle ;
		    }

		parent::__construct ( $objects_by_page ) ;
	    }
    }