Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 1.
- #! /usr/bin/python
- 2.
- 3.
- # ineptpdf8.4.51.pyw
- 4.
- # ineptpdf, version 8.4.51
- 5.
- 6.
- # To run this program install Python 2.7 from http://www.python.org/download/
- 7.
- #
- 8.
- # PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
- 9.
- #
- 10.
- # and PyWin Extension (Win32API module) from
- 11.
- # http://sourceforge.net/projects/pywin32/files/
- 12.
- #
- 13.
- # Make sure to install the dedicated versions for Python 2.7.
- 14.
- #
- 15.
- # It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit
- 16.
- # Windows system).
- 17.
- #
- 18.
- # Save this script file as
- 19.
- # ineptpdf8.4.51.pyw and double-click on it to run it.
- 20.
- 21.
- # Revision history:
- 22.
- # 1 - Initial release
- 23.
- # 2 - Improved determination of key-generation algorithm
- 24.
- # 3 - Correctly handle PDF >=1.5 cross-reference streams
- 25.
- # 4 - Removal of ciando's personal ID (anon)
- 26.
- # 5 - removing small bug with V3 ebooks (anon)
- 27.
- # 6 - changed to adeptkey4.der format for 1.7.2 support (anon)
- 28.
- # 6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon)
- 29.
- # 7 - Get cross reference streams and object streams working for input.
- 30.
- # Not yet supported on output but this only affects file size,
- 31.
- # not functionality. (anon2)
- 32.
- # 7.1 - Correct a problem when an old trailer is not followed by startxref (anon2)
- 33.
- # 7.2 - Correct malformed Mac OS resource forks for Stanza
- 34.
- # - Support for cross ref streams on output (decreases file size) (anon2)
- 35.
- # 7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2)
- 36.
- # "The root object is missing or invalid" in Adobe Reader.
- 37.
- # 7.4 - Force all generation numbers in output file to be 0, like in v6.
- 38.
- # Fallback code for wrong xref improved (search till last trailer
- 39.
- # instead of first) (anon2)
- 40.
- # 8 - fileopen user machine identifier support (Tetrachroma)
- 41.
- # 8.1 - fileopen user cookies support (Tetrachroma)
- 42.
- # 8.2 - fileopen user name/password support (Tetrachroma)
- 43.
- # 8.3 - fileopen session cookie support (Tetrachroma)
- 44.
- # 8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma)
- 45.
- # 8.3.2 - improved server result parsing (Tetrachroma)
- 46.
- # 8.4 - Ident4D and encrypted Uuid support (Tetrachroma)
- 47.
- # 8.4.1 - improved MAC address processing (Tetrachroma)
- 48.
- # 8.4.2 - FowP3Uuid fallback file processing (Tetrachroma)
- 49.
- # 8.4.3 - improved user/password pdf file detection (Tetrachroma)
- 50.
- # 8.4.4 - small bugfix (Tetrachroma)
- 51.
- # 8.4.5 - improved cookie host searching (Tetrachroma)
- 52.
- # 8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma)
- 53.
- # 8.4.7 - UTF-8 input file conversion (Tetrachroma)
- 54.
- # 8.4.8 - fix for more rare utf8 problems (Tetrachroma)
- 55.
- # 8.4.9 - solution for utf8 in combination with
- 56.
- # ident4id method (Tetrachroma)
- 57.
- # 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma)
- 58.
- # 8.4.11 - alternative ident4id calculation (Tetrachroma)
- 59.
- # 8.4.12 - fix for capital username characters and
- 60.
- # other unusual user login names (Tetrachroma & ZeroPoint)
- 61.
- # 8.4.13 - small bug fixes (Tetrachroma)
- 62.
- # 8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma)
- 63.
- # 8.4.15 - 'bad file descriptor'-fix (Tetrachroma)
- 64.
- # 8.4.16 - improves user/pass detection (Tetrachroma)
- 65.
- # 8.4.17 - fix for several '=' chars in a DPRM entity (Tetrachroma)
- 66.
- # 8.4.18 - follow up bug fix for the DPRM problem,
- 67.
- # more readable error messages (Tetrachroma)
- 68.
- # 8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma)
- 69.
- # 8.4.20 - follow up patch (Tetrachroma)
- 70.
- # 8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma)
- 71.
- # 8.4.22 - disable prints for exception prevention (Tetrachroma)
- 72.
- # 8.4.23 - check for additional security attributes (Tetrachroma)
- 73.
- # 8.4.24 - improved cookie session support (Tetrachroma)
- 74.
- # 8.4.25 - more compatibility with unicode files (Tetrachroma)
- 75.
- # 8.4.26 - automated session/user cookie request function (works
- 76.
- # only with Firefox 3.x+) (Tetrachroma)
- 77.
- # 8.4.27 - user/password fallback
- 78.
- # 8.4.28 - AES decryption, improved misconfigured pdf handling,
- 79.
- # limited experimental APS support (Tetrachroma & Neisklar)
- 80.
- # 8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma)
- 81.
- # 8.4.30 - extended authorization attributes support (Tetrachroma)
- 82.
- # 8.4.31 - improved session cookie and better server response error
- 83.
- # handling (Tetrachroma)
- 84.
- # 8.4.33 - small cookie optimizations (Tetrachroma)
- 85.
- # 8.4.33 - debug output option (Tetrachroma)
- 86.
- # 8.4.34 - better user/password management
- 87.
- # (handles the 'AskUnp' response) (Tetrachroma)
- 88.
- # 8.4.35 - special handling for non-standard systems (Tetrachroma)
- 89.
- # 8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma)
- 90.
- # 8.4.36 - FOPN_flock support (Tetrachroma)
- 91.
- # 8.4.37 - patch for unicode paths/filenames (Tetrachroma)
- 92.
- # 8.4.38 - small fix for user/password dialog (Tetrachroma)
- 93.
- # 8.4.39 - sophisticated request mode differentiation, forced
- 94.
- # uuid calculation (Tetrachroma)
- 95.
- # 8.4.40 - fix for non standard server responses (Tetrachroma)
- 96.
- # 8.4.41 - improved user/password request windows,
- 97.
- # better server response tolerance (Tetrachroma)
- 98.
- # 8.4.42 - improved nl/cr server response parsing (Tetrachroma)
- 99.
- # 8.4.43 - fix for user names longer than 13 characters and special
- 100.
- # uuid encryption (Tetrachroma)
- 101.
- # 8.4.44 - another fix for ident4d problem (Tetrachroma)
- 102.
- # 8.4.45 - 2nd fix for ident4d problem (Tetrachroma)
- 103.
- # 8.4.46 - script cleanup and optimizations (Tetrachroma)
- 104.
- # 8.4.47 - script identification change to Adobe Reader (Tetrachroma)
- 105.
- # 8.4.48 - improved tolerance for false file/registry entries (Tetrachroma)
- 106.
- # 8.4.49 - improved username encryption (Tetrachroma)
- 107.
- # 8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar)
- 108.
- # 8.4.51 - automatic APS offline key retrieval (works only for
- 109.
- # Onleihe right now) (80ka80 & Tetrachroma)
- 110.
- 111.
- """
- 112.
- Decrypts Adobe ADEPT-encrypted and Fileopen PDF files.
- 113.
- """
- 114.
- 115.
- from __future__ import with_statement
- 116.
- 117.
- __license__ = 'GPL v3'
- 118.
- 119.
- import sys
- 120.
- import os
- 121.
- import re
- 122.
- import zlib
- 123.
- import struct
- 124.
- import hashlib
- 125.
- from itertools import chain, islice
- 126.
- import xml.etree.ElementTree as etree
- 127.
- import Tkinter
- 128.
- import Tkconstants
- 129.
- import tkFileDialog
- 130.
- import tkMessageBox
- 131.
- # added for fileopen support
- 132.
- import urllib
- 133.
- import urlparse
- 134.
- import time
- 135.
- import socket
- 136.
- import string
- 137.
- import uuid
- 138.
- import subprocess
- 139.
- import time
- 140.
- import getpass
- 141.
- from ctypes import *
- 142.
- import traceback
- 143.
- import inspect
- 144.
- import tempfile
- 145.
- import sqlite3
- 146.
- import httplib
- 147.
- try:
- 148.
- from Crypto.Cipher import ARC4
- 149.
- # needed for newer pdfs
- 150.
- from Crypto.Cipher import AES
- 151.
- from Crypto.Hash import SHA256
- 152.
- from Crypto.PublicKey import RSA
- 153.
- 154.
- except ImportError:
- 155.
- ARC4 = None
- 156.
- RSA = None
- 157.
- try:
- 158.
- from cStringIO import StringIO
- 159.
- except ImportError:
- 160.
- from StringIO import StringIO
- 161.
- 162.
- class ADEPTError(Exception):
- 163.
- pass
- 164.
- 165.
# Module-wide state (needed for fileopen and password decryption).
INPUTFILEPATH = ''
KEYFILEPATH = ''
PASSWORD = ''
DEBUG_MODE = False
IVERSION = '8.4.51'

# Policy for generating cross reference streams on output:
#   0 = never
#   1 = only if present in input
#   2 = always
GEN_XREF_STM = 1

# Per-document value of the policy above; per the original note it
# will be set in PDFSerializer.
gen_xref_stm = False
- 181.
- 182.
- ###
- 183.
- ### ASN.1 parsing code from tlslite
- 184.
- 185.
def bytesToNumber(bytes):
    '''
    Fold a sequence of integer byte values into one big-endian number.

    bytes -- iterable whose items are integers (e.g. a bytearray or a
             list of ints); the first item is the most significant.

    Returns the accumulated integer, or 0 for an empty sequence.
    '''
    # A plain 0 is enough: integers grow to arbitrary precision on their
    # own, so the Python-2-only "0L" literal is not needed.
    total = 0
    for byte in bytes:
        total = (total << 8) + byte
    return total
- 190.
- 191.
class ASN1Error(Exception):
    '''Raised when ASN.1 data cannot be decoded.'''
- 193.
- 194.
class ASN1Parser(object):
    '''
    Reader for a single ASN.1 tag/length/value element.

    The constructor skips the one-byte tag, decodes the length field and
    keeps the payload in self.value (length in self.length).  Nested
    elements are reached with getChild().
    '''

    class Parser(object):
        '''
        Cursor over an indexable sequence of integer byte values.

        self.index is the read position; every read advances it.
        '''

        def __init__(self, bytes):
            self.bytes = bytes
            self.index = 0

        def get(self, length):
            '''
            Read `length` bytes as one big-endian unsigned integer.

            Raises ASN1Error if fewer than `length` bytes remain.
            '''
            end = self.index + length
            if end > len(self.bytes):
                raise ASN1Error("Error decoding ASN.1")
            value = 0
            while self.index < end:
                value = (value << 8) | self.bytes[self.index]
                self.index += 1
            return value

        def getFixBytes(self, lengthBytes):
            '''Return the next `lengthBytes` items as a slice (no bounds check).'''
            start = self.index
            self.index = start + lengthBytes
            return self.bytes[start:self.index]

        def getVarBytes(self, lengthLength):
            '''Read a `lengthLength`-byte size prefix, then that many bytes.'''
            return self.getFixBytes(self.get(lengthLength))

        def getFixList(self, length, lengthList):
            '''Read `lengthList` integers of `length` bytes each.'''
            return [self.get(length) for _ in range(lengthList)]

        def getVarList(self, length, lengthLength):
            '''
            Read a size prefix (in bytes), then a list of `length`-byte
            integers filling exactly that many bytes.

            Raises ASN1Error if the size is not a multiple of `length`.
            '''
            total = self.get(lengthLength)
            if total % length != 0:
                raise ASN1Error("Error decoding ASN.1")
            count = int(total / length)
            return [self.get(length) for _ in range(count)]

        def startLengthCheck(self, lengthLength):
            '''Read an expected length and start counting consumed bytes.'''
            self.lengthCheck = self.get(lengthLength)
            self.indexCheck = self.index

        def setLengthCheck(self, length):
            '''Start counting consumed bytes against a caller-given length.'''
            self.lengthCheck = length
            self.indexCheck = self.index

        def stopLengthCheck(self):
            '''Raise ASN1Error unless exactly the expected bytes were consumed.'''
            if (self.index - self.indexCheck) != self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")

        def atLengthCheck(self):
            '''
            Return True once exactly the expected bytes were consumed and
            False while bytes remain; raise ASN1Error on overrun.
            '''
            consumed = self.index - self.indexCheck
            if consumed > self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")
            return consumed == self.lengthCheck

    def __init__(self, bytes):
        reader = self.Parser(bytes)
        # The tag byte is read and discarded; only length/value are kept.
        reader.get(1)
        self.length = self._getASN1Length(reader)
        self.value = reader.getFixBytes(self.length)

    def getChild(self, which):
        '''
        Return the `which`-th (0-based) element inside self.value as a
        new ASN1Parser.
        '''
        reader = self.Parser(self.value)
        # Walk over the siblings; after the loop `start` marks the
        # beginning of the requested child and reader.index its end.
        for _ in range(which + 1):
            start = reader.index
            reader.get(1)
            reader.getFixBytes(self._getASN1Length(reader))
        return ASN1Parser(reader.bytes[start:reader.index])

    def _getASN1Length(self, p):
        '''
        Decode a length field: values up to 127 are the length itself,
        otherwise the low 7 bits give the number of length bytes that
        follow.
        '''
        first = p.get(1)
        if first > 127:
            return p.get(first & 0x7F)
        return first
- 277.
- 278.
- ###
- 279.
- ### PDF parsing routines from pdfminer, with changes for EBX_HANDLER
- 280.
- 281.
- ## Utilities
- 282.
- ##
- 283.
def choplist(n, seq):
    '''
    Yield consecutive n-item tuples taken from seq.

    A trailing run of fewer than n items is dropped.
    '''
    group = []
    for item in seq:
        group.append(item)
        if len(group) != n:
            continue
        yield tuple(group)
        group = []
- 292.
- 293.
def nunpack(s, default=0):
    '''
    Unpacks up to 4 bytes big endian.

    s       -- byte string of length 0 to 4.
    default -- value returned when s is empty.

    Returns the unsigned integer encoded by s.
    Raises TypeError if s is longer than 4 bytes.
    '''
    l = len(s)
    if not l:
        return default
    if l == 1:
        return ord(s)
    if l == 2:
        return struct.unpack('>H', s)[0]
    if l == 3:
        # There is no 3-byte struct code: left-pad to 4 bytes.
        return struct.unpack('>L', b'\x00' + s)[0]
    if l == 4:
        return struct.unpack('>L', s)[0]
    # The exception used to be *returned*, so callers received a
    # TypeError instance as if it were the decoded number.
    raise TypeError('invalid length: %d' % l)
- 308.
- 309.
- 310.
# Strictness switch for the PS/PDF parser.  While it is falsy,
# literal_name() and keyword_name() return str(x) for objects of the
# wrong type instead of raising PSTypeError.
STRICT = 0
- 311.
- 312.
- 313.
- ## PS Exceptions
- 314.
- ##
- 315.
class PSException(Exception):
    '''Base class of all PostScript parsing errors.'''


class PSEOF(PSException):
    '''The input ended while more data was required.'''


class PSSyntaxError(PSException):
    '''Syntax error category (no raiser is visible in this section).'''


class PSTypeError(PSException):
    '''An object did not have the required PostScript type.'''


class PSValueError(PSException):
    '''Value error category (no raiser is visible in this section).'''
- 320.
- 321.
- 322.
- ## Basic PostScript Types
- 323.
- ##
- 324.
- 325.
- # PSLiteral
- 326.
class PSObject(object):
    '''Common base class of the PostScript object types.'''


class PSLiteral(PSObject):
    '''
    PS literals (e.g. "/Name").
    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    '''

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Every non-alphanumeric character is written as '#' followed by
        # its two-digit hexadecimal code.
        escaped = ''.join(
            ch if ch.isalnum() else '#%02x' % ord(ch)
            for ch in self.name)
        return '/%s' % escaped
- 345.
- 346.
- # PSKeyword
- 347.
class PSKeyword(PSObject):
    '''
    PS keywords (e.g. "showpage").
    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    '''

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # A keyword prints as its bare name.
        return self.name
- 359.
- 360.
- # PSSymbolTable
- 361.
class PSSymbolTable(object):

    '''
    Symbol table that stores PSLiteral or PSKeyword.

    Each distinct name maps to exactly one object, created on first use
    by calling `classe(name)`.
    '''

    def __init__(self, classe):
        self.dic = {}
        self.classe = classe

    def intern(self, name):
        '''Return the unique object for `name`, creating it if needed.'''
        try:
            return self.dic[name]
        except KeyError:
            lit = self.dic[name] = self.classe(name)
            return lit
- 379.
- 380.
# One intern table per symbol kind.
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)

# Short aliases for interning a literal / a keyword.
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern

# Pre-interned delimiter keywords.
(KEYWORD_BRACE_BEGIN, KEYWORD_BRACE_END,
 KEYWORD_ARRAY_BEGIN, KEYWORD_ARRAY_END,
 KEYWORD_DICT_BEGIN, KEYWORD_DICT_END) = map(
    KWD, ('{', '}', '[', ']', '<<', '>>'))
- 390.
- 391.
- 392.
def literal_name(x):
    '''
    Return the name of the PSLiteral x.

    For any other object, raise PSTypeError when STRICT is set;
    otherwise return str(x).
    '''
    if isinstance(x, PSLiteral):
        return x.name
    if STRICT:
        raise PSTypeError('Literal required: %r' % x)
    return str(x)
- 399.
- 400.
def keyword_name(x):
    '''
    Return the name of the PSKeyword x.

    For any other object, raise PSTypeError when STRICT is set;
    otherwise return str(x).
    '''
    if isinstance(x, PSKeyword):
        return x.name
    if STRICT:
        raise PSTypeError('Keyword required: %r' % x)
    return str(x)
- 407.
- 408.
- 409.
- ## PSBaseParser
- 410.
- ##
- 411.
# Character classes and token terminators used by PSBaseParser.
EOL = re.compile(r'[\r\n]')              # a line terminator
SPC = re.compile(r'\s')                  # any whitespace character
NONSPC = re.compile(r'\S')               # first character of a token
HEX = re.compile(r'[0-9a-fA-F]')         # one hexadecimal digit
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(r'[^0-9]')
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(r'[()\134]')     # \134 is the backslash
OCT_STRING = re.compile(r'[0-7]')        # one octal digit

# Character code produced by each single-letter string escape.
ESC_STRING = {
    'b': 8,
    't': 9,
    'n': 10,
    'f': 12,
    'r': 13,
    '(': 40,
    ')': 41,
    '\\': 92,
}
- 423.
- 424.
class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a small state machine.  Every parse_* method takes
    the current buffer `s` and a start index `i`, consumes what it can
    and returns `(next_state, next_index)`.  Since a token may straddle
    two buffers, its text is accumulated in self.token between calls.
    Finished tokens are queued in self.tokens as `(position, object)`
    pairs, where position is the file offset recorded in parse_main.
    '''
    BUFSIZ = 4096

    def __init__(self, fp):
        '''fp -- seekable file-like object to read from.'''
        self.fp = fp
        self.seek(0)

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        '''Do nothing here; close() calls it.'''
        return

    def close(self):
        self.flush()

    def tell(self):
        '''Return the file offset of the next character to be parsed.'''
        return self.bufpos + self.charpos

    def poll(self, pos=None, n=80):
        '''
        Debugging aid: seek to `pos` (default: the current parse
        position) and back again.  The dump of the next `n` characters
        is disabled, so the file position is left unchanged.
        '''
        pos0 = self.fp.tell()
        # Test against None so that an explicit offset of 0 is honoured.
        if pos is None:
            pos = self.bufpos + self.charpos
        self.fp.seek(pos)
        # (debug output of self.fp.read(n) intentionally disabled)
        self.fp.seek(pos0)

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []

    def fillbuf(self):
        '''
        Load the next BUFSIZ chunk once the current buffer is used up.

        Raises PSEOF when no more data can be read.
        '''
        if self.charpos < len(self.buf):
            return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0

    def parse_main(self, s, i):
        '''
        Skip whitespace and select the state that handles the token
        starting at the next non-space character.
        '''
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        self.tokenstart = self.bufpos + j
        if c == '%':
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':
            self.token = c
            return (self.parse_float, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':
            self.token = ''
            return (self.parse_wclose, j+1)
        # Any other single character becomes a keyword of its own.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        '''Queue obj together with the start offset of its token.'''
        self.tokens.append((self.tokenstart, obj))

    def parse_comment(self, s, i):
        '''Consume a comment up to (not including) the end of line.'''
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments: the text is collected but never queued.
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        '''Consume a /Name literal; '#' starts a hex-escaped character.'''
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        '''Collect up to two hex digits after '#' and append the character.'''
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        '''
        Consume the digits of an integer; a '.' switches to parse_float.
        A token that is not a valid integer (e.g. a lone sign) is
        dropped silently.
        '''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            self.token += c
            return (self.parse_float, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            pass
        return (self.parse_main, j)

    def parse_float(self, s, i):
        '''
        Consume the fractional digits of a real number.
        A token that is not a valid float (e.g. "." or "-.") is dropped
        silently, matching what parse_number does for integers.
        '''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_float, len(s))
        j = m.start(0)
        self.token += s[i:j]
        try:
            self.add_token(float(self.token))
        except ValueError:
            # Previously this escaped and aborted tokenization.
            pass
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        '''Consume a keyword; "true"/"false" become Python booleans.'''
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        '''
        Consume a (...) string.  self.paren counts the nesting depth so
        that balanced inner parentheses stay part of the string.
        '''
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren:
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)

    def parse_string_1(self, s, i):
        '''
        Handle the character(s) after a backslash inside a string: up to
        three octal digits, or one of the ESC_STRING letters.  Any other
        escaped character is skipped.
        '''
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            # Keep only the low byte: three octal digits can encode up
            # to 0o777, which chr() rejects on Python 2.
            self.token += chr(int(self.oct, 8) & 0xff)
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        '''After '<': either a hex string follows or this is '<<'.'''
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        '''After '>': a second '>' completes the '>>' keyword.'''
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        '''
        Consume hex digits and whitespace up to the first other
        character, then decode the digits pairwise into a string.
        '''
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                             SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        '''
        Return the next `(position, object)` token.

        Raises PSEOF (from fillbuf) when the input is exhausted first.
        '''
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.

        Returns `(position, text)`; text includes its terminator, and a
        \\r\\n pair is kept together.  Raises PSEOF (from fillbuf) if the
        data ends before the line is known to be complete.
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # The matching '\n' may be the next character.
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backward. This is used to locate
        the trailers at the end of a file.

        Generator: the file is read in BUFSIZ chunks from the end and
        split at every \\r or \\n.  Each yielded piece starts with the
        terminator that precedes it; text before the first terminator
        of the file is never yielded.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s:
                break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # No terminator left in this chunk: carry the text
                    # over so it is joined with the previous chunk.
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = ''
        return
- 718.
- 719.
- 720.
- ## PSStackParser
- 721.
- ##
- 722.
- class PSStackParser(PSBaseParser):
- 723.
- 724.
- def __init__(self, fp):
- 725.
- PSBaseParser.__init__(self, fp)
- 726.
- self.reset()
- 727.
- return
- 728.
- 729.
- def reset(self):
- 730.
- self.context = []
- 731.
- self.curtype = None
- 732.
- self.curstack = []
- 733.
- self.results = []
- 734.
- return
- 735.
- 736.
- def seek(self, pos):
- 737.
- PSBaseParser.seek(self, pos)
- 738.
- self.reset()
- 739.
- return
- 740.
- 741.
- def push(self, *objs):
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement