Guest User

Untitled

a guest
Feb 7th, 2011
25,185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 119.16 KB | None | 0 0
  1. #! /usr/bin/python
  2.  
  3. # ineptpdf8.4.51.pyw
  4. # ineptpdf, version 8.4.51
  5.  
  6. # To run this program install Python 2.7 from http://www.python.org/download/
  7. #
  8. # PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
  9. #
  10. # and PyWin Extension (Win32API module) from
  11. # http://sourceforge.net/projects/pywin32/files/
  12. #
  13. # Make sure to install the dedicated versions for Python 2.7.
  14. #
  15. # It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit
  16. # Windows system).
  17. #
  18. # Save this script file as
  19. # ineptpdf8.4.51.pyw and double-click on it to run it.
  20.  
  21. # Revision history:
  22. #   1 - Initial release
  23. #   2 - Improved determination of key-generation algorithm
  24. #   3 - Correctly handle PDF >=1.5 cross-reference streams
  25. #   4 - Removal of ciando's personal ID (anon)
  26. #   5 - removing small bug with V3 ebooks (anon)
  27. #   6 - changed to adeptkey4.der format for 1.7.2 support (anon)
  28. #   6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon)
  29. #   7 - Get cross reference streams and object streams working for input.
  30. #       Not yet supported on output but this only affects file size,
  31. #       not functionality. (anon2)
  32. #   7.1 - Correct a problem when an old trailer is not followed by startxref (anon2)
  33. #   7.2 - Correct malformed Mac OS resource forks for Stanza
  34. #       - Support for cross ref streams on output (decreases file size) (anon2)
  35. #   7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2)
  36. #         "The root object is missing or invalid" in Adobe Reader.
  37. #   7.4 - Force all generation numbers in output file to be 0, like in v6.
  38. #         Fallback code for wrong xref improved (search till last trailer
  39. #         instead of first) (anon2)
  40. #   8 - fileopen user machine identifier support (Tetrachroma)
  41. #   8.1 - fileopen user cookies support (Tetrachroma)
  42. #   8.2 - fileopen user name/password support (Tetrachroma)
  43. #   8.3 - fileopen session cookie support (Tetrachroma)
  44. #   8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma)
  45. #   8.3.2 - improved server result parsing (Tetrachroma)
  46. #   8.4 - Ident4D and encrypted Uuid support (Tetrachroma)
  47. #   8.4.1 - improved MAC address processing (Tetrachroma)
  48. #   8.4.2 - FowP3Uuid fallback file processing (Tetrachroma)
  49. #   8.4.3 - improved user/password pdf file detection (Tetrachroma)
  50. #   8.4.4 - small bugfix (Tetrachroma)
  51. #   8.4.5 - improved cookie host searching (Tetrachroma)
  52. #   8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma)
  53. #   8.4.7 - UTF-8 input file conversion (Tetrachroma)
  54. #   8.4.8 - fix for more rare utf8 problems (Tetrachroma)
  55. #   8.4.9 - solution for utf8 in combination with
  56. #           ident4id method (Tetrachroma)
  57. #   8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma)
  58. #   8.4.11 - alternative ident4id calculation (Tetrachroma)
  59. #   8.4.12 - fix for capital username characters and
  60. #            other unusual user login names (Tetrachroma & ZeroPoint)
  61. #   8.4.13 - small bug fixes (Tetrachroma)
  62. #   8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma)
  63. #   8.4.15 - 'bad file descriptor'-fix (Tetrachroma)
  64. #   8.4.16 - improves user/pass detection (Tetrachroma)
  65. #   8.4.17 - fix for several '=' chars in a DPRM entity (Tetrachroma)
  66. #   8.4.18 - follow up bug fix for the DPRM problem,
  67. #            more readable error messages (Tetrachroma)
  68. #   8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma)
  69. #   8.4.20 - follow up patch (Tetrachroma)
  70. #   8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma)
  71. #   8.4.22 - disable prints for exception prevention (Tetrachroma)
  72. #   8.4.23 - check for additional security attributes (Tetrachroma)
  73. #   8.4.24 - improved cookie session support (Tetrachroma)
  74. #   8.4.25 - more compatibility with unicode files (Tetrachroma)
  75. #   8.4.26 - automated session/user cookie request function (works
  76. #            only with Firefox 3.x+) (Tetrachroma)
  77. #   8.4.27 - user/password fallback
  78. #   8.4.28 - AES decryption, improved misconfigured pdf handling,
  79. #            limited experimental APS support (Tetrachroma & Neisklar)
  80. #   8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma)
  81. #   8.4.30 - extended authorization attributes support (Tetrachroma)
  82. #   8.4.31 - improved session cookie and better server response error
  83. #            handling (Tetrachroma)
  84. #   8.4.33 - small cookie optimizations (Tetrachroma)
  85. #   8.4.33 - debug output option (Tetrachroma)
  86. #   8.4.34 - better user/password management
  87. #            (handles the 'AskUnp' response) (Tetrachroma)
  88. #   8.4.35 - special handling for non-standard systems (Tetrachroma)
  89. #   8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma)
  90. #   8.4.36 - FOPN_flock support (Tetrachroma)
  91. #   8.4.37 - patch for unicode paths/filenames (Tetrachroma)
  92. #   8.4.38 - small fix for user/password dialog (Tetrachroma)
  93. #   8.4.39 - sophisticated request mode differentiation, forced
  94. #            uuid calculation (Tetrachroma)
  95. #   8.4.40 - fix for non standard server responses (Tetrachroma)
  96. #   8.4.41 - improved user/password request windows,
  97. #            better server response tolerance (Tetrachroma)
  98. #   8.4.42 - improved nl/cr server response parsing (Tetrachroma)
  99. #   8.4.43 - fix for user names longer than 13 characters and special
  100. #            uuid encryption (Tetrachroma)
  101. #   8.4.44 - another fix for ident4d problem (Tetrachroma)
  102. #   8.4.45 - 2nd fix for ident4d problem (Tetrachroma)
  103. #   8.4.46 - script cleanup and optimizations (Tetrachroma)
  104. #   8.4.47 - script identification change to Adobe Reader (Tetrachroma)
  105. #   8.4.48 - improved tolerance for false file/registry entries (Tetrachroma)
  106. #   8.4.49 - improved username encryption (Tetrachroma)
  107. #   8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar)
  108. #   8.4.51 - automatic APS offline key retrieval (works only for
  109. #            Onleihe right now) (80ka80 & Tetrachroma)
  110.  
  111. """
  112. Decrypts Adobe ADEPT-encrypted and Fileopen PDF files.
  113. """
  114.  
  115. from __future__ import with_statement
  116.  
  117. __license__ = 'GPL v3'
  118.  
  119. import sys
  120. import os
  121. import re
  122. import zlib
  123. import struct
  124. import hashlib
  125. from itertools import chain, islice
  126. import xml.etree.ElementTree as etree
  127. import Tkinter
  128. import Tkconstants
  129. import tkFileDialog
  130. import tkMessageBox
  131. # added for fileopen support
  132. import urllib
  133. import urlparse
  134. import time
  135. import socket
  136. import string
  137. import uuid
  138. import subprocess
  139. import time
  140. import getpass
  141. from ctypes import *
  142. import traceback
  143. import inspect
  144. import tempfile
  145. import sqlite3
  146. import httplib
  147. try:
  148.     from Crypto.Cipher import ARC4
  149.     # needed for newer pdfs
  150.     from Crypto.Cipher import AES
  151.     from Crypto.Hash import SHA256
  152.     from Crypto.PublicKey import RSA
  153.    
  154. except ImportError:
  155.     ARC4 = None
  156.     RSA = None
  157. try:
  158.     from cStringIO import StringIO
  159. except ImportError:
  160.     from StringIO import StringIO
  161.  
  162. class ADEPTError(Exception):
  163.     pass
  164.  
# Module-level state (needed for fileopen and password decryption).
# These start out empty and are presumably filled in by the GUI/CLI code
# further down the file -- confirm against the callers.
INPUTFILEPATH = ''
KEYFILEPATH = ''
PASSWORD = ''
# Debug switch; off by default.
DEBUG_MODE = False
# Version string of this script (matches the header at the top of the file).
IVERSION = '8.4.51'

# Do we generate cross reference streams on output?
# 0 = never
# 1 = only if present in input
# 2 = always

GEN_XREF_STM = 1

# This is the value for the current document
gen_xref_stm = False # will be set in PDFSerializer
  181.  
  182. ###
  183. ### ASN.1 parsing code from tlslite
  184.  
  185. def bytesToNumber(bytes):
  186.     total = 0L
  187.     for byte in bytes:
  188.         total = (total << 8) + byte
  189.     return total
  190.  
  191. class ASN1Error(Exception):
  192.     pass
  193.  
  194. class ASN1Parser(object):
  195.     class Parser(object):
  196.         def __init__(self, bytes):
  197.             self.bytes = bytes
  198.             self.index = 0
  199.    
  200.         def get(self, length):
  201.             if self.index + length > len(self.bytes):
  202.                 raise ASN1Error("Error decoding ASN.1")
  203.             x = 0
  204.             for count in range(length):
  205.                 x <<= 8
  206.                 x |= self.bytes[self.index]
  207.                 self.index += 1
  208.             return x
  209.    
  210.         def getFixBytes(self, lengthBytes):
  211.             bytes = self.bytes[self.index : self.index+lengthBytes]
  212.             self.index += lengthBytes
  213.             return bytes
  214.    
  215.         def getVarBytes(self, lengthLength):
  216.             lengthBytes = self.get(lengthLength)
  217.             return self.getFixBytes(lengthBytes)
  218.    
  219.         def getFixList(self, length, lengthList):
  220.             l = [0] * lengthList
  221.             for x in range(lengthList):
  222.                 l[x] = self.get(length)
  223.             return l
  224.    
  225.         def getVarList(self, length, lengthLength):
  226.             lengthList = self.get(lengthLength)
  227.             if lengthList % length != 0:
  228.                 raise ASN1Error("Error decoding ASN.1")
  229.             lengthList = int(lengthList/length)
  230.             l = [0] * lengthList
  231.             for x in range(lengthList):
  232.                 l[x] = self.get(length)
  233.             return l
  234.    
  235.         def startLengthCheck(self, lengthLength):
  236.             self.lengthCheck = self.get(lengthLength)
  237.             self.indexCheck = self.index
  238.    
  239.         def setLengthCheck(self, length):
  240.             self.lengthCheck = length
  241.             self.indexCheck = self.index
  242.    
  243.         def stopLengthCheck(self):
  244.             if (self.index - self.indexCheck) != self.lengthCheck:
  245.                 raise ASN1Error("Error decoding ASN.1")
  246.    
  247.         def atLengthCheck(self):
  248.             if (self.index - self.indexCheck) < self.lengthCheck:
  249.                 return False
  250.             elif (self.index - self.indexCheck) == self.lengthCheck:
  251.                 return True
  252.             else:
  253.                 raise ASN1Error("Error decoding ASN.1")
  254.  
  255.     def __init__(self, bytes):
  256.         p = self.Parser(bytes)
  257.         p.get(1)
  258.         self.length = self._getASN1Length(p)
  259.         self.value = p.getFixBytes(self.length)
  260.  
  261.     def getChild(self, which):
  262.         p = self.Parser(self.value)
  263.         for x in range(which+1):
  264.             markIndex = p.index
  265.             p.get(1)
  266.             length = self._getASN1Length(p)
  267.             p.getFixBytes(length)
  268.         return ASN1Parser(p.bytes[markIndex:p.index])
  269.  
  270.     def _getASN1Length(self, p):
  271.         firstLength = p.get(1)
  272.         if firstLength<=127:
  273.             return firstLength
  274.         else:
  275.             lengthLength = firstLength & 0x7F
  276.             return p.get(lengthLength)
  277.  
  278. ###
  279. ### PDF parsing routines from pdfminer, with changes for EBX_HANDLER
  280.  
  281. ##  Utilities
  282. ##
  283. def choplist(n, seq):
  284.     '''Groups every n elements of the list.'''
  285.     r = []
  286.     for x in seq:
  287.         r.append(x)
  288.         if len(r) == n:
  289.             yield tuple(r)
  290.             r = []
  291.     return
  292.  
  293. def nunpack(s, default=0):
  294.     '''Unpacks up to 4 bytes big endian.'''
  295.     l = len(s)
  296.     if not l:
  297.         return default
  298.     elif l == 1:
  299.         return ord(s)
  300.     elif l == 2:
  301.         return struct.unpack('>H', s)[0]
  302.     elif l == 3:
  303.         return struct.unpack('>L', '\x00'+s)[0]
  304.     elif l == 4:
  305.         return struct.unpack('>L', s)[0]
  306.     else:
  307.         return TypeError('invalid length: %d' % l)
  308.  
  309.  
# Strictness switch for the PS/PDF parsing code below: when true, type and
# structure problems raise exceptions; when false (0) the code falls back to
# lenient handling (e.g. literal_name() returns str(x) for a non-literal).
STRICT = 0
  311.  
  312.  
  313. ##  PS Exceptions
  314. ##
  315. class PSException(Exception): pass
  316. class PSEOF(PSException): pass
  317. class PSSyntaxError(PSException): pass
  318. class PSTypeError(PSException): pass
  319. class PSValueError(PSException): pass
  320.  
  321.  
  322. ##  Basic PostScript Types
  323. ##
  324.  
  325. # PSLiteral
  326. class PSObject(object): pass
  327.  
  328. class PSLiteral(PSObject):
  329.     '''
  330.    PS literals (e.g. "/Name").
  331.    Caution: Never create these objects directly.
  332.    Use PSLiteralTable.intern() instead.
  333.    '''
  334.     def __init__(self, name):
  335.         self.name = name
  336.         return
  337.    
  338.     def __repr__(self):
  339.         name = []
  340.         for char in self.name:
  341.             if not char.isalnum():
  342.                 char = '#%02x' % ord(char)
  343.             name.append(char)
  344.         return '/%s' % ''.join(name)
  345.  
  346. # PSKeyword
  347. class PSKeyword(PSObject):
  348.     '''
  349.    PS keywords (e.g. "showpage").
  350.    Caution: Never create these objects directly.
  351.    Use PSKeywordTable.intern() instead.
  352.    '''
  353.     def __init__(self, name):
  354.         self.name = name
  355.         return
  356.    
  357.     def __repr__(self):
  358.         return self.name
  359.  
  360. # PSSymbolTable
  361. class PSSymbolTable(object):
  362.    
  363.     '''
  364.    Symbol table that stores PSLiteral or PSKeyword.
  365.    '''
  366.    
  367.     def __init__(self, classe):
  368.         self.dic = {}
  369.         self.classe = classe
  370.         return
  371.    
  372.     def intern(self, name):
  373.         if name in self.dic:
  374.             lit = self.dic[name]
  375.         else:
  376.             lit = self.classe(name)
  377.             self.dic[name] = lit
  378.         return lit
  379.  
# One shared symbol table per symbol kind, so that equal names always map to
# the same PSLiteral / PSKeyword object.
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
# Short aliases for interning a literal / a keyword.
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
# Interned keywords for the bracketing tokens.
KEYWORD_BRACE_BEGIN = KWD('{')
KEYWORD_BRACE_END = KWD('}')
KEYWORD_ARRAY_BEGIN = KWD('[')
KEYWORD_ARRAY_END = KWD(']')
KEYWORD_DICT_BEGIN = KWD('<<')
KEYWORD_DICT_END = KWD('>>')
  390.  
  391.  
  392. def literal_name(x):
  393.     if not isinstance(x, PSLiteral):
  394.         if STRICT:
  395.             raise PSTypeError('Literal required: %r' % x)
  396.         else:
  397.             return str(x)
  398.     return x.name
  399.  
  400. def keyword_name(x):
  401.     if not isinstance(x, PSKeyword):
  402.         if STRICT:
  403.             raise PSTypeError('Keyword required: %r' % x)
  404.         else:
  405.             return str(x)
  406.     return x.name
  407.  
  408.  
  409. ##  PSBaseParser
  410. ##
# Pre-compiled patterns used by the tokenizer states of PSBaseParser.
EOL = re.compile(r'[\r\n]')  # a line terminator character (CR or LF)
SPC = re.compile(r'\s')  # one whitespace character
NONSPC = re.compile(r'\S')  # one non-whitespace character
HEX = re.compile(r'[0-9a-fA-F]')  # one hexadecimal digit
# First character that ends a literal name (delimiter, '#' or whitespace).
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
# First character that is neither whitespace nor a hex digit.
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
# Two hex digits, or failing that any single character.
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(r'[^0-9]')  # first non-digit character
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')  # same set as END_LITERAL
# Characters needing attention inside a (...) string: parentheses and
# backslash (\134 is the octal code of the backslash).
END_STRING = re.compile(r'[()\134]')
OCT_STRING = re.compile(r'[0-7]')  # one octal digit
# Character following a backslash in a string -> character code it denotes.
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
  423.  
  424. class PSBaseParser(object):
  425.  
  426.     '''
  427.    Most basic PostScript parser that performs only basic tokenization.
  428.    '''
  429.     BUFSIZ = 4096
  430.  
  431.     def __init__(self, fp):
  432.         self.fp = fp
  433.         self.seek(0)
  434.         return
  435.  
  436.     def __repr__(self):
  437.         return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
  438.  
  439.     def flush(self):
  440.         return
  441.    
  442.     def close(self):
  443.         self.flush()
  444.         return
  445.    
  446.     def tell(self):
  447.         return self.bufpos+self.charpos
  448.  
  449.     def poll(self, pos=None, n=80):
  450.         pos0 = self.fp.tell()
  451.         if not pos:
  452.             pos = self.bufpos+self.charpos
  453.         self.fp.seek(pos)
  454.         ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
  455.         self.fp.seek(pos0)
  456.         return
  457.  
  458.     def seek(self, pos):
  459.         '''
  460.        Seeks the parser to the given position.
  461.        '''
  462.         self.fp.seek(pos)
  463.         # reset the status for nextline()
  464.         self.bufpos = pos
  465.         self.buf = ''
  466.         self.charpos = 0
  467.         # reset the status for nexttoken()
  468.         self.parse1 = self.parse_main
  469.         self.tokens = []
  470.         return
  471.  
  472.     def fillbuf(self):
  473.         if self.charpos < len(self.buf): return
  474.         # fetch next chunk.
  475.         self.bufpos = self.fp.tell()
  476.         self.buf = self.fp.read(self.BUFSIZ)
  477.         if not self.buf:
  478.             raise PSEOF('Unexpected EOF')
  479.         self.charpos = 0
  480.         return
  481.    
  482.     def parse_main(self, s, i):
  483.         m = NONSPC.search(s, i)
  484.         if not m:
  485.             return (self.parse_main, len(s))
  486.         j = m.start(0)
  487.         c = s[j]
  488.         self.tokenstart = self.bufpos+j
  489.         if c == '%':
  490.             self.token = '%'
  491.             return (self.parse_comment, j+1)
  492.         if c == '/':
  493.             self.token = ''
  494.             return (self.parse_literal, j+1)
  495.         if c in '-+' or c.isdigit():
  496.             self.token = c
  497.             return (self.parse_number, j+1)
  498.         if c == '.':
  499.             self.token = c
  500.             return (self.parse_float, j+1)
  501.         if c.isalpha():
  502.             self.token = c
  503.             return (self.parse_keyword, j+1)
  504.         if c == '(':
  505.             self.token = ''
  506.             self.paren = 1
  507.             return (self.parse_string, j+1)
  508.         if c == '<':
  509.             self.token = ''
  510.             return (self.parse_wopen, j+1)
  511.         if c == '>':
  512.             self.token = ''
  513.             return (self.parse_wclose, j+1)
  514.         self.add_token(KWD(c))
  515.         return (self.parse_main, j+1)
  516.                            
  517.     def add_token(self, obj):
  518.         self.tokens.append((self.tokenstart, obj))
  519.         return
  520.    
  521.     def parse_comment(self, s, i):
  522.         m = EOL.search(s, i)
  523.         if not m:
  524.             self.token += s[i:]
  525.             return (self.parse_comment, len(s))
  526.         j = m.start(0)
  527.         self.token += s[i:j]
  528.         # We ignore comments.
  529.         #self.tokens.append(self.token)
  530.         return (self.parse_main, j)
  531.    
  532.     def parse_literal(self, s, i):
  533.         m = END_LITERAL.search(s, i)
  534.         if not m:
  535.             self.token += s[i:]
  536.             return (self.parse_literal, len(s))
  537.         j = m.start(0)
  538.         self.token += s[i:j]
  539.         c = s[j]
  540.         if c == '#':
  541.             self.hex = ''
  542.             return (self.parse_literal_hex, j+1)
  543.         self.add_token(LIT(self.token))
  544.         return (self.parse_main, j)
  545.    
  546.     def parse_literal_hex(self, s, i):
  547.         c = s[i]
  548.         if HEX.match(c) and len(self.hex) < 2:
  549.             self.hex += c
  550.             return (self.parse_literal_hex, i+1)
  551.         if self.hex:
  552.             self.token += chr(int(self.hex, 16))
  553.         return (self.parse_literal, i)
  554.  
  555.     def parse_number(self, s, i):
  556.         m = END_NUMBER.search(s, i)
  557.         if not m:
  558.             self.token += s[i:]
  559.             return (self.parse_number, len(s))
  560.         j = m.start(0)
  561.         self.token += s[i:j]
  562.         c = s[j]
  563.         if c == '.':
  564.             self.token += c
  565.             return (self.parse_float, j+1)
  566.         try:
  567.             self.add_token(int(self.token))
  568.         except ValueError:
  569.             pass
  570.         return (self.parse_main, j)
  571.     def parse_float(self, s, i):
  572.         m = END_NUMBER.search(s, i)
  573.         if not m:
  574.             self.token += s[i:]
  575.             return (self.parse_float, len(s))
  576.         j = m.start(0)
  577.         self.token += s[i:j]
  578.         self.add_token(float(self.token))
  579.         return (self.parse_main, j)
  580.    
  581.     def parse_keyword(self, s, i):
  582.         m = END_KEYWORD.search(s, i)
  583.         if not m:
  584.             self.token += s[i:]
  585.             return (self.parse_keyword, len(s))
  586.         j = m.start(0)
  587.         self.token += s[i:j]
  588.         if self.token == 'true':
  589.             token = True
  590.         elif self.token == 'false':
  591.             token = False
  592.         else:
  593.             token = KWD(self.token)
  594.         self.add_token(token)
  595.         return (self.parse_main, j)
  596.  
  597.     def parse_string(self, s, i):
  598.         m = END_STRING.search(s, i)
  599.         if not m:
  600.             self.token += s[i:]
  601.             return (self.parse_string, len(s))
  602.         j = m.start(0)
  603.         self.token += s[i:j]
  604.         c = s[j]
  605.         if c == '\\':
  606.             self.oct = ''
  607.             return (self.parse_string_1, j+1)
  608.         if c == '(':
  609.             self.paren += 1
  610.             self.token += c
  611.             return (self.parse_string, j+1)
  612.         if c == ')':
  613.             self.paren -= 1
  614.             if self.paren:
  615.                 self.token += c
  616.                 return (self.parse_string, j+1)
  617.         self.add_token(self.token)
  618.         return (self.parse_main, j+1)
  619.     def parse_string_1(self, s, i):
  620.         c = s[i]
  621.         if OCT_STRING.match(c) and len(self.oct) < 3:
  622.             self.oct += c
  623.             return (self.parse_string_1, i+1)
  624.         if self.oct:
  625.             self.token += chr(int(self.oct, 8))
  626.             return (self.parse_string, i)
  627.         if c in ESC_STRING:
  628.             self.token += chr(ESC_STRING[c])
  629.         return (self.parse_string, i+1)
  630.  
  631.     def parse_wopen(self, s, i):
  632.         c = s[i]
  633.         if c.isspace() or HEX.match(c):
  634.             return (self.parse_hexstring, i)
  635.         if c == '<':
  636.             self.add_token(KEYWORD_DICT_BEGIN)
  637.             i += 1
  638.         return (self.parse_main, i)
  639.  
  640.     def parse_wclose(self, s, i):
  641.         c = s[i]
  642.         if c == '>':
  643.             self.add_token(KEYWORD_DICT_END)
  644.             i += 1
  645.         return (self.parse_main, i)
  646.  
  647.     def parse_hexstring(self, s, i):
  648.         m = END_HEX_STRING.search(s, i)
  649.         if not m:
  650.             self.token += s[i:]
  651.             return (self.parse_hexstring, len(s))
  652.         j = m.start(0)
  653.         self.token += s[i:j]
  654.         token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
  655.                                                  SPC.sub('', self.token))
  656.         self.add_token(token)
  657.         return (self.parse_main, j)
  658.  
  659.     def nexttoken(self):
  660.         while not self.tokens:
  661.             self.fillbuf()
  662.             (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
  663.         token = self.tokens.pop(0)
  664.         return token
  665.  
  666.     def nextline(self):
  667.         '''
  668.        Fetches a next line that ends either with \\r or \\n.
  669.        '''
  670.         linebuf = ''
  671.         linepos = self.bufpos + self.charpos
  672.         eol = False
  673.         while 1:
  674.             self.fillbuf()
  675.             if eol:
  676.                 c = self.buf[self.charpos]
  677.                 # handle '\r\n'
  678.                 if c == '\n':
  679.                     linebuf += c
  680.                     self.charpos += 1
  681.                 break
  682.             m = EOL.search(self.buf, self.charpos)
  683.             if m:
  684.                 linebuf += self.buf[self.charpos:m.end(0)]
  685.                 self.charpos = m.end(0)
  686.                 if linebuf[-1] == '\r':
  687.                     eol = True
  688.                 else:
  689.                     break
  690.             else:
  691.                 linebuf += self.buf[self.charpos:]
  692.                 self.charpos = len(self.buf)
  693.         return (linepos, linebuf)
  694.  
  695.     def revreadlines(self):
  696.         '''
  697.        Fetches a next line backword. This is used to locate
  698.        the trailers at the end of a file.
  699.        '''
  700.         self.fp.seek(0, 2)
  701.         pos = self.fp.tell()
  702.         buf = ''
  703.         while 0 < pos:
  704.             prevpos = pos
  705.             pos = max(0, pos-self.BUFSIZ)
  706.             self.fp.seek(pos)
  707.             s = self.fp.read(prevpos-pos)
  708.             if not s: break
  709.             while 1:
  710.                 n = max(s.rfind('\r'), s.rfind('\n'))
  711.                 if n == -1:
  712.                     buf = s + buf
  713.                     break
  714.                 yield s[n:]+buf
  715.                 s = s[:n]
  716.                 buf = ''
  717.         return
  718.  
  719.  
  720. ##  PSStackParser
  721. ##
  722. class PSStackParser(PSBaseParser):
  723.  
  724.     def __init__(self, fp):
  725.         PSBaseParser.__init__(self, fp)
  726.         self.reset()
  727.         return
  728.    
  729.     def reset(self):
  730.         self.context = []
  731.         self.curtype = None
  732.         self.curstack = []
  733.         self.results = []
  734.         return
  735.  
  736.     def seek(self, pos):
  737.         PSBaseParser.seek(self, pos)
  738.         self.reset()
  739.         return
  740.  
  741.     def push(self, *objs):
  742.         self.curstack.extend(objs)
  743.         return
  744.     def pop(self, n):
  745.         objs = self.curstack[-n:]
  746.         self.curstack[-n:] = []
  747.         return objs
  748.     def popall(self):
  749.         objs = self.curstack
  750.         self.curstack = []
  751.         return objs
  752.     def add_results(self, *objs):
  753.         self.results.extend(objs)
  754.         return
  755.  
  756.     def start_type(self, pos, type):
  757.         self.context.append((pos, self.curtype, self.curstack))
  758.         (self.curtype, self.curstack) = (type, [])
  759.         return
  760.     def end_type(self, type):
  761.         if self.curtype != type:
  762.             raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
  763.         objs = [ obj for (_,obj) in self.curstack ]
  764.         (pos, self.curtype, self.curstack) = self.context.pop()
  765.         return (pos, objs)
  766.  
  767.     def do_keyword(self, pos, token):
  768.         return
  769.    
  770.     def nextobject(self, direct=False):
  771.         '''
  772.        Yields a list of objects: keywords, literals, strings,
  773.        numbers, arrays and dictionaries. Arrays and dictionaries
  774.        are represented as Python sequence and dictionaries.
  775.        '''
  776.         while not self.results:
  777.             (pos, token) = self.nexttoken()
  778.             ##print (pos,token), (self.curtype, self.curstack)
  779.             if (isinstance(token, int) or
  780.                     isinstance(token, float) or
  781.                     isinstance(token, bool) or
  782.                     isinstance(token, str) or
  783.                     isinstance(token, PSLiteral)):
  784.                 # normal token
  785.                 self.push((pos, token))
  786.             elif token == KEYWORD_ARRAY_BEGIN:
  787.                 # begin array
  788.                 self.start_type(pos, 'a')
  789.             elif token == KEYWORD_ARRAY_END:
  790.                 # end array
  791.                 try:
  792.                     self.push(self.end_type('a'))
  793.                 except PSTypeError:
  794.                     if STRICT: raise
  795.             elif token == KEYWORD_DICT_BEGIN:
  796.                 # begin dictionary
  797.                 self.start_type(pos, 'd')
  798.             elif token == KEYWORD_DICT_END:
  799.                 # end dictionary
  800.                 try:
  801.                     (pos, objs) = self.end_type('d')
  802.                     if len(objs) % 2 != 0:
  803.                         raise PSSyntaxError(
  804.                             'Invalid dictionary construct: %r' % objs)
  805.                     d = dict((literal_name(k), v) \
  806.                                  for (k,v) in choplist(2, objs))
  807.                     self.push((pos, d))
  808.                 except PSTypeError:
  809.                     if STRICT: raise
  810.             else:
  811.                 self.do_keyword(pos, token)
  812.             if self.context:
  813.                 continue
  814.             else:
  815.                 if direct:
  816.                     return self.pop(1)[0]
  817.                 self.flush()
  818.         obj = self.results.pop(0)
  819.         return obj
  820.  
  821.  
# Interned literals compared against elsewhere in the file.  Each tuple pairs
# a long name with its short alias; these look like PDF stream filter names
# -- their use is outside this part of the file, confirm there.
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
  826.  
  827.  
  828. ##  PDF Objects
  829. ##
  830. class PDFObject(PSObject): pass
  831.  
  832. class PDFException(PSException): pass
  833. class PDFTypeError(PDFException): pass
  834. class PDFValueError(PDFException): pass
  835. class PDFNotImplementedError(PSException): pass
  836.  
  837.  
  838. ##  PDFObjRef
  839. ##
  840. class PDFObjRef(PDFObject):
  841.    
  842.     def __init__(self, doc, objid, genno):
  843.         if objid == 0:
  844.             if STRICT:
  845.                 raise PDFValueError('PDF object id cannot be 0.')
  846.         self.doc = doc
  847.         self.objid = objid
  848.         self.genno = genno
  849.         return
  850.  
  851.     def __repr__(self):
  852.         return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
  853.  
  854.     def resolve(self):
  855.         return self.doc.getobj(self.objid)
  856.  
  857.  
  858. # resolve
  859. def resolve1(x):
  860.     '''
  861.    Resolve an object. If this is an array or dictionary,
  862.    it may still contains some indirect objects inside.
  863.    '''
  864.     while isinstance(x, PDFObjRef):
  865.         x = x.resolve()
  866.     return x
  867.  
  868. def resolve_all(x):
  869.     '''
  870.    Recursively resolve X and all the internals.
  871.    Make sure there is no indirect reference within the nested object.
  872.    This procedure might be slow.
  873.    '''
  874.     while isinstance(x, PDFObjRef):
  875.         x = x.resolve()
  876.     if isinstance(x, list):
  877.         x = [ resolve_all(v) for v in x ]
  878.     elif isinstance(x, dict):
  879.         for (k,v) in x.iteritems():
  880.             x[k] = resolve_all(v)
  881.     return x
  882.  
  883. def decipher_all(decipher, objid, genno, x):
  884.     '''
  885.    Recursively decipher X.
  886.    '''
  887.     if isinstance(x, str):
  888.         return decipher(objid, genno, x)
  889.     decf = lambda v: decipher_all(decipher, objid, genno, v)
  890.     if isinstance(x, list):
  891.         x = [decf(v) for v in x]
  892.     elif isinstance(x, dict):
  893.         x = dict((k, decf(v)) for (k, v) in x.iteritems())
  894.     return x
  895.  
  896.  
# Type checking
  898. def int_value(x):
  899.     x = resolve1(x)
  900.     if not isinstance(x, int):
  901.         if STRICT:
  902.             raise PDFTypeError('Integer required: %r' % x)
  903.         return 0
  904.     return x
  905.  
  906. def float_value(x):
  907.     x = resolve1(x)
  908.     if not isinstance(x, float):
  909.         if STRICT:
  910.             raise PDFTypeError('Float required: %r' % x)
  911.         return 0.0
  912.     return x
  913.  
  914. def num_value(x):
  915.     x = resolve1(x)
  916.     if not (isinstance(x, int) or isinstance(x, float)):
  917.         if STRICT:
  918.             raise PDFTypeError('Int or Float required: %r' % x)
  919.         return 0
  920.     return x
  921.  
  922. def str_value(x):
  923.     x = resolve1(x)
  924.     if not isinstance(x, str):
  925.         if STRICT:
  926.             raise PDFTypeError('String required: %r' % x)
  927.         return ''
  928.     return x
  929.  
  930. def list_value(x):
  931.     x = resolve1(x)
  932.     if not (isinstance(x, list) or isinstance(x, tuple)):
  933.         if STRICT:
  934.             raise PDFTypeError('List required: %r' % x)
  935.         return []
  936.     return x
  937.  
  938. def dict_value(x):
  939.     x = resolve1(x)
  940.     if not isinstance(x, dict):
  941.         if STRICT:
  942.             raise PDFTypeError('Dict required: %r' % x)
  943.         return {}
  944.     return x
  945.  
  946. def stream_value(x):
  947.     x = resolve1(x)
  948.     if not isinstance(x, PDFStream):
  949.         if STRICT:
  950.             raise PDFTypeError('PDFStream required: %r' % x)
  951.         return PDFStream({}, '')
  952.     return x
  953.  
  954. # ascii85decode(data)
  955. def ascii85decode(data):
  956.   n = b = 0
  957.   out = ''
  958.   for c in data:
  959.     if '!' <= c and c <= 'u':
  960.       n += 1
  961.       b = b*85+(ord(c)-33)
  962.       if n == 5:
  963.         out += struct.pack('>L',b)
  964.         n = b = 0
  965.     elif c == 'z':
  966.       assert n == 0
  967.       out += '\0\0\0\0'
  968.     elif c == '~':
  969.       if n:
  970.         for _ in range(5-n):
  971.           b = b*85+84
  972.         out += struct.pack('>L',b)[:n-1]
  973.       break
  974.   return out
  975.  
  976.  
  977. ##  PDFStream type
  978. class PDFStream(PDFObject):
  979.     def __init__(self, dic, rawdata, decipher=None):
  980.         length = int_value(dic.get('Length', 0))
  981.         eol = rawdata[length:]
  982.         # quick and dirty fix for false length attribute,
  983.         # might not work if the pdf stream parser has a problem
  984.         if decipher != None and decipher.__name__ == 'decrypt_aes':
  985.             if (len(rawdata) % 16) != 0:
  986.                 cutdiv = len(rawdata) // 16
  987.                 rawdata = rawdata[:16*cutdiv]
  988.         else:
  989.             if eol in ('\r', '\n', '\r\n'):
  990.                 rawdata = rawdata[:length]
  991.                
  992.         self.dic = dic
  993.         self.rawdata = rawdata
  994.         self.decipher = decipher
  995.         self.data = None
  996.         self.decdata = None
  997.         self.objid = None
  998.         self.genno = None
  999.         return
  1000.  
  1001.     def set_objid(self, objid, genno):
  1002.         self.objid = objid
  1003.         self.genno = genno
  1004.         return
  1005.    
  1006.     def __repr__(self):
  1007.         if self.rawdata:
  1008.             return '<PDFStream(%r): raw=%d, %r>' % \
  1009.                    (self.objid, len(self.rawdata), self.dic)
  1010.         else:
  1011.             return '<PDFStream(%r): data=%d, %r>' % \
  1012.                    (self.objid, len(self.data), self.dic)
  1013.  
  1014.     def decode(self):
  1015.         assert self.data is None and self.rawdata is not None
  1016.         data = self.rawdata
  1017.         if self.decipher:
  1018.             # Handle encryption
  1019.             data = self.decipher(self.objid, self.genno, data)
  1020.             if gen_xref_stm:
  1021.                 self.decdata = data # keep decrypted data
  1022.         if 'Filter' not in self.dic:
  1023.             self.data = data
  1024.             self.rawdata = None
  1025.             ##print self.dict
  1026.             return
  1027.         filters = self.dic['Filter']
  1028.         if not isinstance(filters, list):
  1029.             filters = [ filters ]
  1030.         for f in filters:
  1031.             if f in LITERALS_FLATE_DECODE:
  1032.                 # will get errors if the document is encrypted.
  1033.                 data = zlib.decompress(data)
  1034.             elif f in LITERALS_LZW_DECODE:
  1035.                 data = ''.join(LZWDecoder(StringIO(data)).run())
  1036.             elif f in LITERALS_ASCII85_DECODE:
  1037.                 data = ascii85decode(data)
  1038.             elif f == LITERAL_CRYPT:
  1039.                 raise PDFNotImplementedError('/Crypt filter is unsupported')
  1040.             else:
  1041.                 raise PDFNotImplementedError('Unsupported filter: %r' % f)
  1042.             # apply predictors
  1043.             if 'DP' in self.dic:
  1044.                 params = self.dic['DP']
  1045.             else:
  1046.                 params = self.dic.get('DecodeParms', {})
  1047.             if 'Predictor' in params:
  1048.                 pred = int_value(params['Predictor'])
  1049.                 if pred:
  1050.                     if pred != 12:
  1051.                         raise PDFNotImplementedError(
  1052.                             'Unsupported predictor: %r' % pred)
  1053.                     if 'Columns' not in params:
  1054.                         raise PDFValueError(
  1055.                             'Columns undefined for predictor=12')
  1056.                     columns = int_value(params['Columns'])
  1057.                     buf = ''
  1058.                     ent0 = '\x00' * columns
  1059.                     for i in xrange(0, len(data), columns+1):
  1060.                         pred = data[i]
  1061.                         ent1 = data[i+1:i+1+columns]
  1062.                         if pred == '\x02':
  1063.                             ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
  1064.                                                for (a,b) in zip(ent0,ent1))
  1065.                         buf += ent1
  1066.                         ent0 = ent1
  1067.                     data = buf
  1068.         self.data = data
  1069.         self.rawdata = None
  1070.         return
  1071.  
  1072.     def get_data(self):
  1073.         if self.data is None:
  1074.             self.decode()
  1075.         return self.data
  1076.  
  1077.     def get_rawdata(self):
  1078.         return self.rawdata
  1079.  
  1080.     def get_decdata(self):
  1081.         if self.decdata is not None:
  1082.             return self.decdata
  1083.         data = self.rawdata
  1084.         if self.decipher and data:
  1085.             # Handle encryption
  1086.             data = self.decipher(self.objid, self.genno, data)
  1087.         return data
  1088.  
  1089.        
  1090. ##  PDF Exceptions
  1091. ##
  1092. class PDFSyntaxError(PDFException): pass
  1093. class PDFNoValidXRef(PDFSyntaxError): pass
  1094. class PDFEncryptionError(PDFException): pass
  1095. class PDFPasswordIncorrect(PDFEncryptionError): pass
  1096.  
  1097. # some predefined literals and keywords.
  1098. LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
  1099. LITERAL_XREF = PSLiteralTable.intern('XRef')
  1100. LITERAL_PAGE = PSLiteralTable.intern('Page')
  1101. LITERAL_PAGES = PSLiteralTable.intern('Pages')
  1102. LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
  1103.  
  1104.  
  1105. ##  XRefs
  1106. ##
  1107.  
  1108. ##  PDFXRef
  1109. ##
  1110. class PDFXRef(object):
  1111.  
  1112.     def __init__(self):
  1113.         self.offsets = None
  1114.         return
  1115.  
  1116.     def __repr__(self):
  1117.         return '<PDFXRef: objs=%d>' % len(self.offsets)
  1118.  
  1119.     def objids(self):
  1120.         return self.offsets.iterkeys()
  1121.  
  1122.     def load(self, parser):
  1123.         self.offsets = {}
  1124.         while 1:
  1125.             try:
  1126.                 (pos, line) = parser.nextline()
  1127.             except PSEOF:
  1128.                 raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
  1129.             if not line:
  1130.                 raise PDFNoValidXRef('Premature eof: %r' % parser)
  1131.             if line.startswith('trailer'):
  1132.                 parser.seek(pos)
  1133.                 break
  1134.             f = line.strip().split(' ')
  1135.             if len(f) != 2:
  1136.                 raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
  1137.             try:
  1138.                 (start, nobjs) = map(int, f)
  1139.             except ValueError:
  1140.                 raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
  1141.             for objid in xrange(start, start+nobjs):
  1142.                 try:
  1143.                     (_, line) = parser.nextline()
  1144.                 except PSEOF:
  1145.                     raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
  1146.                 f = line.strip().split(' ')
  1147.                 if len(f) != 3:
  1148.                     raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
  1149.                 (pos, genno, use) = f
  1150.                 if use != 'n': continue
  1151.                 self.offsets[objid] = (int(genno), int(pos))
  1152.         self.load_trailer(parser)
  1153.         return
  1154.    
  1155.     KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
  1156.     def load_trailer(self, parser):
  1157.         try:
  1158.             (_,kwd) = parser.nexttoken()
  1159.             assert kwd is self.KEYWORD_TRAILER
  1160.             (_,dic) = parser.nextobject(direct=True)
  1161.         except PSEOF:
  1162.             x = parser.pop(1)
  1163.             if not x:
  1164.                 raise PDFNoValidXRef('Unexpected EOF - file corrupted')
  1165.             (_,dic) = x[0]
  1166.         self.trailer = dict_value(dic)
  1167.         return
  1168.  
  1169.     def getpos(self, objid):
  1170.         try:
  1171.             (genno, pos) = self.offsets[objid]
  1172.         except KeyError:
  1173.             raise
  1174.         return (None, pos)
  1175.  
  1176.  
  1177. ##  PDFXRefStream
  1178. ##
  1179. class PDFXRefStream(object):
  1180.  
  1181.     def __init__(self):
  1182.         self.index = None
  1183.         self.data = None
  1184.         self.entlen = None
  1185.         self.fl1 = self.fl2 = self.fl3 = None
  1186.         return
  1187.  
  1188.     def __repr__(self):
  1189.         return '<PDFXRef: objids=%s>' % self.index
  1190.  
  1191.     def objids(self):
  1192.         for first, size in self.index:
  1193.             for objid in xrange(first, first + size):
  1194.                 yield objid
  1195.    
  1196.     def load(self, parser, debug=0):
  1197.         (_,objid) = parser.nexttoken() # ignored
  1198.         (_,genno) = parser.nexttoken() # ignored
  1199.         (_,kwd) = parser.nexttoken()
  1200.         (_,stream) = parser.nextobject()
  1201.         if not isinstance(stream, PDFStream) or \
  1202.            stream.dic['Type'] is not LITERAL_XREF:
  1203.             raise PDFNoValidXRef('Invalid PDF stream spec.')
  1204.         size = stream.dic['Size']
  1205.         index = stream.dic.get('Index', (0,size))
  1206.         self.index = zip(islice(index, 0, None, 2),
  1207.                          islice(index, 1, None, 2))
  1208.         (self.fl1, self.fl2, self.fl3) = stream.dic['W']
  1209.         self.data = stream.get_data()
  1210.         self.entlen = self.fl1+self.fl2+self.fl3
  1211.         self.trailer = stream.dic
  1212.         return
  1213.    
  1214.     def getpos(self, objid):
  1215.         offset = 0
  1216.         for first, size in self.index:
  1217.             if first <= objid  and objid < (first + size):
  1218.                 break
  1219.             offset += size
  1220.         else:
  1221.             raise KeyError(objid)
  1222.         i = self.entlen * ((objid - first) + offset)
  1223.         ent = self.data[i:i+self.entlen]
  1224.         f1 = nunpack(ent[:self.fl1], 1)
  1225.         if f1 == 1:
  1226.             pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
  1227.             genno = nunpack(ent[self.fl1+self.fl2:])
  1228.             return (None, pos)
  1229.         elif f1 == 2:
  1230.             objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
  1231.             index = nunpack(ent[self.fl1+self.fl2:])
  1232.             return (objid, index)
  1233.         # this is a free object
  1234.         raise KeyError(objid)
  1235.  
  1236.  
  1237. ##  PDFDocument
  1238. ##
  1239. ##  A PDFDocument object represents a PDF document.
  1240. ##  Since a PDF file is usually pretty big, normally it is not loaded
  1241. ##  at once. Rather it is parsed dynamically as processing goes.
  1242. ##  A PDF parser is associated with the document.
  1243. ##
  1244. class PDFDocument(object):
  1245.  
  1246.     def __init__(self):
  1247.         self.xrefs = []
  1248.         self.objs = {}
  1249.         self.parsed_objs = {}
  1250.         self.root = None
  1251.         self.catalog = None
  1252.         self.parser = None
  1253.         self.encryption = None
  1254.         self.decipher = None
  1255.         # dictionaries for fileopen
  1256.         self.fileopen = {}
  1257.         self.urlresult = {}        
  1258.         self.ready = False
  1259.         return
  1260.  
  1261.     # set_parser(parser)
  1262.     #   Associates the document with an (already initialized) parser object.
  1263.     def set_parser(self, parser):
  1264.         if self.parser: return
  1265.         self.parser = parser
  1266.         # The document is set to be temporarily ready during collecting
  1267.         # all the basic information about the document, e.g.
  1268.         # the header, the encryption information, and the access rights
  1269.         # for the document.
  1270.         self.ready = True
  1271.         # Retrieve the information of each header that was appended
  1272.         # (maybe multiple times) at the end of the document.
  1273.         self.xrefs = parser.read_xref()
  1274.         for xref in self.xrefs:
  1275.             trailer = xref.trailer
  1276.             if not trailer: continue
  1277.  
  1278.             # If there's an encryption info, remember it.
  1279.             if 'Encrypt' in trailer:
  1280.                 #assert not self.encryption
  1281.                 try:
  1282.                     self.encryption = (list_value(trailer['ID']),
  1283.                                    dict_value(trailer['Encrypt']))
  1284.                 # fix for bad files
  1285.                 except:
  1286.                     self.encryption = ('ffffffffffffffffffffffffffffffffffff',
  1287.                                        dict_value(trailer['Encrypt']))
  1288.             if 'Root' in trailer:
  1289.                 self.set_root(dict_value(trailer['Root']))
  1290.                 break
  1291.         else:
  1292.             raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
  1293.         # The document is set to be non-ready again, until all the
  1294.         # proper initialization (asking the password key and
  1295.         # verifying the access permission, so on) is finished.
  1296.         self.ready = False
  1297.         return
  1298.  
  1299.     # set_root(root)
  1300.     #   Set the Root dictionary of the document.
  1301.     #   Each PDF file must have exactly one /Root dictionary.
  1302.     def set_root(self, root):
  1303.         self.root = root
  1304.         self.catalog = dict_value(self.root)
  1305.         if self.catalog.get('Type') is not LITERAL_CATALOG:
  1306.             if STRICT:
  1307.                 raise PDFSyntaxError('Catalog not found!')
  1308.         return
  1309.     # initialize(password='')
  1310.     #   Perform the initialization with a given password.
  1311.     #   This step is mandatory even if there's no password associated
  1312.     #   with the document.
  1313.     def initialize(self, password=''):
  1314.         if not self.encryption:
  1315.             self.is_printable = self.is_modifiable = self.is_extractable = True
  1316.             self.ready = True
  1317.             return
  1318.         (docid, param) = self.encryption
  1319.         type = literal_name(param['Filter'])
  1320.         if type == 'Adobe.APS':
  1321.             return self.initialize_adobe_ps(password, docid, param)
  1322.         if type == 'Standard':
  1323.             return self.initialize_standard(password, docid, param)
  1324.         if type == 'EBX_HANDLER':
  1325.             return self.initialize_ebx(password, docid, param)
  1326.         if type == 'FOPN_fLock':
  1327.             # remove of unnecessairy password attribute
  1328.             return self.initialize_fopn_flock(docid, param)  
  1329.         if type == 'FOPN_foweb':
  1330.             # remove of unnecessairy password attribute
  1331.             return self.initialize_fopn(docid, param)
  1332.         raise PDFEncryptionError('Unknown filter: param=%r' % param)
  1333.  
  1334.     def initialize_adobe_ps(self, password, docid, param):
  1335.         global KEYFILEPATH
  1336.         self.decrypt_key = self.genkey_adobe_ps(param)
  1337.         self.genkey = self.genkey_v4
  1338.         self.decipher = self.decrypt_aes
  1339.         self.ready = True
  1340.         return
  1341.    
  1342.     def getPrincipalKey(self, k=None, url=None, referer=None):
  1343.             if url == None:
  1344.                     url="ssl://edc.bibliothek-digital.de/edcws/services/urn:EDCLicenseService"
  1345.             data1='<?xml version="1.0" encoding="UTF-8"?><SOAP-ENV:Envelope xmlns:SO'+\
  1346.             'AP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http'+\
  1347.             '://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/'+\
  1348.             'XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:tns1="'+\
  1349.             'http://edc.adobe.com/edcwebservice" xmlns:impl="http://localhost:8080/axis/s'+\
  1350.             'ervices/urn:EDCLicenseService" xmlns:ns2="http://common.edc.adobe.com" xmlns:ns1="'+\
  1351.             'http://ns.adobe.com/PolicyServer/ws"><SOAP-ENV:Header><EDCSecurity>&lt;wsse:Security '+\
  1352.             'xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-'+\
  1353.             '1.0.xsd"&gt;&lt;wsse:UsernameToken&gt;&lt;wsse:Username&gt;edc_anonymous&lt;/wsse:Username&'+\
  1354.             'gt;&lt;wsse:Password Type="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-username-'+\
  1355.             'token-profile-1.0#PasswordText"&gt;edc_anonymous&lt;/wsse:Password&gt;&lt;/wsse:UsernameToken&'+\
  1356.             'gt;&lt;/wsse:Security&gt;</EDCSecurity><Version>7</Version><Locale>de-de</Locale></SOAP-ENV:Header>'+\
  1357.             '<SOAP-ENV:Body><impl:synchronize><SynchronizationRequest><firstTime>1</firstTime><licenseSeqNum>0</'+\
  1358.             'licenseSeqNum><policySeqNum>1</policySeqNum><revocationSeqNum>0</revocationSeqNum><'+\
  1359.             'watermarkTemplateSeqNum>0</watermarkTemplateSeqNum></SynchronizationRequest></'+\
  1360.             'impl:synchronize></SOAP-ENV:Body></SOAP-ENV:Envelope>'
  1361.             if k not in url[:40]:
  1362.                 return None
  1363.             #~ extract host and path:
  1364.             host=re.compile(r'[a-zA-Z]://([^/]+)/.+', re.I).search(url).group(1)
  1365.             urlpath=re.compile(r'[a-zA-Z]://[^/]+(/.+)', re.I).search(url).group(1)
  1366.            
  1367.             # open a socket connection on port 80
  1368.  
  1369.             conn = httplib.HTTPSConnection(host, 443)
  1370.            
  1371.             #~ Headers for request
  1372.             headers={"Accept": "*/*", "Host": host, "User-Agent": "Mozilla/3.0 (compatible; Acrobat EDC SOAP 1.0)",
  1373.                      "Content-Type": "text/xml; charset=utf-8", "Cache-Control": "no-cache", "SOAPAction": ""}
  1374.            
  1375.             # send data1 and headers
  1376.             try:
  1377.                     conn.request("POST", urlpath, data1, headers)
  1378.             except:
  1379.                     raise ADEPTError("Could not post request to '"+host+"'.")
  1380.            
  1381.             # read respose
  1382.             try:
  1383.                     response = conn.getresponse()
  1384.                     responsedata=response.read()
  1385.             except:
  1386.                     raise ADEPTError("Could not read response from '"+host+"'.")
  1387.            
  1388.             # close connection
  1389.             conn.close()
  1390.            
  1391.             try:
  1392.                     key=re.compile(r'PricipalKey"((?!<key>).)*<key[^>]*>(((?!</key>).)*)</key>', re.I).search(responsedata).group(2)
  1393.            
  1394.             except :
  1395.                     key=None
  1396.             return key
  1397.  
  1398.     def genkey_adobe_ps(self, param):
  1399.         # nice little offline principal keys dictionary
  1400.         principalkeys = { 'bibliothek-digital.de': 'Dzqx8McQUNd2CDzBVmtnweUxVWlqJTMqyYtiDIc4dZI='.decode('base64')}
  1401.         for k, v in principalkeys.iteritems():
  1402.             result = self.getPrincipalKey(k)
  1403.             #print result
  1404.             if result != None:
  1405.                 principalkeys[k] = result.decode('base64')
  1406.             else:
  1407.                 raise ADEPTError("No (Online) PrincipalKey found.")
  1408.                
  1409.         self.is_printable = self.is_modifiable = self.is_extractable = True
  1410. ##        print 'keyvalue'
  1411. ##        print len(keyvalue)
  1412. ##        print keyvalue.encode('hex')
  1413.         length = int_value(param.get('Length', 0)) / 8
  1414.         edcdata = str_value(param.get('EDCData')).decode('base64')
  1415.         pdrllic = str_value(param.get('PDRLLic')).decode('base64')
  1416.         pdrlpol = str_value(param.get('PDRLPol')).decode('base64')          
  1417.         #print 'ecd rights'
  1418.         edclist = []
  1419.         for pair in edcdata.split('\n'):
  1420.             edclist.append(pair)
  1421. ##        print edclist
  1422. ##        print 'edcdata decrypted'
  1423. ##        print edclist[0].decode('base64').encode('hex')
  1424. ##        print edclist[1].decode('base64').encode('hex')
  1425. ##        print edclist[2].decode('base64').encode('hex')
  1426. ##        print edclist[3].decode('base64').encode('hex')
  1427. ##        print 'offlinekey'
  1428. ##        print len(edclist[9].decode('base64'))
  1429. ##        print pdrllic
  1430.         # principal key request
  1431.         for key in principalkeys:
  1432.             if key in pdrllic:
  1433.                 principalkey = principalkeys[key]
  1434.             else:
  1435.                 raise ADEPTError('Cannot find principal key for this pdf')
  1436. ##        print 'minorversion'
  1437. ##        print int(edclist[8])
  1438.         # fix for minor version
  1439. ##        minorversion = int(edclist[8]) - 100
  1440. ##        if minorversion < 1:
  1441. ##            minorversion = 1
  1442. ##        print int(minorversion)
  1443.         shakey = SHA256.new()
  1444.         shakey.update(principalkey)
  1445. ##        for i in range(0,minorversion):
  1446. ##            shakey.update(principalkey)
  1447.         shakey = shakey.digest()
  1448. ##        shakey = SHA256.new(principalkey).digest()
  1449.         ivector = 16 * chr(0)
  1450.         #print shakey
  1451.         plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
  1452.         if plaintext[-16:] != 16 * chr(16):
  1453.             raise ADEPTError('Offlinekey cannot be decrypted, aborting (hint: redownload pdf) ...')
  1454.         pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
  1455.         if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16:
  1456.             raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
  1457.         else:
  1458.             cutter = -1 * ord(pdrlpol[-1])
  1459.             #print cutter
  1460.             pdrlpol = pdrlpol[:cutter]            
  1461.         #print plaintext.encode('hex')
  1462.         #print 'pdrlpol'
  1463.         #print pdrlpol
  1464.         return plaintext[:16]
  1465.    
    # 32-byte padding string used by initialize_standard(): it is appended to
    # the password (the result is cut to 32 bytes) and is hashed on its own
    # when the /U entry is checked for revision 3 and later.
    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
                       '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
  1468.     # experimental aes pw support
  1469.     def initialize_standard(self, password, docid, param):
  1470.         # copy from a global variable
  1471.         V = int_value(param.get('V', 0))
  1472.         if (V <=0 or V > 4):
  1473.             raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
  1474.         length = int_value(param.get('Length', 40)) # Key length (bits)
  1475.         O = str_value(param['O'])
  1476.         R = int_value(param['R']) # Revision
  1477.         if 5 <= R:
  1478.             raise PDFEncryptionError('Unknown revision: %r' % R)
  1479.         U = str_value(param['U'])
  1480.         P = int_value(param['P'])
  1481.         try:
  1482.             EncMetadata = str_value(param['EncryptMetadata'])
  1483.         except:
  1484.             EncMetadata = 'True'
  1485.         self.is_printable = bool(P & 4)        
  1486.         self.is_modifiable = bool(P & 8)
  1487.         self.is_extractable = bool(P & 16)
  1488.         self.is_annotationable = bool(P & 32)
  1489.         self.is_formsenabled = bool(P & 256)
  1490.         self.is_textextractable = bool(P & 512)
  1491.         self.is_assemblable = bool(P & 1024)
  1492.         self.is_formprintable = bool(P & 2048)
  1493.         # Algorithm 3.2
  1494.         password = (password+self.PASSWORD_PADDING)[:32] # 1
  1495.         hash = hashlib.md5(password) # 2
  1496.         hash.update(O) # 3
  1497.         hash.update(struct.pack('<l', P)) # 4
  1498.         hash.update(docid[0]) # 5
  1499.         # aes special handling if metadata isn't encrypted
  1500.         if EncMetadata == ('False' or 'false'):
  1501.             hash.update('ffffffff'.decode('hex'))
  1502.             # 6
  1503. ##            raise PDFNotImplementedError(
  1504. ##                'Revision 4 encryption is currently unsupported')
  1505.         if 5 <= R:
  1506.             # 8
  1507.             for _ in xrange(50):
  1508.                 hash = hashlib.md5(hash.digest()[:length/8])
  1509.         key = hash.digest()[:length/8]
  1510.         if R == 2:
  1511.             # Algorithm 3.4
  1512.             u1 = ARC4.new(key).decrypt(password)
  1513.         elif R >= 3:
  1514.             # Algorithm 3.5
  1515.             hash = hashlib.md5(self.PASSWORD_PADDING) # 2
  1516.             hash.update(docid[0]) # 3
  1517.             x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
  1518.             for i in xrange(1,19+1):
  1519.                 k = ''.join( chr(ord(c) ^ i) for c in key )
  1520.                 x = ARC4.new(k).decrypt(x)
  1521.             u1 = x+x # 32bytes total
  1522.         if R == 2:
  1523.             is_authenticated = (u1 == U)
  1524.         else:
  1525.             is_authenticated = (u1[:16] == U[:16])
  1526.         if not is_authenticated:
  1527.             raise ADEPTError('Password is not correct.')
  1528. ##            raise PDFPasswordIncorrect
  1529.         self.decrypt_key = key
  1530.         # genkey method
  1531.         if V == 1 or V == 2:
  1532.             self.genkey = self.genkey_v2
  1533.         elif V == 3:
  1534.             self.genkey = self.genkey_v3
  1535.         elif V == 4:
  1536.             self.genkey = self.genkey_v2
  1537.          #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
  1538.         # rc4
  1539.         if V != 4:
  1540.             self.decipher = self.decipher_rc4  # XXX may be AES
  1541.         # aes
  1542.         elif V == 4 and Length == 128:
  1543.             elf.decipher = self.decipher_aes
  1544.         elif V == 4 and Length == 256:
  1545.             raise PDFNotImplementedError('AES256 encryption is currently unsupported')
  1546.         self.ready = True
  1547.         return
  1548.  
  1549.     def initialize_ebx(self, password, docid, param):
  1550.         global KEYFILEPATH
  1551.         self.is_printable = self.is_modifiable = self.is_extractable = True
  1552.         # keyfile path is wrong
  1553.         if KEYFILEPATH == False:
  1554.             errortext = 'Cannot find adeptkey.der keyfile. Use ineptkey to generate it.'
  1555.             raise ADEPTError(errortext)
  1556.         with open(password, 'rb') as f:
  1557.             keyder = f.read()
  1558.         #    KEYFILEPATH = ''
  1559.         key = ASN1Parser([ord(x) for x in keyder])
  1560.         key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)]
  1561.         rsa = RSA.construct(key)
  1562.         length = int_value(param.get('Length', 0)) / 8
  1563.         rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
  1564.         rights = zlib.decompress(rights, -15)
  1565.         rights = etree.fromstring(rights)
  1566.         expr = './/{http://ns.adobe.com/adept}encryptedKey'
  1567.         bookkey = ''.join(rights.findtext(expr)).decode('base64')
  1568.         bookkey = rsa.decrypt(bookkey)
  1569.         if bookkey[0] != '\x02':
  1570.             raise ADEPTError('error decrypting book session key')
  1571.         index = bookkey.index('\0') + 1
  1572.         bookkey = bookkey[index:]
  1573.         ebx_V = int_value(param.get('V', 4))
  1574.         ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
  1575.         # added because of the booktype / decryption book session key error
  1576.         if ebx_V == 3:
  1577.             V = 3        
  1578.         elif ebx_V < 4 or ebx_type < 6:
  1579.             V = ord(bookkey[0])
  1580.             bookkey = bookkey[1:]
  1581.         else:
  1582.             V = 2
  1583.         if length and len(bookkey) != length:
  1584.             raise ADEPTError('error decrypting book session key')
  1585.         self.decrypt_key = bookkey
  1586.         self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
  1587.         self.decipher = self.decrypt_rc4
  1588.         self.ready = True
  1589.         return
  1590.  
  1591.     # fileopen support    
  1592.     def initialize_fopn_flock(self, docid, param):
  1593.         raise ADEPTError('FOPN_fLock not supported, yet ...')
  1594.         # debug mode processing
  1595.         global DEBUG_MODE
  1596.         global IVERSION
  1597.         if DEBUG_MODE == True:
  1598.             if os.access('.',os.W_OK) == True:
  1599.                 debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w')
  1600.             else:
  1601.                 raise ADEPTError('Cannot write debug file, current directory is not writable')
  1602.         self.is_printable = self.is_modifiable = self.is_extractable = True
  1603.         # get parameters and add it to the fo dictionary
  1604.         self.fileopen['V'] = int_value(param.get('V',2))        
  1605.         # crypt base
  1606.         (docid, param) = self.encryption
  1607.         #rights = dict_value(param['Info'])
  1608.         rights = param['Info']        
  1609.         #print rights
  1610.         if DEBUG_MODE == True: debugfile.write(rights + '\n\n')
  1611. ##        for pair in rights.split(';'):
  1612. ##            try:
  1613. ##                key, value = pair.split('=',1)
  1614. ##                self.fileopen[key] = value
  1615. ##            # fix for some misconfigured INFO variables
  1616. ##            except:
  1617. ##                pass
  1618. ##        kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \
  1619. ##                  'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'}
  1620. ##        for keys in  kattr:
  1621. ##            try:
  1622. ##                self.fileopen[kattr[keys]] = self.fileopen[keys]
  1623. ##                del self.fileopen[keys]
  1624. ##            except:
  1625. ##                continue
  1626.         # differentiate OS types
  1627. ##        sysplatform = sys.platform
  1628. ##        # if ostype is Windows
  1629. ##        if sysplatform=='win32':
  1630. ##            self.osuseragent = 'Windows NT 6.0'
  1631. ##            self.get_macaddress = self.get_win_macaddress
  1632. ##            self.fo_sethwids = self.fo_win_sethwids
  1633. ##            self.BrowserCookie = WinBrowserCookie
  1634. ##        elif sysplatform=='linux2':
  1635. ##            adeptout = 'Linux is not supported, yet.\n'
  1636. ##            raise ADEPTError(adeptout)
  1637. ##            self.osuseragent = 'Linux i686'
  1638. ##            self.get_macaddress = self.get_linux_macaddress            
  1639. ##            self.fo_sethwids = self.fo_linux_sethwids            
  1640. ##        else:
  1641. ##            adeptout = ''
  1642. ##            adeptout = adeptout + 'Due to various privacy violations from Apple\n'
  1643. ##            adeptout = adeptout + 'Mac OS X support is disabled by default.'
  1644. ##            raise ADEPTError(adeptout)            
  1645. ##        # add static arguments for http/https request
  1646. ##        self.fo_setattributes()
  1647. ##        # add hardware specific arguments for http/https request        
  1648. ##        self.fo_sethwids()
  1649. ##
  1650. ##        if 'Code' in self.urlresult:            
  1651. ##            if self.fileopen['Length'] == len(self.urlresult['Code']):
  1652. ##                self.decrypt_key = self.urlresult['Code']
  1653. ##            else:
  1654. ##                self.decrypt_key = self.urlresult['Code'].decode('hex')
  1655. ##        else:
  1656. ##            raise ADEPTError('Cannot find decryption key.')
  1657.         self.decrypt_key = 'stuff'
  1658.         self.genkey = self.genkey_v2
  1659.         self.decipher = self.decrypt_rc4
  1660.         self.ready = True
  1661.         return
  1662.  
  1663.     def initialize_fopn(self, docid, param):
  1664.         # debug mode processing
  1665.         global DEBUG_MODE
  1666.         global IVERSION
  1667.         if DEBUG_MODE == True:
  1668.             if os.access('.',os.W_OK) == True:
  1669.                 debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w')
  1670.             else:
  1671.                 raise ADEPTError('Cannot write debug file, current directory is not writable')
  1672.         self.is_printable = self.is_modifiable = self.is_extractable = True
  1673.         # get parameters and add it to the fo dictionary
  1674.         self.fileopen['Length'] = int_value(param.get('Length', 0)) / 8
  1675.         self.fileopen['VEID'] = str_value(param.get('VEID'))
  1676.         self.fileopen['BUILD'] = str_value(param.get('BUILD'))
  1677.         self.fileopen['SVID'] = str_value(param.get('SVID'))
  1678.         self.fileopen['DUID'] = str_value(param.get('DUID'))
  1679.         self.fileopen['V'] = int_value(param.get('V',2))        
  1680.         # crypt base
  1681.         rights = str_value(param.get('INFO')).decode('base64')
  1682.         rights = self.genkey_fileopeninfo(rights)
  1683.         if DEBUG_MODE == True: debugfile.write(rights + '\n\n')    
  1684.         for pair in rights.split(';'):
  1685.             try:
  1686.                 key, value = pair.split('=',1)
  1687.                 self.fileopen[key] = value
  1688.             # fix for some misconfigured INFO variables
  1689.             except:
  1690.                 pass
  1691.         kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \
  1692.                   'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'}
  1693.         for keys in  kattr:
  1694.             # fishing some misconfigured slashs out of it
  1695.             try:
  1696.                 self.fileopen[kattr[keys]] = urllib.quote(self.fileopen[keys],safe='')
  1697.                 del self.fileopen[keys]
  1698.             except:
  1699.                 continue
  1700.         # differentiate OS types
  1701.         sysplatform = sys.platform
  1702.         # if ostype is Windows
  1703.         if sysplatform=='win32':
  1704.             self.osuseragent = 'Windows NT 6.0'
  1705.             self.get_macaddress = self.get_win_macaddress
  1706.             self.fo_sethwids = self.fo_win_sethwids
  1707.             self.BrowserCookie = WinBrowserCookie
  1708.         elif sysplatform=='linux2':
  1709.             adeptout = 'Linux is not supported, yet.\n'
  1710.             raise ADEPTError(adeptout)
  1711.             self.osuseragent = 'Linux i686'
  1712.             self.get_macaddress = self.get_linux_macaddress            
  1713.             self.fo_sethwids = self.fo_linux_sethwids            
  1714.         else:
  1715.             adeptout = ''
  1716.             adeptout = adeptout + 'Mac OS X is not supported, yet.'
  1717.             adeptout = adeptout + 'Read the blogs FAQs for more information'
  1718.             raise ADEPTError(adeptout)            
  1719.         # add static arguments for http/https request
  1720.         self.fo_setattributes()
  1721.         # add hardware specific arguments for http/https request        
  1722.         self.fo_sethwids()
  1723.         #if DEBUG_MODE == True: debugfile.write(self.fileopen)
  1724.         if 'UURL' in self.fileopen:
  1725.             buildurl = self.fileopen['UURL']
  1726.         else:
  1727.             buildurl = self.fileopen['PURL']
  1728.         # fix for bad DPRM structure
  1729.         if self.fileopen['DPRM'][0] != r'/':
  1730.             self.fileopen['DPRM'] = r'/' + self.fileopen['DPRM']
  1731.         # genius fix for bad server urls (IMHO)
  1732.         if '?' in self.fileopen['DPRM']:
  1733.             buildurl = buildurl + self.fileopen['DPRM'] + '&'
  1734.         else:
  1735.             buildurl = buildurl + self.fileopen['DPRM'] + '?'            
  1736.  
  1737.         # debug customization
  1738.         #self.fileopen['Machine'] = ''
  1739.         #self.fileopen['Disk'] = ''
  1740.  
  1741.  
  1742.         surl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\
  1743.                  'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'OSName', 'OSData', 'Language',\
  1744.                  'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\
  1745.                  'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk',\
  1746.                  'FormHFT',\
  1747.                  'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\
  1748.                  'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\
  1749.                  'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\
  1750.                  'FSName',  'FowpKbd', 'OSBuild',\
  1751.                   'RequestSchema')
  1752.        
  1753.         #settings request and special modes
  1754.         if 'EVER' in self.fileopen and float(self.fileopen['EVER']) < 3.8:
  1755.             self.fileopen['Mode'] = 'ICx'
  1756.        
  1757.         origurl = buildurl
  1758.         buildurl = buildurl + 'Request=Setting'        
  1759.         for keys in surl:
  1760.             try:
  1761.                 buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys]
  1762.             except:
  1763.                 continue
  1764.         if DEBUG_MODE == True: debugfile.write( 'settings url:\n')
  1765.         if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n')
  1766.         # custom user agent identification?
  1767.         if 'AGEN' in self.fileopen:
  1768.             useragent = self.fileopen['AGEN']
  1769.             urllib.URLopener.version = useragent
  1770.         # attribute doesn't exist - take the default user agent
  1771.         else:
  1772.             urllib.URLopener.version = self.osuseragent
  1773.         # try to open the url
  1774.         try:
  1775.             u = urllib.urlopen(buildurl)
  1776.             u.geturl()
  1777.             result = u.read()
  1778.         except:
  1779.             raise ADEPTError('No internet connection or a blocking firewall!')
  1780. ##        finally:
  1781. ##            u.close()
  1782.         # getting rid of the line feed
  1783.         if DEBUG_MODE == True: debugfile.write('Settings'+'\n')
  1784.         if DEBUG_MODE == True: debugfile.write(result+'\n\n')
  1785.         #get rid of unnecessary characters
  1786.         result = result.rstrip('\n')
  1787.         result = result.rstrip(chr(13))
  1788.         result = result.lstrip('\n')
  1789.         result = result.lstrip(chr(13))
  1790.         self.surlresult = {}
  1791.         for pair in result.split('&'):
  1792.             try:
  1793.                 key, value = pair.split('=',1)
  1794.                 # fix for bad server response
  1795.                 if key not in self.surlresult:
  1796.                     self.surlresult[key] = value
  1797.             except:
  1798.                 pass
  1799.         if 'RequestSchema' in self.surlresult:
  1800.             self.fileopen['RequestSchema'] = self.surlresult['RequestSchema']
  1801.         if 'ServerSessionData' in self.surlresult:
  1802.             self.fileopen['ServerSessionData'] = self.surlresult['ServerSessionData']
  1803.         if 'SetScope' in self.surlresult:
  1804.             self.fileopen['RequestSchema'] = self.surlresult['SetScope']            
  1805.         #print self.surlresult
  1806.         if 'RetVal' in self.surlresult and 'SEMO' not in self.fileopen and(('Reason' in self.surlresult and \
  1807.            self.surlresult['Reason'] == 'AskUnp') or ('SetTarget' in self.surlresult and\
  1808.                                                self.surlresult['SetTarget'] == 'UnpDlg')):
  1809.             # get user and password dialog
  1810.             try:
  1811.                 self.gen_pw_dialog(self.surlresult['UnpUiName'], self.surlresult['UnpUiPass'],\
  1812.                                    self.surlresult['UnpUiTitle'], self.surlresult['UnpUiOk'],\
  1813.                                    self.surlresult['UnpUiSunk'], self.surlresult['UnpUiComm'])
  1814.             except:
  1815.                 self.gen_pw_dialog()
  1816.            
  1817.         # the fileopen check might not be always right because of strange server responses    
  1818.         if 'SEMO' in self.fileopen and (self.fileopen['SEMO'] == '1'\
  1819.             or self.fileopen['SEMO'] == '2') and ('CSES' in self.fileopen and\
  1820.                                                   self.fileopen['CSES'] != 'fileopen'):
  1821.             # get the url name for the cookie(s)
  1822.             if 'CURL' in self.fileopen:
  1823.                 self.surl = self.fileopen['CURL']
  1824.             if 'CSES' in self.fileopen:
  1825.                 self.cses = self.fileopen['CSES']
  1826.             elif 'PHOS' in self.fileopen:
  1827.                 self.surl = self.fileopen['PHOS']
  1828.             elif 'LHOS' in self.fileopen:
  1829.                 self.surl = self.fileopen['LHOS']
  1830.             else:
  1831.                 raise ADEPTError('unknown Cookie name.\n Check ineptpdf forum for further assistance')
  1832.             self.pwfieldreq = 1
  1833.             # session cookie processing
  1834.             if self.fileopen['SEMO'] == '1':
  1835.                 cookies = self.BrowserCookie()
  1836.                 #print self.cses
  1837.                 #print self.surl
  1838.                 csession = cookies.getcookie(self.cses,self.surl)
  1839.                 if csession != None:
  1840.                     self.fileopen['Session'] = csession
  1841.                     self.gui = False
  1842.                 # fallback
  1843.                 else:
  1844.                     self.pwtk = Tkinter.Tk()
  1845.                     self.pwtk.title('Ineptpdf8')
  1846.                     self.pwtk.minsize(150, 0)
  1847.                     infotxt1 = 'Get the session cookie key manually (Firefox step-by-step:\n'+\
  1848.                                'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\
  1849.                                '-> Search for a cookie from ' + self.surl +' with the\n'+\
  1850.                                'name ' + self.cses +' and copy paste the content field in the\n'+\
  1851.                                'Session Content field. Remove possible spaces or new lines at the '+\
  1852.                                'end\n (cursor must be blinking right behind the last character)'
  1853.                     self.label0 = Tkinter.Label(self.pwtk, text=infotxt1)
  1854.                     self.label0.pack()
  1855.                     self.label1 = Tkinter.Label(self.pwtk, text="Session Content")
  1856.                     self.pwfieldreq = 0
  1857.                     self.gui = True
  1858.             # user cookie processing                                    
  1859.             elif self.fileopen['SEMO'] == '2':
  1860.                 cookies = self.BrowserCookie()
  1861.                 #print self.cses
  1862.                 #print self.surl
  1863.                 name = cookies.getcookie('name',self.surl)
  1864.                 passw = cookies.getcookie('pass',self.surl)                    
  1865.                 if name != None or passw != None:
  1866.                     self.fileopen['UserName'] = urllib.quote(name)
  1867.                     self.fileopen['UserPass'] = urllib.quote(passw)
  1868.                     self.gui = False
  1869.                 # fallback
  1870.                 else:
  1871.                     self.pwtk = Tkinter.Tk()
  1872.                     self.pwtk.title('Ineptpdf8')
  1873.                     self.pwtk.minsize(150, 0)
  1874.                     self.label1 = Tkinter.Label(self.pwtk, text="Username")
  1875.                     infotxt1 = 'Get the user cookie keys manually (Firefox step-by-step:\n'+\
  1876.                                'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\
  1877.                                '-> Search for cookies from ' + self.surl +' with the\n'+\
  1878.                                'name name in the user field and copy paste the content field in the\n'+\
  1879.                                'username field. Do the same with the name pass in the password field).'
  1880.                     self.label0 = Tkinter.Label(self.pwtk, text=infotxt1)
  1881.                     self.label0.pack()                                      
  1882.                     self.pwfieldreq = 1
  1883.                     self.gui = True
  1884. ##            else:
  1885. ##                self.pwtk = Tkinter.Tk()
  1886. ##                self.pwtk.title('Ineptpdf8')
  1887. ##                self.pwtk.minsize(150, 0)
  1888. ##                self.pwfieldreq = 0
  1889. ##                self.label1 = Tkinter.Label(self.pwtk, text="Username")
  1890. ##                self.pwfieldreq = 1
  1891. ##                self.gui = True
  1892.             if self.gui == True:
  1893.                 self.un_entry = Tkinter.Entry(self.pwtk)
  1894.                 # cursor here
  1895.                 self.un_entry.focus()
  1896.                 self.label2 = Tkinter.Label(self.pwtk, text="Password")
  1897.                 self.pw_entry = Tkinter.Entry(self.pwtk, show="*")
  1898.                 self.button = Tkinter.Button(self.pwtk, text='Go for it!', command=self.fo_save_values)
  1899.                 # widget layout, stack vertical
  1900.                 self.label1.pack()
  1901.                 self.un_entry.pack()
  1902.                 # create a password label and field
  1903.                 if self.pwfieldreq == 1:
  1904.                     self.label2.pack()
  1905.                     self.pw_entry.pack()
  1906.                 self.button.pack()
  1907.                 self.pwtk.update()            
  1908.                 # start the event loop
  1909.                 self.pwtk.mainloop()
  1910.          
  1911.         # original request
  1912.         # drive through tupple for building the permission url
  1913.         burl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\
  1914.                  'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'Language',\
  1915.                  'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\
  1916.                  'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk', 'User', 'SaUser', 'SaSID',\
  1917.                  # special security measures
  1918.                  'HostIsDomain', 'PhysHostname', 'LogiHostname', 'SaRefDomain',\
  1919.                  'FormHFT', 'UserName', 'UserPass', 'Session', \
  1920.                  'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\
  1921.                  'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\
  1922.                  'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\
  1923.                  'FSName', 'ServerSessionData', 'FowpKbd', 'OSBuild', \
  1924.                  'DocumentSessionData', 'RequestSchema')
  1925.        
  1926.         buildurl = origurl
  1927.         buildurl = buildurl + 'Request=DocPerm'
  1928.         for keys in burl:
  1929.             try:
  1930.                 buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys]
  1931.             except:
  1932.                 continue
  1933.         if DEBUG_MODE == True: debugfile.write('1st url:'+'\n')
  1934.         if DEBUG_MODE == True: debugfile.write(buildurl+'\n\n')
  1935.         # custom user agent identification?
  1936.         if 'AGEN' in self.fileopen:
  1937.             useragent = self.fileopen['AGEN']
  1938.             urllib.URLopener.version = useragent
  1939.         # attribute doesn't exist - take the default user agent
  1940.         else:
  1941.             urllib.URLopener.version = self.osuseragent
  1942.         # try to open the url
  1943.         try:
  1944.             u = urllib.urlopen(buildurl)
  1945.             u.geturl()
  1946.             result = u.read()
  1947.         except:
  1948.             raise ADEPTError('No internet connection or a blocking firewall!')
  1949. ##        finally:
  1950. ##            u.close()
  1951.         # getting rid of the line feed
  1952.         if DEBUG_MODE == True: debugfile.write('1st preresult'+'\n')
  1953.         if DEBUG_MODE == True: debugfile.write(result+'\n\n')
  1954.         #get rid of unnecessary characters
  1955.         result = result.rstrip('\n')
  1956.         result = result.rstrip(chr(13))
  1957.         result = result.lstrip('\n')
  1958.         result = result.lstrip(chr(13))
  1959.         self.urlresult = {}
  1960.         for pair in result.split('&'):
  1961.             try:
  1962.                 key, value = pair.split('=',1)
  1963.                 self.urlresult[key] = value
  1964.             except:
  1965.                 pass
  1966. ##        if 'RequestSchema' in self.surlresult:
  1967. ##            self.fileopen['RequestSchema'] = self.urlresult['RequestSchema']
  1968.          #self.urlresult
  1969.         #result[0:8] == 'RetVal=1') or (result[0:8] == 'RetVal=2'):
  1970.         if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \
  1971.                                             self.urlresult['RetVal'] != '2' and \
  1972.                                             self.urlresult['RetVal'] != 'Update' and \
  1973.                                             self.urlresult['RetVal'] != 'Answer')):
  1974.            
  1975.             if ('Reason' in self.urlresult and (self.urlresult['Reason'] == 'BadUserPwd'\
  1976.                 or self.urlresult['Reason'] == 'AskUnp')) or ('SwitchTo' in self.urlresult\
  1977.                     and (self.urlresult['SwitchTo'] == 'Dialog')):
  1978.                 if 'ServerSessionData' in self.urlresult:
  1979.                     self.fileopen['ServerSessionData'] = self.urlresult['ServerSessionData']
  1980.                 if 'DocumentSessionData' in self.urlresult:
  1981.                     self.fileopen['DocumentSessionData'] = self.urlresult['DocumentSessionData']        
  1982.                 buildurl = origurl
  1983.                 buildurl = buildurl + 'Request=DocPerm'
  1984.                 self.gen_pw_dialog()
  1985.                 # password not found - fallback
  1986.                 for keys in burl:
  1987.                     try:
  1988.                         buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys]
  1989.                     except:
  1990.                         continue
  1991.                 if DEBUG_MODE == True: debugfile.write( '2ndurl:')
  1992.                 if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n')
  1993.                 # try to open the url
  1994.                 try:
  1995.                     u = urllib.urlopen(buildurl)
  1996.                     u.geturl()
  1997.                     result = u.read()
  1998.                 except:
  1999.                     raise ADEPTError('No internet connection or a blocking firewall!')
  2000.                 # getting rid of the line feed
  2001.                 if DEBUG_MODE == True: debugfile.write( '2nd preresult')
  2002.                 if DEBUG_MODE == True: debugfile.write( result+'\n\n')
  2003.                 #get rid of unnecessary characters
  2004.                 result = result.rstrip('\n')
  2005.                 result = result.rstrip(chr(13))
  2006.                 result = result.lstrip('\n')
  2007.                 result = result.lstrip(chr(13))
  2008.                 self.urlresult = {}
  2009.                 for pair in result.split('&'):
  2010.                     try:
  2011.                         key, value = pair.split('=',1)
  2012.                         self.urlresult[key] = value
  2013.                     except:
  2014.                         pass
  2015.         # did it work?
  2016.         if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \
  2017.                                                     self.urlresult['RetVal'] != '2' and
  2018.                                                     self.urlresult['RetVal'] != 'Update' and \
  2019.                                                     self.urlresult['RetVal'] != 'Answer')):
  2020.             raise ADEPTError('Decryption was not successfull.\nReason: ' + self.urlresult['Error'])
  2021.         # fix for non-standard-conform fileopen pdfs
  2022. ##        if self.fileopen['Length'] != 5 and self.fileopen['Length'] != 16:
  2023. ##            if self.fileopen['V'] == 1:
  2024. ##                self.fileopen['Length'] = 5
  2025. ##            else:
  2026. ##                self.fileopen['Length'] = 16
  2027.         # patch for malformed pdfs
  2028.         #print len(self.urlresult['Code'])
  2029.         #print self.urlresult['Code'].encode('hex')
  2030.         if 'code' in self.urlresult:
  2031.             self.urlresult['Code'] = self.urlresult['code']
  2032.         if 'Code' in self.urlresult:            
  2033.             if len(self.urlresult['Code']) == 5 or len(self.urlresult['Code']) == 16:
  2034.                 self.decrypt_key = self.urlresult['Code']
  2035.             else:
  2036.                 self.decrypt_key = self.urlresult['Code'].decode('hex')
  2037.         else:
  2038.             raise ADEPTError('Cannot find decryption key.')
  2039.         self.genkey = self.genkey_v2
  2040.         self.decipher = self.decrypt_rc4
  2041.         self.ready = True
  2042.         return
  2043.    
  2044.     def gen_pw_dialog(self, Username='Username', Password='Password', Title='User/Password Authentication',\
  2045.                       OK='Proceed', Text1='Authorization', Text2='Enter Required Data'):
  2046.         self.pwtk = Tkinter.Tk()
  2047.         self.pwtk.title(Title)
  2048.         self.pwtk.minsize(150, 0)
  2049.         self.label1 = Tkinter.Label(self.pwtk, text=Text1)
  2050.         self.label2 = Tkinter.Label(self.pwtk, text=Text2)
  2051.         self.label3 = Tkinter.Label(self.pwtk, text=Username)
  2052.         self.pwfieldreq = 1        
  2053.         self.gui = True
  2054.         self.un_entry = Tkinter.Entry(self.pwtk)
  2055.         # cursor here
  2056.         self.un_entry.focus()
  2057.         self.label4 = Tkinter.Label(self.pwtk, text=Password)
  2058.         self.pw_entry = Tkinter.Entry(self.pwtk, show="*")
  2059.         self.button = Tkinter.Button(self.pwtk, text=OK, command=self.fo_save_values)
  2060.         # widget layout, stack vertical
  2061.         self.label1.pack()
  2062.         self.label2.pack()
  2063.         self.label3.pack()        
  2064.         self.un_entry.pack()
  2065.         # create a password label and field
  2066.         if self.pwfieldreq == 1:
  2067.             self.label4.pack()
  2068.             self.pw_entry.pack()
  2069.         self.button.pack()
  2070.         self.pwtk.update()            
  2071.         # start the event loop
  2072.         self.pwtk.mainloop()
  2073.        
  2074.     # genkey functions
  2075.     def genkey_v2(self, objid, genno):
  2076.         objid = struct.pack('<L', objid)[:3]
  2077.         genno = struct.pack('<L', genno)[:2]
  2078.         key = self.decrypt_key + objid + genno
  2079.         hash = hashlib.md5(key)
  2080.         key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
  2081.         return key
  2082.    
  2083.     def genkey_v3(self, objid, genno):
  2084.         objid = struct.pack('<L', objid ^ 0x3569ac)
  2085.         genno = struct.pack('<L', genno ^ 0xca96)
  2086.         key = self.decrypt_key
  2087.         key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + 'sAlT'
  2088.         hash = hashlib.md5(key)
  2089.         key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
  2090.         return key
  2091.  
  2092.     # aes v2 and v4 algorithm
  2093.     def genkey_v4(self, objid, genno):
  2094.         objid = struct.pack('<L', objid)[:3]
  2095.         genno = struct.pack('<L', genno)[:2]
  2096.         key = self.decrypt_key + objid + genno + 'sAlT'
  2097.         hash = hashlib.md5(key)
  2098.         key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
  2099.         return key
  2100.  
  2101.     def decrypt_aes(self, objid, genno, data):
  2102.         key = self.genkey(objid, genno)
  2103.         ivector = data[:16]
  2104.         data = data[16:]
  2105.         plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
  2106.         # remove pkcs#5 aes padding
  2107.         cutter = -1 * ord(plaintext[-1])
  2108.         #print cutter
  2109.         plaintext = plaintext[:cutter]
  2110.         return plaintext
  2111.  
  2112.     def decrypt_aes256(self, objid, genno, data):
  2113.         key = self.genkey(objid, genno)
  2114.         ivector = data[:16]
  2115.         data = data[16:]
  2116.         plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
  2117.         # remove pkcs#5 aes padding
  2118.         cutter = -1 * ord(plaintext[-1])
  2119.         #print cutter
  2120.         plaintext = plaintext[:cutter]
  2121.         return plaintext
  2122.    
  2123.     def decrypt_rc4(self, objid, genno, data):
  2124.         key = self.genkey(objid, genno)
  2125.         return ARC4.new(key).decrypt(data)
  2126.  
  2127.     # fileopen user/password dialog    
  2128.     def fo_save_values(self):
  2129.         getout = 0
  2130.         username = 0
  2131.         password = 0
  2132.         username = self.un_entry.get()
  2133.         if self.pwfieldreq == 1:        
  2134.             password = self.pw_entry.get()
  2135.         un_length = len(username)
  2136.         if self.pwfieldreq == 1:                
  2137.             pw_length = len(password)
  2138.         if (un_length != 0):
  2139.             if self.pwfieldreq == 1:
  2140.                 if (pw_length != 0):
  2141.                     getout = 1
  2142.             else:
  2143.                 getout = 1
  2144.         if getout == 1:
  2145.             if 'SEMO' in self.fileopen and self.fileopen['SEMO'] == '1':
  2146.                 self.fileopen['Session'] = urllib.quote(username)
  2147.             else:
  2148.                 self.fileopen['UserName'] = urllib.quote(username)
  2149.             if self.pwfieldreq == 1:
  2150.                 self.fileopen['UserPass'] = urllib.quote(password)
  2151.             else:
  2152.                 pass
  2153.                 #self.fileopen['UserPass'] = self.fileopen['UserName']
  2154.             # doesn't always close the password window, who
  2155.             # knows why (Tkinter secrets ;=))
  2156.             self.pwtk.quit()
  2157.    
  2158.    
  2159.     def fo_setattributes(self):
  2160.         self.fileopen['Request']='DocPerm'
  2161.         self.fileopen['Mode']='CNR'
  2162.         self.fileopen['DocStrFmt']='ASCII'
  2163.         self.fileopen['Language']='ENU'
  2164.         self.fileopen['LngLCID']='ENU'
  2165.         self.fileopen['LngRFC1766']='en'
  2166.         self.fileopen['LngISO4Char']='en-us'
  2167.         self.fileopen['ProdVer']='1.8.7.9'
  2168.         self.fileopen['FormHFT']='Yes'
  2169.         self.fileopen['SelServer']='Yes'
  2170.         self.fileopen['AcroCanEdit']='Yes'
  2171.         self.fileopen['AcroPrefIDib']='Yes'
  2172.         self.fileopen['InBrowser']='Unk'
  2173.         self.fileopen['CliAppName']=''
  2174.         self.fileopen['DocIsLocal']='Yes'
  2175.         self.fileopen['FowpKbd']='Yes'
  2176.         self.fileopen['RequestSchema']='Default'
  2177.        
  2178.     # get nic mac address
  2179.     def get_linux_macaddress(self):
  2180.         try:
  2181.             for line in os.popen("/sbin/ifconfig"):
  2182.                 if line.find('Ether') > -1:
  2183.                     mac = line.split()[4]
  2184.                     break
  2185.             return mac.replace(':','')
  2186.         except:
  2187.             raise ADEPTError('Cannot find MAC address. Get forum help.')
  2188.  
  2189.     def get_win_macaddress(self):
  2190.         try:
  2191.             gasize = c_ulong(5000)
  2192.             p = create_string_buffer(5000)
  2193.             GetAdaptersInfo = windll.iphlpapi.GetAdaptersInfo
  2194.             GetAdaptersInfo(byref(p),byref(gasize))
  2195.             return p[0x194:0x19a].encode('hex')
  2196.         except:
  2197.             raise ADEPTError('Cannot find MAC address. Get forum help.')
  2198.        
  2199.     # custom conversion 5 bytes to 8 chars method
  2200.     def fo_convert5to8(self, edisk):
  2201.         # byte to number/char mapping table
  2202.         darray=[0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,\
  2203.                 0x46,0x47,0x48,0x4A,0x4B,0x4C,0x4D,0x4E,0x50,0x51,0x52,0x53,0x54,\
  2204.                 0x55,0x56,0x57,0x58,0x59,0x5A]
  2205.         pdid = struct.pack('<I', int(edisk[0:4].encode("hex"),16))
  2206.         pdid = int(pdid.encode("hex"),16)
  2207.         outputhw = ''
  2208.         # disk id processing
  2209.         for i in range(0,6):
  2210.             index = pdid & 0x1f
  2211.             # shift the disk id 5 bits to the right
  2212.             pdid = pdid >> 5
  2213.             outputhw = outputhw + chr(darray[index])
  2214.         pdid = (ord(edisk[4]) << 2)|pdid
  2215.         # get the last 2 bits from the hwid + low part of the cpuid
  2216.         for i in range(0,2):
  2217.             index = pdid & 0x1f
  2218.             # shift the disk id 5 bits to the right
  2219.             pdid = pdid >> 5
  2220.             outputhw = outputhw + chr(darray[index])
  2221.         return outputhw
  2222.  
  2223.     # Linux processing
  2224.     def fo_linux_sethwids(self):
  2225.         # linux specific attributes
  2226.         self.fileopen['OSType']='Linux'
  2227.         self.fileopen['AcroProduct']='AcroReader'
  2228.         self.fileopen['AcroReader']='Yes'
  2229.         self.fileopen['AcroVersion']='9.101'
  2230.         self.fileopen['FSName']='ext3'    
  2231.         self.fileopen['Build']='878'
  2232.         self.fileopen['ProdVer']='1.8.5.1'
  2233.         self.fileopen['OSBuild']='2.6.33'        
  2234.         # write hardware keys
  2235.         hwkey = 0
  2236.         pmac = self.get_macaddress().decode("hex");
  2237.         self.fileopen['Disk'] = self.fo_convert5to8(pmac[1:])
  2238.         # get primary used default mac address
  2239.         self.fileopen['Machine'] = self.fo_convert5to8(pmac[1:])
  2240.         # get uuid
  2241.         # check for reversed offline handler 6AB83F4Ah + AFh 6AB83F4Ah
  2242.         if 'LILA' in self.fileopen:
  2243.             pass
  2244.         if 'Ident4ID' in self.fileopen:
  2245.             self.fileopen['User'] = getpass.getuser()
  2246.             self.fileopen['SaUser'] = getpass.getuser()
  2247.             try:
  2248.                 cuser = winreg.HKEY_CURRENT_USER
  2249.                 FOW3_UUID = 'Software\\Fileopen'
  2250.                 regkey = winreg.OpenKey(cuser, FOW3_UUID)
  2251.                 userkey = winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0]
  2252. #                if self.genkey_cryptmach(userkey)[0:4] != 'ec20':
  2253.                 self.fileopen['Uuid'] = self.genkey_cryptmach(userkey)[4:]
  2254. ##                elif self.genkey_cryptmach(userkey)[0:4] != 'ec20':
  2255. ##                    self.fileopen['Uuid'] = self.genkey_cryptmach(userkey,1)[4:]
  2256. ##                else:
  2257.             except:
  2258.                 raise ADEPTError('Cannot find FowP3Uuid file - reason might be Adobe (Reader) X.'\
  2259.                                  'Read the FAQs for more information how to solve the problem.')
  2260.         else:
  2261.             self.fileopen['Uuid'] = str(uuid.uuid1())
  2262.         # get time stamp
  2263.         self.fileopen['Stamp'] = str(time.time())[:-3]
  2264.         # get fileopen input pdf name + path
  2265.         self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\
  2266.                                       + urllib.quote(os.path.normpath(INPUTFILEPATH))
  2267.         # clear the link
  2268.         #INPUTFILEPATH = ''
  2269. ##        # get volume name (urllib quote necessairy?) urllib.quote(
  2270. ##        self.fileopen['VolName'] = win32api.GetVolumeInformation("C:\\")[0]
  2271. ##        # get volume serial number
  2272. ##        self.fileopen['VolSN'] = str(win32api.GetVolumeInformation("C:\\")[1])
  2273.         return
  2274.  
  2275.     # Windows processing
  2276.     def fo_win_sethwids(self):
  2277.         # Windows specific attributes        
  2278.         self.fileopen['OSType']='Windows'
  2279.         self.fileopen['OSName']='Vista'
  2280.         self.fileopen['OSData']='Service%20Pack%204'        
  2281.         self.fileopen['AcroProduct']='Reader'
  2282.         self.fileopen['AcroReader']='Yes'    
  2283.         self.fileopen['OSBuild']='7600'
  2284.         self.fileopen['AcroVersion']='9.1024'
  2285.         self.fileopen['Build']='879'        
  2286.         # write hardware keys
  2287.         hwkey = 0
  2288.         # get the os type and save it in ostype
  2289.         try:
  2290.             import win32api
  2291.             import win32security
  2292.             import win32file
  2293.             import _winreg as winreg                
  2294.         except:
  2295.             raise ADEPTError('PyWin Extension (Win32API module) needed.\n'+\
  2296.                              'Download from http://sourceforge.net/projects/pywin32/files/ ')
  2297.         try:
  2298.             v0 = win32api.GetVolumeInformation('C:\\')
  2299.             v1 = win32api.GetSystemInfo()[6]
  2300.             # fix for possible negative integer (Python problem)
  2301.             volserial = v0[1] & 0xffffffff
  2302.             lowcpu = v1 & 255
  2303.             highcpu = (v1 >> 8) & 255
  2304.             # changed to int
  2305.             volserial = struct.pack('<I', int(volserial))
  2306.             lowcpu   = struct.pack('B', lowcpu)
  2307.             highcpu = struct.pack('B', highcpu)
  2308.             encrypteddisk = volserial + lowcpu + highcpu
  2309.             self.fileopen['Disk'] = self.fo_convert5to8(encrypteddisk)            
  2310.         except:
  2311.             # no c system drive available empty disk attribute
  2312.             self.fileopen['Disk'] = ''          
  2313.         # get primary used default mac address
  2314.         pmac = self.get_macaddress().decode("hex");
  2315.         self.fileopen['Machine'] = self.fo_convert5to8(pmac[1:])
  2316.         if 'LIFF' in self.fileopen:
  2317.             if 'Yes' in self.fileopen['LIFF']:
  2318.                 hostname = socket.gethostname()
  2319.                 self.fileopen['HostIsDomain']='Yes'
  2320.                 if '1' in self.fileopen['LIFF']:
  2321.                     self.fileopen['PhysHostname']= hostname
  2322.                     self.fileopen['LogiHostname']= hostname
  2323.                     self.fileopen['SaRefDomain']= hostname
  2324.         # default users
  2325.         self.user = win32api.GetUserName().lower()
  2326.         self.sauser = win32api.GetUserName()                      
  2327.         # get uuid
  2328.         # check for reversed offline handler
  2329.         if 'LILA' in self.fileopen and self.fileopen['LILA'] == 'Yes':
  2330. ##            self.fileopen['User'] = win32api.GetUserName().lower()
  2331. ##            self.fileopen['SaUser'] = win32api.GetUserName()
  2332.          
  2333.             # get sid / sasid
  2334.             try:
  2335.                 psid = win32security.LookupAccountName("",self.sauser)[0]
  2336.                 psid = win32security.ConvertSidToStringSid(psid)
  2337.                 self.fileopen['SaSID'] = psid
  2338.                 self.fileopen['User'] = urllib.quote(self.user)
  2339.                 self.fileopen['SaUser'] = urllib.quote(self.sauser)                
  2340.             # didn't work use a generic one
  2341.             except:
  2342.                 self.fileopen['SaSID'] = 'S-1-5-21-1380067357-584463869-1343024091-1000'
  2343.         #if 'Ident4d' in self.fileopen or 'LILA' in self.fileopen:
  2344.         # always calculate the right uuid
  2345.         userkey = []        
  2346.         try:
  2347.             cuser = winreg.HKEY_CURRENT_USER
  2348.             FOW3_UUID = 'Software\\Fileopen'
  2349.             regkey = winreg.OpenKey(cuser, FOW3_UUID)
  2350.             userkey.append(winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0])
  2351.         except:
  2352.             pass
  2353.         try:
  2354.             fopath = os.environ['AppData']+'\\FileOpen\\'
  2355.             fofilename = 'Fowpmadi.txt'
  2356.             f = open(fopath+fofilename, 'rb')
  2357.             userkey.append(f.read()[0:40])
  2358.             f.close()
  2359.         except:
  2360.             pass
  2361.         if not userkey:
  2362.             raise ADEPTError('Cannot find FowP3Uuid in registry or file.\n'\
  2363.                                  +'Did Adobe (Reader) open the pdf file?')
  2364.         cresult = self.genkey_cryptmach(userkey)
  2365.         if cresult != False:
  2366.             self.fileopen['Uuid'] = cresult
  2367.         # kind of a long shot we'll see about it
  2368.         else:
  2369.             self.fileopen['Uuid'] = str(uuid.uuid1())
  2370. ##        else:
  2371. ##            self.fileopen['Uuid'] = str(uuid.uuid1())
  2372.         # get time stamp
  2373.         self.fileopen['Stamp'] = str(time.time())[:-3]
  2374.         # get fileopen input pdf name + path
  2375.         # print INPUTFILEPATH
  2376.         self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\
  2377.                                       + urllib.quote(INPUTFILEPATH)
  2378.         # determine voltype
  2379.         voltype = ('Unknown', 'Invalid', 'Removable', 'Fixed', 'Remote', 'CDRom', 'RamDisk')
  2380.         dletter = os.path.splitdrive(INPUTFILEPATH)[0] + '\\'
  2381.         self.fileopen['VolType'] = voltype[win32file.GetDriveType(dletter)]        
  2382.         # get volume name (urllib quote necessairy?) urllib.quote(
  2383.         self.fileopen['VolName'] = urllib.quote(win32api.GetVolumeInformation(dletter)[0])
  2384.         # get volume serial number (fix for possible negative numbers)          
  2385.         self.fileopen['VolSN'] = str(win32api.GetVolumeInformation(dletter)[1])
  2386.         # no c volume so skip it
  2387.         self.fileopen['FSName'] = win32api.GetVolumeInformation(dletter)[4]
  2388.         # get previous mac address or disk handling
  2389.         userkey = []
  2390.         try:
  2391.             cuser = winreg.HKEY_CURRENT_USER
  2392.             FOW3_UUID = 'Software\\Fileopen'
  2393.             regkey = winreg.OpenKey(cuser, FOW3_UUID)
  2394.             userkey.append(winreg.QueryValueEx(regkey, 'Fowp3Madi')[0])
  2395.         except:
  2396.             pass
  2397.         try:
  2398.             fopath = os.environ['AppData']+'\\FileOpen\\'
  2399.             fofilename = 'Fowpmadi.txt'
  2400.             f = open(fopath+fofilename, 'rb')
  2401.             userkey.append(f.read()[40:])
  2402.             f.close()
  2403.         except:
  2404.             pass
  2405.         if not userkey:
  2406.             raise ADEPTError('Cannot find FowP3Madi in registry or file.\n'\
  2407.                              +'Did Adobe Reader open the pdf file?')
  2408.         cresult = self.genkey_cryptmach(userkey)
  2409.         if cresult != False:
  2410.             machdisk = self.genkey_cryptmach(userkey)
  2411.             machine = machdisk[:8]
  2412.             disk = machdisk[8:]
  2413.         # did not find the required information, false it
  2414.         else:
  2415.             machdisk = False
  2416.             machine = False
  2417.             disk = False
  2418.         if machine != self.fileopen['Machine'] and machdisk != False:
  2419.             self.fileopen['PrevMach'] = machine
  2420.         if disk != self.fileopen['Disk'] and machdisk != False:
  2421.             self.fileopen['PrevDisk'] = disk        
  2422.         return
  2423.  
  2424.     # decryption routine for the INFO area
  2425.     def genkey_fileopeninfo(self, data):
  2426.         input1 = struct.pack('L', 0xa4da49de)
  2427.         seed   = struct.pack('B', 0x82)
  2428.         key = input1[3] + input1[2] +input1[1] +input1[0] + seed
  2429.         hash = hashlib.md5()
  2430.         key = hash.update(key)
  2431.         spointer4 = struct.pack('<L', 0xec8d6c58)
  2432.         seed = struct.pack('B', 0x07)
  2433.         key = spointer4[3] + spointer4[2] + spointer4[1] + spointer4[0] + seed
  2434.         key = hash.update(key)
  2435.         md5 = hash.digest()
  2436.         key = md5[0:10]
  2437.         return ARC4.new(key).decrypt(data)
  2438.  
  2439.     def genkey_cryptmach(self, data):
  2440.         # nested subfunction
  2441.         def genkeysub(uname, mode=False):
  2442.             key_string = '37A4DA49DE82064939A60B1D8D7B5F0F8873B6D93E'.decode('hex')
  2443.             m = hashlib.md5()
  2444.             m.update(key_string[:3])
  2445.             m.update(uname[:13]) # max 13 characters 13 - sizeof(username)
  2446.             if (13 - len(uname)) > 0 and mode == True:
  2447.                 m.update(key_string[:(13-len(uname))])
  2448.             md5sum = m.digest()[0:16]
  2449.             # print md5sum.encode('hex')
  2450.             # normal ident4id calculation
  2451.             retval = []
  2452.             for sdata in data:
  2453.                 retval.append(ARC4.new(md5sum).decrypt(sdata))
  2454.             for rval in retval:
  2455.                 if rval[:4] == 'ec20':
  2456.                     return rval[4:]
  2457.             return False
  2458.         # start normal execution    
  2459.         # list for username variants
  2460.         unamevars = []
  2461.         # fill username variants list
  2462.         unamevars.append(self.user)
  2463.         unamevars.append(self.user + chr(0))
  2464.         unamevars.append(self.user.lower())
  2465.         unamevars.append(self.user.lower() + chr(0))
  2466.         unamevars.append(self.user.upper())
  2467.         unamevars.append(self.user.upper() + chr(0))
  2468.         # go through it
  2469.         for uname in unamevars:
  2470.             result = genkeysub(uname, True)
  2471.             if result != False:
  2472.               return result            
  2473.             result = genkeysub(uname)
  2474.             if result != False:
  2475.               return result
  2476.         # didn't find it, return false
  2477.         return False
  2478. ##        raise ADEPTError('Unsupported Ident4D Decryption,\n'+\
  2479. ##                             'report the bug to the ineptpdf script forum')                
  2480.                
  2481.     KEYWORD_OBJ = PSKeywordTable.intern('obj')
  2482.    
  2483.     def getobj(self, objid):
  2484.         if not self.ready:
  2485.             raise PDFException('PDFDocument not initialized')
  2486.         #assert self.xrefs
  2487.         if objid in self.objs:
  2488.             genno = 0
  2489.             obj = self.objs[objid]
  2490.         else:
  2491.             for xref in self.xrefs:
  2492.                 try:
  2493.                     (stmid, index) = xref.getpos(objid)
  2494.                     break
  2495.                 except KeyError:
  2496.                     pass
  2497.             else:
  2498.                 #if STRICT:
  2499.                 #    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
  2500.                 return None
  2501.             if stmid:
  2502.                 if gen_xref_stm:
  2503.                     return PDFObjStmRef(objid, stmid, index)
  2504. # Stuff from pdfminer: extract objects from object stream
  2505.                 stream = stream_value(self.getobj(stmid))
  2506.                 if stream.dic.get('Type') is not LITERAL_OBJSTM:
  2507.                     if STRICT:
  2508.                         raise PDFSyntaxError('Not a stream object: %r' % stream)
  2509.                 try:
  2510.                     n = stream.dic['N']
  2511.                 except KeyError:
  2512.                     if STRICT:
  2513.                         raise PDFSyntaxError('N is not defined: %r' % stream)
  2514.                     n = 0
  2515.  
  2516.                 if stmid in self.parsed_objs:
  2517.                     objs = self.parsed_objs[stmid]
  2518.                 else:
  2519.                     parser = PDFObjStrmParser(stream.get_data(), self)
  2520.                     objs = []
  2521.                     try:
  2522.                         while 1:
  2523.                             (_,obj) = parser.nextobject()
  2524.                             objs.append(obj)
  2525.                     except PSEOF:
  2526.                         pass
  2527.                     self.parsed_objs[stmid] = objs
  2528.                 genno = 0
  2529.                 i = n*2+index
  2530.                 try:
  2531.                     obj = objs[i]
  2532.                 except IndexError:
  2533.                     raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
  2534.                 if isinstance(obj, PDFStream):
  2535.                     obj.set_objid(objid, 0)
  2536. ###
  2537.             else:
  2538.                 self.parser.seek(index)
  2539.                 (_,objid1) = self.parser.nexttoken() # objid
  2540.                 (_,genno) = self.parser.nexttoken() # genno
  2541.                 #assert objid1 == objid, (objid, objid1)
  2542.                 (_,kwd) = self.parser.nexttoken()
  2543.         # #### hack around malformed pdf files
  2544.         #        assert objid1 == objid, (objid, objid1)
  2545. ##                if objid1 != objid:
  2546. ##                    x = []
  2547. ##                    while kwd is not self.KEYWORD_OBJ:
  2548. ##                        (_,kwd) = self.parser.nexttoken()
  2549. ##                        x.append(kwd)
  2550. ##                    if x:
  2551. ##                        objid1 = x[-2]
  2552. ##                        genno = x[-1]
  2553. ##                
  2554.                 if kwd is not self.KEYWORD_OBJ:
  2555.                     raise PDFSyntaxError(
  2556.                         'Invalid object spec: offset=%r' % index)
  2557.                 (_,obj) = self.parser.nextobject()
  2558.                 if isinstance(obj, PDFStream):
  2559.                     obj.set_objid(objid, genno)
  2560.                 if self.decipher:
  2561.                     obj = decipher_all(self.decipher, objid, genno, obj)
  2562.             self.objs[objid] = obj
  2563.         return obj
  2564.  
  2565. # helper class for cookie retrival
  2566. class WinBrowserCookie():
  2567.     def __init__(self):
  2568.         pass
  2569.     def getcookie(self, cname, chost):
  2570.         # check firefox db
  2571.         fprofile =  os.environ['AppData']+r'\Mozilla\Firefox'
  2572.         pinifile = 'profiles.ini'
  2573.         fini = os.path.normpath(fprofile + '\\' + pinifile)
  2574.         try:
  2575.             with open(fini,'r') as ffini:
  2576.                 firefoxini =  ffini.read()
  2577.         # Firefox not installed or on an USB stick
  2578.         except:
  2579.             return None
  2580.         for pair in firefoxini.split('\n'):
  2581.             try:
  2582.                 key, value = pair.split('=',1)
  2583.                 if key == 'Path':
  2584.                     fprofile = os.path.normpath(fprofile+'//'+value+'//'+'cookies.sqlite')
  2585.                     break
  2586.             # asdf
  2587.             except:
  2588.                 continue
  2589.         if os.path.isfile(fprofile):
  2590.             try:
  2591.                 con = sqlite3.connect(fprofile,1)
  2592.             except:
  2593.                 raise ADEPTError('Firefox Cookie data base locked. Close Firefox and try again')
  2594.             cur = con.cursor()
  2595.             try:            
  2596.                 cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost))
  2597.             except Exception:
  2598.                 raise ADEPTError('Firefox Cookie database is locked. Close Firefox and try again')
  2599.             try:
  2600.                 return cur.fetchone()[0]
  2601.             except Exception:
  2602.                 # sometimes is a dot in front of the host
  2603.                 chost = '.'+chost
  2604.                 cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost))
  2605.                 try:
  2606.                     return cur.fetchone()[0]
  2607.                 except:
  2608.                     return None
  2609.                
  2610. class PDFObjStmRef(object):
  2611.     maxindex = 0
  2612.     def __init__(self, objid, stmid, index):
  2613.         self.objid = objid
  2614.         self.stmid = stmid
  2615.         self.index = index
  2616.         if index > PDFObjStmRef.maxindex:
  2617.             PDFObjStmRef.maxindex = index
  2618.  
  2619.    
  2620. ##  PDFParser
  2621. ##
  2622. class PDFParser(PSStackParser):
  2623.  
  2624.     def __init__(self, doc, fp):
  2625.         PSStackParser.__init__(self, fp)
  2626.         self.doc = doc
  2627.         self.doc.set_parser(self)
  2628.         return
  2629.  
  2630.     def __repr__(self):
  2631.         return '<PDFParser>'
  2632.  
  2633.     KEYWORD_R = PSKeywordTable.intern('R')
  2634.     KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
  2635.     KEYWORD_STREAM = PSKeywordTable.intern('stream')
  2636.     KEYWORD_XREF = PSKeywordTable.intern('xref')
  2637.     KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
  2638.     def do_keyword(self, pos, token):
  2639.         if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
  2640.             self.add_results(*self.pop(1))
  2641.             return
  2642.         if token is self.KEYWORD_ENDOBJ:
  2643.             self.add_results(*self.pop(4))
  2644.             return
  2645.        
  2646.         if token is self.KEYWORD_R:
  2647.             # reference to indirect object
  2648.             try:
  2649.                 ((_,objid), (_,genno)) = self.pop(2)
  2650.                 (objid, genno) = (int(objid), int(genno))
  2651.                 obj = PDFObjRef(self.doc, objid, genno)
  2652.                 self.push((pos, obj))
  2653.             except PSSyntaxError:
  2654.                 pass
  2655.             return
  2656.            
  2657.         if token is self.KEYWORD_STREAM:
  2658.             # stream object
  2659.             ((_,dic),) = self.pop(1)
  2660.             dic = dict_value(dic)
  2661.             try:
  2662.                 objlen = int_value(dic['Length'])
  2663.             except KeyError:
  2664.                 if STRICT:
  2665.                     raise PDFSyntaxError('/Length is undefined: %r' % dic)
  2666.                 objlen = 0
  2667.             self.seek(pos)
  2668.             try:
  2669.                 (_, line) = self.nextline()  # 'stream'
  2670.             except PSEOF:
  2671.                 if STRICT:
  2672.                     raise PDFSyntaxError('Unexpected EOF')
  2673.                 return
  2674.             pos += len(line)
  2675.             self.fp.seek(pos)
  2676.             data = self.fp.read(objlen)
  2677.             self.seek(pos+objlen)
  2678.             while 1:
  2679.                 try:
  2680.                     (linepos, line) = self.nextline()
  2681.                 except PSEOF:
  2682.                     if STRICT:
  2683.                         raise PDFSyntaxError('Unexpected EOF')
  2684.                     break
  2685.                 if 'endstream' in line:
  2686.                     i = line.index('endstream')
  2687.                     objlen += i
  2688.                     data += line[:i]
  2689.                     break
  2690.                 objlen += len(line)
  2691.                 data += line
  2692.             self.seek(pos+objlen)
  2693.             obj = PDFStream(dic, data, self.doc.decipher)
  2694.             self.push((pos, obj))
  2695.             return
  2696.        
  2697.         # others
  2698.         self.push((pos, token))
  2699.         return
  2700.  
  2701.     def find_xref(self):
  2702.         # search the last xref table by scanning the file backwards.
  2703.         prev = None
  2704.         for line in self.revreadlines():
  2705.             line = line.strip()
  2706.             if line == 'startxref': break
  2707.             if line:
  2708.                 prev = line
  2709.         else:
  2710.             raise PDFNoValidXRef('Unexpected EOF')
  2711.         return int(prev)
  2712.  
  2713.     # read xref table
  2714.     def read_xref_from(self, start, xrefs):
  2715.         self.seek(start)
  2716.         self.reset()
  2717.         try:
  2718.             (pos, token) = self.nexttoken()
  2719.         except PSEOF:
  2720.             raise PDFNoValidXRef('Unexpected EOF')
  2721.         if isinstance(token, int):
  2722.             # XRefStream: PDF-1.5
  2723.             if GEN_XREF_STM == 1:
  2724.                 global gen_xref_stm
  2725.                 gen_xref_stm = True
  2726.             self.seek(pos)
  2727.             self.reset()
  2728.             xref = PDFXRefStream()
  2729.             xref.load(self)
  2730.         else:
  2731.             if token is not self.KEYWORD_XREF:
  2732.                 raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
  2733.                                      (pos, token))
  2734.             self.nextline()
  2735.             xref = PDFXRef()
  2736.             xref.load(self)
  2737.         xrefs.append(xref)
  2738.         trailer = xref.trailer
  2739.         if 'XRefStm' in trailer:
  2740.             pos = int_value(trailer['XRefStm'])
  2741.             self.read_xref_from(pos, xrefs)
  2742.         if 'Prev' in trailer:
  2743.             # find previous xref
  2744.             pos = int_value(trailer['Prev'])
  2745.             self.read_xref_from(pos, xrefs)
  2746.         return
  2747.        
  2748.     # read xref tables and trailers
  2749.     def read_xref(self):
  2750.         xrefs = []
  2751.         trailerpos = None
  2752.         try:
  2753.             pos = self.find_xref()
  2754.             self.read_xref_from(pos, xrefs)
  2755.         except PDFNoValidXRef:
  2756.             # fallback
  2757.             self.seek(0)
  2758.             pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
  2759.             offsets = {}
  2760.             xref = PDFXRef()
  2761.             while 1:
  2762.                 try:
  2763.                     (pos, line) = self.nextline()
  2764.                 except PSEOF:
  2765.                     break
  2766.                 if line.startswith('trailer'):
  2767.                     trailerpos = pos # remember last trailer
  2768.                 m = pat.match(line)
  2769.                 if not m: continue
  2770.                 (objid, genno) = m.groups()
  2771.                 offsets[int(objid)] = (0, pos)
  2772.             if not offsets: raise
  2773.             xref.offsets = offsets
  2774.             if trailerpos:
  2775.                 self.seek(trailerpos)
  2776.                 xref.load_trailer(self)
  2777.                 xrefs.append(xref)
  2778.         return xrefs
  2779.  
  2780. ##  PDFObjStrmParser
  2781. ##
  2782. class PDFObjStrmParser(PDFParser):
  2783.  
  2784.     def __init__(self, data, doc):
  2785.         PSStackParser.__init__(self, StringIO(data))
  2786.         self.doc = doc
  2787.         return
  2788.  
  2789.     def flush(self):
  2790.         self.add_results(*self.popall())
  2791.         return
  2792.  
  2793.     KEYWORD_R = KWD('R')
  2794.     def do_keyword(self, pos, token):
  2795.         if token is self.KEYWORD_R:
  2796.             # reference to indirect object
  2797.             try:
  2798.                 ((_,objid), (_,genno)) = self.pop(2)
  2799.                 (objid, genno) = (int(objid), int(genno))
  2800.                 obj = PDFObjRef(self.doc, objid, genno)
  2801.                 self.push((pos, obj))
  2802.             except PSSyntaxError:
  2803.                 pass
  2804.             return
  2805.         # others
  2806.         self.push((pos, token))
  2807.         return
  2808.  
  2809. ###
  2810. ### My own code, for which there is none else to blame
  2811.  
  2812. class PDFSerializer(object):
  2813.     def __init__(self, inf, keypath):
  2814.         global GEN_XREF_STM, gen_xref_stm
  2815.         gen_xref_stm = GEN_XREF_STM > 1
  2816.         self.version = inf.read(8)
  2817.         inf.seek(0)
  2818.         self.doc = doc = PDFDocument()
  2819.         parser = PDFParser(doc, inf)
  2820.         doc.initialize(keypath)
  2821.         self.objids = objids = set()
  2822.         for xref in reversed(doc.xrefs):
  2823.             trailer = xref.trailer
  2824.             for objid in xref.objids():
  2825.                 objids.add(objid)
  2826.         trailer = dict(trailer)
  2827.         trailer.pop('Prev', None)
  2828.         trailer.pop('XRefStm', None)
  2829.         if 'Encrypt' in trailer:
  2830.             objids.remove(trailer.pop('Encrypt').objid)
  2831.         self.trailer = trailer
  2832.  
  2833.     def dump(self, outf):
  2834.         self.outf = outf
  2835.         self.write(self.version)
  2836.         self.write('\n%\xe2\xe3\xcf\xd3\n')
  2837.         doc = self.doc
  2838.         objids = self.objids
  2839.         xrefs = {}
  2840.         maxobj = max(objids)
  2841.         trailer = dict(self.trailer)
  2842.         trailer['Size'] = maxobj + 1
  2843.         for objid in objids:
  2844.             obj = doc.getobj(objid)
  2845.             if isinstance(obj, PDFObjStmRef):
  2846.                 xrefs[objid] = obj
  2847.                 continue
  2848.             if obj is not None:
  2849.                 try:
  2850.                     genno = obj.genno
  2851.                 except AttributeError:
  2852.                     genno = 0
  2853.                 xrefs[objid] = (self.tell(), genno)
  2854.                 self.serialize_indirect(objid, obj)
  2855.         startxref = self.tell()
  2856.  
  2857.         if not gen_xref_stm:
  2858.             self.write('xref\n')
  2859.             self.write('0 %d\n' % (maxobj + 1,))
  2860.             for objid in xrange(0, maxobj + 1):
  2861.                 if objid in xrefs:
  2862.                     # force the genno to be 0
  2863.                     self.write("%010d 00000 n \n" % xrefs[objid][0])
  2864.                 else:
  2865.                     self.write("%010d %05d f \n" % (0, 65535))
  2866.            
  2867.             self.write('trailer\n')
  2868.             self.serialize_object(trailer)
  2869.             self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
  2870.  
  2871.         else: # Generate crossref stream.
  2872.  
  2873.             # Calculate size of entries
  2874.             maxoffset = max(startxref, maxobj)
  2875.             maxindex = PDFObjStmRef.maxindex
  2876.             fl2 = 2
  2877.             power = 65536
  2878.             while maxoffset >= power:
  2879.                 fl2 += 1
  2880.                 power *= 256
  2881.             fl3 = 1
  2882.             power = 256
  2883.             while maxindex >= power:
  2884.                 fl3 += 1
  2885.                 power *= 256
  2886.                    
  2887.             index = []
  2888.             first = None
  2889.             prev = None
  2890.             data = []
  2891.             # Put the xrefstream's reference in itself
  2892.             startxref = self.tell()
  2893.             maxobj += 1
  2894.             xrefs[maxobj] = (startxref, 0)
  2895.             for objid in sorted(xrefs):
  2896.                 if first is None:
  2897.                     first = objid
  2898.                 elif objid != prev + 1:
  2899.                     index.extend((first, prev - first + 1))
  2900.                     first = objid
  2901.                 prev = objid
  2902.                 objref = xrefs[objid]
  2903.                 if isinstance(objref, PDFObjStmRef):
  2904.                     f1 = 2
  2905.                     f2 = objref.stmid
  2906.                     f3 = objref.index
  2907.                 else:
  2908.                     f1 = 1
  2909.                     f2 = objref[0]
  2910.                     # we force all generation numbers to be 0
  2911.                     # f3 = objref[1]
  2912.                     f3 = 0
  2913.                
  2914.                 data.append(struct.pack('>B', f1))
  2915.                 data.append(struct.pack('>L', f2)[-fl2:])
  2916.                 data.append(struct.pack('>L', f3)[-fl3:])
  2917.             index.extend((first, prev - first + 1))
  2918.             data = zlib.compress(''.join(data))
  2919.             dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
  2920.                    'W': [1, fl2, fl3], 'Length': len(data),
  2921.                    'Filter': LITERALS_FLATE_DECODE[0],
  2922.                    'Root': trailer['Root'],}
  2923.             if 'Info' in trailer:
  2924.                 dic['Info'] = trailer['Info']
  2925.             xrefstm = PDFStream(dic, data)
  2926.             self.serialize_indirect(maxobj, xrefstm)
  2927.             self.write('startxref\n%d\n%%%%EOF' % startxref)
  2928.     def write(self, data):
  2929.         self.outf.write(data)
  2930.         self.last = data[-1:]
  2931.  
  2932.     def tell(self):
  2933.         return self.outf.tell()
  2934.  
  2935.     def escape_string(self, string):
  2936.         string = string.replace('\\', '\\\\')
  2937.         string = string.replace('\n', r'\n')
  2938.         string = string.replace('(', r'\(')
  2939.         string = string.replace(')', r'\)')
  2940.          # get rid of ciando id
  2941.         regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
  2942.         if regularexp.match(string): return ('http://www.ciando.com')
  2943.         return string
  2944.    
  2945.     def serialize_object(self, obj):
  2946.         if isinstance(obj, dict):
  2947.             # Correct malformed Mac OS resource forks for Stanza
  2948.             if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
  2949.                    and isinstance(obj['Type'], int):
  2950.                 obj['Subtype'] = obj['Type']
  2951.                 del obj['Type']
  2952.             # end - hope this doesn't have bad effects
  2953.             self.write('<<')
  2954.             for key, val in obj.items():
  2955.                 self.write('/%s' % key)
  2956.                 self.serialize_object(val)
  2957.             self.write('>>')
  2958.         elif isinstance(obj, list):
  2959.             self.write('[')
  2960.             for val in obj:
  2961.                 self.serialize_object(val)
  2962.             self.write(']')
  2963.         elif isinstance(obj, str):
  2964.             self.write('(%s)' % self.escape_string(obj))
  2965.         elif isinstance(obj, bool):
  2966.             if self.last.isalnum():
  2967.                 self.write(' ')
  2968.             self.write(str(obj).lower())            
  2969.         elif isinstance(obj, (int, long, float)):
  2970.             if self.last.isalnum():
  2971.                 self.write(' ')
  2972.             self.write(str(obj))
  2973.         elif isinstance(obj, PDFObjRef):
  2974.             if self.last.isalnum():
  2975.                 self.write(' ')            
  2976.             self.write('%d %d R' % (obj.objid, 0))
  2977.         elif isinstance(obj, PDFStream):
  2978.             ### If we don't generate cross ref streams the object streams
  2979.             ### are no longer useful, as we have extracted all objects from
  2980.             ### them. Therefore leave them out from the output.
  2981.             if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
  2982.                     self.write('(deleted)')
  2983.             else:
  2984.                 data = obj.get_decdata()
  2985.                 self.serialize_object(obj.dic)
  2986.                 self.write('stream\n')
  2987.                 self.write(data)
  2988.                 self.write('\nendstream')
  2989.         else:
  2990.             data = str(obj)
  2991.             if data[0].isalnum() and self.last.isalnum():
  2992.                 self.write(' ')
  2993.             self.write(data)
  2994.    
  2995.     def serialize_indirect(self, objid, obj):
  2996.         self.write('%d 0 obj' % (objid,))
  2997.         self.serialize_object(obj)
  2998.         if self.last.isalnum():
  2999.             self.write('\n')
  3000.         self.write('endobj\n')
  3001.  
  3002. def cli_main(argv=sys.argv):
  3003.     progname = os.path.basename(argv[0])
  3004.     if RSA is None:
  3005.         print "%s: This script requires PyCrypto, which must be installed " \
  3006.               "separately.  Read the top-of-script comment for details." % \
  3007.               (progname,)
  3008.         return 1
  3009.     if len(argv) != 4:
  3010.         print "usage: %s KEYFILE INBOOK OUTBOOK" % (progname,)
  3011.         return 1
  3012.     keypath, inpath, outpath = argv[1:]
  3013.     with open(inpath, 'rb') as inf:
  3014.         serializer = PDFSerializer(inf, keypath)
  3015.         # hope this will fix the 'bad file descriptor' problem
  3016.         with open(outpath, 'wb') as outf:
  3017.         # help construct to make sure the method runs to the end
  3018.             serializer.dump(outf)
  3019.     return 0
  3020.  
  3021.  
  3022. class DecryptionDialog(Tkinter.Frame):
  3023.     def __init__(self, root):
  3024.         # debug mode debugging
  3025.         global DEBUG_MODE
  3026.         Tkinter.Frame.__init__(self, root, border=5)
  3027.         ltext='Select file for decryption\n(Ignore Password / Key file option for Fileopen/APS PDFs)'        
  3028.         self.status = Tkinter.Label(self, text=ltext)
  3029.         self.status.pack(fill=Tkconstants.X, expand=1)
  3030.         body = Tkinter.Frame(self)
  3031.         body.pack(fill=Tkconstants.X, expand=1)
  3032.         sticky = Tkconstants.E + Tkconstants.W
  3033.         body.grid_columnconfigure(1, weight=2)
  3034.         Tkinter.Label(body, text='Password\nor Key file').grid(row=0)
  3035.         self.keypath = Tkinter.Entry(body, width=30)
  3036.         self.keypath.grid(row=0, column=1, sticky=sticky)
  3037.         if os.path.exists('adeptkey.der'):
  3038.             self.keypath.insert(0, 'adeptkey.der')
  3039.         button = Tkinter.Button(body, text="...", command=self.get_keypath)
  3040.         button.grid(row=0, column=2)
  3041.         Tkinter.Label(body, text='Input file').grid(row=1)
  3042.         self.inpath = Tkinter.Entry(body, width=30)
  3043.         self.inpath.grid(row=1, column=1, sticky=sticky)
  3044.         button = Tkinter.Button(body, text="...", command=self.get_inpath)
  3045.         button.grid(row=1, column=2)
  3046.         Tkinter.Label(body, text='Output file').grid(row=2)
  3047.         self.outpath = Tkinter.Entry(body, width=30)
  3048.         self.outpath.grid(row=2, column=1, sticky=sticky)
  3049.         debugmode = Tkinter.Checkbutton(self, text = "Debug Mode (writable directory required)", command=self.debug_toggle, height=2, \
  3050.                  width = 40)            
  3051.         debugmode.pack()        
  3052.         button = Tkinter.Button(body, text="...", command=self.get_outpath)
  3053.         button.grid(row=2, column=2)
  3054.         buttons = Tkinter.Frame(self)
  3055.         buttons.pack()
  3056.  
  3057.  
  3058.         botton = Tkinter.Button(
  3059.             buttons, text="Decrypt", width=10, command=self.decrypt)
  3060.         botton.pack(side=Tkconstants.LEFT)
  3061.         Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
  3062.         button = Tkinter.Button(
  3063.             buttons, text="Quit", width=10, command=self.quit)
  3064.         button.pack(side=Tkconstants.RIGHT)
  3065.          
  3066.  
  3067.     def get_keypath(self):
  3068.         keypath = tkFileDialog.askopenfilename(
  3069.             parent=None, title='Select ADEPT key file',
  3070.             defaultextension='.der', filetypes=[('DER-encoded files', '.der'),
  3071.                                                 ('All Files', '.*')])
  3072.         if keypath:
  3073.             keypath = os.path.normpath(os.path.realpath(keypath))
  3074.             self.keypath.delete(0, Tkconstants.END)
  3075.             self.keypath.insert(0, keypath)
  3076.         return
  3077.  
  3078.     def get_inpath(self):
  3079.         inpath = tkFileDialog.askopenfilename(
  3080.             parent=None, title='Select ADEPT or FileOpen-encrypted PDF file to decrypt',
  3081.             defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
  3082.                                                  ('All files', '.*')])
  3083.         if inpath:
  3084.             inpath = os.path.normpath(os.path.realpath(inpath))
  3085.             self.inpath.delete(0, Tkconstants.END)
  3086.             self.inpath.insert(0, inpath)
  3087.         return
  3088.  
  3089.     def debug_toggle(self):
  3090.         global DEBUG_MODE
  3091.         if DEBUG_MODE == False:
  3092.             DEBUG_MODE = True
  3093.         else:
  3094.             DEBUG_MODE = False
  3095.            
  3096.     def get_outpath(self):
  3097.         outpath = tkFileDialog.asksaveasfilename(
  3098.             parent=None, title='Select unencrypted PDF file to produce',
  3099.             defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
  3100.                                                  ('All files', '.*')])
  3101.         if outpath:
  3102.             outpath = os.path.normpath(os.path.realpath(outpath))
  3103.             self.outpath.delete(0, Tkconstants.END)
  3104.             self.outpath.insert(0, outpath)
  3105.         return
  3106.  
  3107.     def decrypt(self):
  3108.         global INPUTFILEPATH
  3109.         global KEYFILEPATH
  3110.         global PASSWORD
  3111.         keypath = self.keypath.get()
  3112.         inpath = self.inpath.get()
  3113.         outpath = self.outpath.get()
  3114.         if not keypath or not os.path.exists(keypath):
  3115.             # keyfile doesn't exist
  3116.             KEYFILEPATH = False
  3117.             PASSWORD = keypath            
  3118.         if not inpath or not os.path.exists(inpath):
  3119.             self.status['text'] = 'Specified input file does not exist'
  3120.             return
  3121.         if not outpath:
  3122.             self.status['text'] = 'Output file not specified'
  3123.             return
  3124.         if inpath == outpath:
  3125.             self.status['text'] = 'Must have different input and output files'
  3126.             return
  3127.         # patch for non-ascii characters
  3128.         INPUTFILEPATH = inpath.encode('utf-8')
  3129.         argv = [sys.argv[0], keypath, inpath, outpath]
  3130.         self.status['text'] = 'Processing ...'
  3131.         try:
  3132.             cli_main(argv)
  3133.         except Exception, a:
  3134.             self.status['text'] = 'Error: ' + str(a)
  3135.             return
  3136.         self.status['text'] = 'File successfully decrypted.\n'+\
  3137.                               'Close this window or decrypt another pdf file.'
  3138.         return
  3139.  
  3140. def gui_main():
  3141.     root = Tkinter.Tk()
  3142.     if RSA is None:
  3143.         root.withdraw()
  3144.         tkMessageBox.showerror(
  3145.             "INEPT PDF and FileOpen Decrypter",
  3146.             "This script requires PyCrypto, which must be installed "
  3147.             "separately.  Read the top-of-script comment for details.")
  3148.         return 1
  3149.     root.title('INEPT PDF Decrypter 8.4.51 (FileOpen/APS-Support)')
  3150.     root.resizable(True, False)
  3151.     root.minsize(370, 0)
  3152.     DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
  3153.     root.mainloop()
  3154.     return 0
  3155.  
  3156.  
  3157. if __name__ == '__main__':
  3158.     if len(sys.argv) > 1:
  3159.         sys.exit(cli_main())
  3160.     sys.exit(gui_main())
Add Comment
Please, Sign In to add comment