Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##########################################################################
- #### Felipe Andres Manzano * felipe.andres.manzano@gmail.com ####
- #### http://twitter.com/feliam * http://wordpress.com/feliam ####
- ##########################################################################
- import struct
- import zlib
- ##TODO
- ## getattribute and setattribute must be pythonic!!
- ## References may be used to get actual object!
- #For constructing a minimal pdf file
- ## PDF REference 3rd edition:: 3.2 Objects
class PDFObject:
    """Common base class of the PDF object types defined in this module.

    Attributes:
        n: Object number. Starts as ``None``; ``PDFDoc._add`` assigns it when
            the object is added to a document.
        v: Generation number. Starts as ``None``; ``PDFDoc._add`` sets it to 0.
    """

    def __init__(self):
        self.n = None
        self.v = None

    def __str__(self):
        """Return the PDF source text of the object.

        The base class has no textual form, so subclasses must override this.

        Raises:
            NotImplementedError: Always.
        """
        # Raising a plain string is not a valid exception: it surfaces as an
        # unrelated TypeError.  Raise a real exception that names the class.
        raise NotImplementedError(
            "%s does not implement __str__" % self.__class__.__name__
        )
- ## PDF REference 3rd edition:: 3.2.1 Booleans Objects
class PDFBool(PDFObject):
    """Boolean object, written as the keyword ``true`` or ``false``."""

    def __init__(self, s):
        """Store *s*; its truthiness selects the keyword that is emitted."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return ``"true"`` when the stored value is truthy, else ``"false"``."""
        return "true" if self.s else "false"
- ## PDF REference 3rd edition:: 3.2.2 Numeric Objects
class PDFNum(PDFObject):
    """Numeric object, written using the ``%s`` rendering of its value."""

    def __init__(self, s):
        """Store the value *s* to be written."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the stored value formatted with ``%s``."""
        rendered = "%s" % self.s
        return rendered
- ## PDF REference 3rd edition:: 3.2.3 String Objects
class PDFString(PDFObject):
    """Literal string object, written between parentheses.

    The content is emitted verbatim: no escaping of parentheses or
    backslashes is performed here.
    """

    def __init__(self, s):
        """Store the string content *s*."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the stored content wrapped in ``(`` and ``)``."""
        content = "%s" % self.s
        return "(" + content + ")"
- ## PDF REference 3rd edition:: 3.2.3 String Objects / Hexadecimal Strings
class PDFHexString(PDFObject):
    """Hexadecimal string object, written between ``<`` and ``>``."""

    def __init__(self, s):
        """Store the raw string *s*; it is hex-encoded when rendered."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return each character of the content as two lowercase hex digits."""
        digits = "".join("%02x" % ord(ch) for ch in self.s)
        return "<" + digits + ">"
- ## A convenient type of literal Strings
class PDFOctalString(PDFObject):
    """Literal string whose characters are all written as octal escapes."""

    def __init__(self, s):
        """Convert every character of *s* to a ``\\ooo`` escape and store it.

        The conversion happens once, here; ``self.s`` holds the escaped text.
        """
        PDFObject.__init__(self)
        escapes = ["\\%03o" % ord(ch) for ch in s]
        self.s = "".join(escapes)

    def __str__(self):
        """Return the escaped text wrapped in parentheses."""
        return "(%s)" % self.s
- ## PDF REference 3rd edition:: 3.2.4 Name Objects
class PDFName(PDFObject):
    """Name object, written as a slash followed by the name text."""

    def __init__(self, s):
        """Store the name *s* (without the leading slash)."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the stored name prefixed with ``/``."""
        label = "%s" % self.s
        return "/" + label
- ## PDF REference 3rd edition:: 3.2.5 Array Objects
class PDFArray(PDFObject):
    """Array object, written as space-separated elements in square brackets."""

    def __init__(self, s):
        """Store *s*, the iterable of elements to be written."""
        PDFObject.__init__(self)
        self.s = s

    def __str__(self):
        """Return the elements' ``__str__`` results joined by single spaces."""
        rendered = " ".join(element.__str__() for element in self.s)
        return "[" + rendered + "]"
- ## PDF REference 3rd edition:: 3.2.6 Dictionary Objects
class PDFDict(PDFObject):
    """Dictionary object, written as ``<<`` key/value pairs ``>>``.

    Entries live in the plain ``dict`` attribute ``self.dict``.  Entries can
    also be read as attributes (``d.Length``) through ``__getattr__``; a real
    attribute or method with the same name, such as ``add``, takes precedence
    because ``__getattr__`` only runs when normal lookup fails.
    """

    def __init__(self, d=None):
        """Create the dictionary, optionally copying the entries of *d*.

        Args:
            d: Optional mapping of initial entries.  It is copied key by key,
                so later changes to *d* do not affect this object.
        """
        PDFObject.__init__(self)
        # ``None`` replaces the former mutable ``{}`` default argument.
        source = {} if d is None else d
        self.dict = {}
        for key in source:
            self.dict[key] = source[key]

    def add(self, name, obj):
        """Set entry *name* to *obj*, replacing any existing entry."""
        self.dict[name] = obj

    def __getattr__(self, name):
        """Return the entry called *name* for otherwise unknown attributes.

        Raises:
            AttributeError: If there is no such entry, or if the entry table
                itself has not been created yet.
        """
        # Go through ``__dict__`` rather than ``self.dict``: when ``dict`` is
        # not set yet, ``self.dict`` would re-enter ``__getattr__`` forever.
        try:
            return self.__dict__["dict"][name]
        except KeyError:
            raise AttributeError(name)

    def __str__(self):
        """Return ``<<`` + one ``"/key value "`` chunk per entry + ``>>``."""
        entries = [
            "%s %s " % (PDFName(name), self.dict[name]) for name in self.dict
        ]
        return "<<" + "".join(entries) + ">>"
- ## PDF REference 3rd edition:: 3.2.7 Stream Objects
class PDFStream(PDFDict):
    """Stream object: a dictionary followed by ``stream``/``endstream`` data.

    Attributes:
        stream: The unfiltered data given to the constructor.
        filtered: The data after every filter in ``filters`` has encoded it.
        filters: Filter objects, applied in the order they were appended.
    """

    def __init__(self, stream=""):
        """Create a stream holding *stream* with no filters attached."""
        PDFDict.__init__(self)
        self.filters = []
        self.stream = stream
        self.filtered = stream

    def appendFilter(self, filter):
        """Append *filter* to the chain and re-encode the data.

        *filter* must provide ``encode(data)`` and a ``name`` attribute.
        """
        self.filters.append(filter)
        # Recomputed from scratch on every call.
        self._applyFilters()

    def _applyFilters(self):
        """Rebuild ``filtered`` and refresh the Length and Filter entries."""
        self.filtered = self.stream
        for stage in self.filters:
            self.filtered = stage.encode(self.filtered)
        self.add('Length', len(self.filtered))
        if self.filters:
            self.add('Filter', PDFArray([stage.name for stage in self.filters]))
        # Filter parameters are not written.

    def __str__(self):
        """Return the dictionary text followed by the filtered stream data."""
        # Re-encode first so Length and Filter match the data written below.
        self._applyFilters()
        header = PDFDict.__str__(self)
        return header + "\nstream\n" + self.filtered + "\nendstream"
- ## PDF REference 3rd edition:: 3.2.8 Null Object
class PDFNull(PDFObject):
    """Null object, written as the keyword ``null``."""

    def __init__(self):
        """Initialise the base object; the null object carries no value."""
        PDFObject.__init__(self)

    def __str__(self):
        """Return the literal text ``null``."""
        keyword = "null"
        return keyword
- ## PDF REference 3rd edition:: 3.2.9 Indirect Objects
class PDFRef(PDFObject):
    """Indirect reference to another object, written as ``"n v R"``.

    The referenced object is kept in the one-element list ``self.obj``.
    """

    def __init__(self, obj):
        """Remember *obj* as the target of this reference."""
        PDFObject.__init__(self)
        self.obj = [obj]

    def __str__(self):
        """Return the target's ``n`` and ``v`` followed by ``R``.

        Both values are formatted with ``%d``, so the target must already
        have integer ``n`` and ``v`` values.
        """
        target = self.obj[0]
        return "%d %d R" % (target.n, target.v)

    def get(self):
        """Return the referenced object."""
        return self.obj[0]
- ## PDF REference 3rd edition:: 3.3 Filters
- ## Example Filter...
class FlateDecode:
    """Example stream filter backed by ``zlib``.

    Attributes:
        name: The ``PDFName`` listed in a stream's Filter entry.
    """

    name = PDFName('FlateDecode')

    def __init__(self):
        """Nothing to set up; the filter holds no state."""
        pass

    def encode(self, stream):
        """Return *stream* compressed with ``zlib.compress``."""
        compressed = zlib.compress(stream)
        return compressed

    def decode(self, stream):
        """Return *stream* decompressed with ``zlib.decompress``."""
        restored = zlib.decompress(stream)
        return restored
- ## PDF REference 3rd edition:: 3.4 File Structure
- ## Simplest file structure...
class PDFDoc(object):
    """Minimal PDF file: header, numbered objects, xref table and trailer.

    Attributes:
        objs: Objects in the order they were added; object numbers start at 1.
        root: Object referenced by the trailer's Root entry.
        info: Optional object referenced by the trailer's Info entry.
    """

    def __init__(self, obfuscate=0):
        """Create an empty document.

        Args:
            obfuscate: Accepted for compatibility; not used by this class.
        """
        self.objs = []
        self.info = None
        self.root = None

    def setRoot(self, root):
        """Set the object the trailer's Root entry refers to."""
        self.root = root

    def getRoot(self):
        """Return the object set with ``setRoot`` (``None`` if unset)."""
        return self.root

    def setInfo(self, info):
        """Set the object the trailer's Info entry refers to."""
        self.info = info

    def _add(self, obj):
        """Number *obj* and append it to the document.

        Raises:
            ValueError: If *obj* already carries an object or generation
                number, i.e. it was added before.
        """
        if obj.v is not None or obj.n is not None:
            # Raising a plain string is not a valid exception; use a real one.
            raise ValueError("Already added!!!")
        obj.v = 0
        obj.n = 1 + len(self.objs)
        self.objs.append(obj)

    def add(self, obj):
        """Add one object, or every object of a list or tuple, in order."""
        if isinstance(obj, (list, tuple)):
            for item in obj:
                self._add(item)
        else:
            self._add(obj)

    def _header(self):
        """Return the version line and a comment line of four high characters."""
        return "%PDF-1.3\n%\xE7\xF3\xCF\xD3\n"

    def __str__(self):
        """Return the complete text of the PDF file.

        Objects are written in the order they were added.  The offset of each
        one is recorded for the cross-reference table, which is followed by
        the trailer dictionary and the ``startxref`` pointer.
        """
        header = self._header()
        chunks = [header]
        position = len(header)  # running length of everything emitted so far
        offsets = {}
        for obj in self.objs:
            offsets[obj.n] = position
            body = "%d %d obj\n" % (obj.n, obj.v) + str(obj) + "\nendobj\n"
            chunks.append(body)
            position += len(body)

        xref_position = position
        entry_count = len(self.objs) + 1  # +1 for the free entry of object 0
        chunks.append("xref\n")
        chunks.append("0 %d\n" % entry_count)
        chunks.append("0000000000 65535 f \n")
        # Entries must follow object-number order; dict order is not reliable.
        for number in sorted(offsets):
            chunks.append("%010d %05d n \n" % (offsets[number], 0))

        chunks.append("trailer\n")
        trailer = PDFDict()
        trailer.add("Size", entry_count)
        trailer.add("Root", PDFRef(self.root))
        if self.info:
            trailer.add("Info", PDFRef(self.info))
        chunks.append(str(trailer))
        chunks.append("\nstartxref\n%d\n" % xref_position)
        # Not a format string: both percent signs belong to the EOF marker.
        chunks.append("%%EOF")
        return "".join(chunks)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement