SHARE
TWEET

feliam

a guest Feb 11th, 2010 937 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ##########################################################################
  2. ####   Felipe Andres Manzano     *   felipe.andres.manzano@gmail.com  ####
  3. ####   http://twitter.com/feliam *   http://wordpress.com/feliam      ####
  4. ##########################################################################
  5. import struct
  6. import zlib
  7.  
  8. ##TODO
  9. ## getattribute and setattribute must be pythonic!!
  10. ## References may be used to get actual object!
  11.  
  12.  
  13. #For constructing a minimal pdf file
  14. ## PDF Reference 3rd edition:: 3.2 Objects
  15. class PDFObject:
  16.     def __init__(self):
  17.         self.n=None
  18.         self.v=None
  19.     def __str__(self):
  20.         raise "Fail"
  21.  
  22. ## PDF Reference 3rd edition:: 3.2.1 Boolean Objects
  23. class PDFBool(PDFObject):
  24.     def __init__(self,s):
  25.         PDFObject.__init__(self)
  26.         self.s=s
  27.     def __str__(self):
  28.         if self.s:
  29.             return "true"
  30.         return "false"
  31.  
  32. ## PDF Reference 3rd edition:: 3.2.2 Numeric Objects
  33. class PDFNum(PDFObject):
  34.     def __init__(self,s):
  35.         PDFObject.__init__(self)
  36.         self.s=s
  37.     def __str__(self):
  38.         return "%s"%self.s
  39.  
  40. ## PDF Reference 3rd edition:: 3.2.3 String Objects
  41. class PDFString(PDFObject):
  42.     def __init__(self,s):
  43.         PDFObject.__init__(self)
  44.         self.s=s
  45.     def __str__(self):
  46.         return "(%s)"%self.s
  47.  
  48. ## PDF Reference 3rd edition:: 3.2.3 String Objects / Hexadecimal Strings
  49. class PDFHexString(PDFObject):
  50.     def __init__(self,s):
  51.         PDFObject.__init__(self)
  52.         self.s=s
  53.     def __str__(self):
  54.         return "<" + "".join(["%02x"%ord(c) for c in self.s]) + ">"
  55.  
  56. ## A convenient type of literal Strings
  57. class PDFOctalString(PDFObject):
  58.     def __init__(self,s):
  59.         PDFObject.__init__(self)
  60.         self.s="".join(["\\%03o"%ord(c) for c in s])
  61.     def __str__(self):
  62.         return "(%s)"%self.s
  63.  
  64. ## PDF Reference 3rd edition:: 3.2.4 Name Objects
  65. class PDFName(PDFObject):
  66.     def __init__(self,s):
  67.         PDFObject.__init__(self)
  68.         self.s=s
  69.     def __str__(self):
  70.         return "/%s"%self.s
  71.  
  72. ## PDF Reference 3rd edition:: 3.2.5 Array Objects
  73. class PDFArray(PDFObject):
  74.     def __init__(self,s):
  75.         PDFObject.__init__(self)
  76.         self.s=s
  77.     def __str__(self):
  78.         return "[%s]"%(" ".join([ o.__str__() for o in self.s]))
  79.  
  80. ## PDF Reference 3rd edition:: 3.2.6 Dictionary Objects
  81. class PDFDict(PDFObject):
  82.     def __init__(self, d={}):
  83.         PDFObject.__init__(self)
  84.         self.dict = {}
  85.         for k in d:
  86.             self.dict[k]=d[k]
  87.  
  88.     def add(self,name,obj):
  89.         self.dict[name] = obj
  90.  
  91.     def __getattr__(self,name):
  92.         try:
  93.             return self.dict[name]
  94.         except:
  95.             raise AttributeError
  96.  
  97.     def __str__(self):
  98.         s="<<"
  99.         for name in self.dict:
  100.             s+="%s %s "%(PDFName(name),self.dict[name])
  101.         s+=">>"
  102.         return s
  103.  
  104. ## PDF Reference 3rd edition:: 3.2.7 Stream Objects
  105. class PDFStream(PDFDict):
  106.     def __init__(self,stream=""):
  107.         PDFDict.__init__(self)
  108.         self.stream=stream
  109.         self.filtered=self.stream
  110.         self.filters = []
  111.     def appendFilter(self, filter):
  112.         self.filters.append(filter)
  113.         self._applyFilters() #yeah every time .. so what!
  114.     def _applyFilters(self):
  115.         self.filtered = self.stream
  116.         for f in self.filters:
  117.                 self.filtered = f.encode(self.filtered)
  118.         self.add('Length', len(self.filtered))
  119.         if len(self.filters)>0:
  120.             self.add('Filter', PDFArray([f.name for f in self.filters]))
  121.         #Add Filter parameters ?
  122.     def __str__(self):
  123.         self._applyFilters() #yeah every time .. so what!
  124.         s=""
  125.         s+=PDFDict.__str__(self)
  126.         s+="\nstream\n"
  127.         s+=self.filtered
  128.         s+="\nendstream"
  129.         return s
  130.  
  131. ## PDF Reference 3rd edition:: 3.2.8 Null Object
  132. class PDFNull(PDFObject):
  133.     def __init__(self):
  134.         PDFObject.__init__(self)
  135.  
  136.     def __str__(self):
  137.         return "null"
  138.  
  139. ## PDF Reference 3rd edition:: 3.2.9 Indirect Objects
  140. class PDFRef(PDFObject):
  141.     def __init__(self,obj):
  142.         PDFObject.__init__(self)
  143.         self.obj=[obj]
  144.     def __str__(self):
  145.         return "%d %d R"%(self.obj[0].n,self.obj[0].v)
  146.     def get(self):
  147.         return self.obj[0]
  148.  
  149. ## PDF Reference 3rd edition:: 3.3 Filters
  150. ## Example Filter...
  151. class FlateDecode:
  152.     name = PDFName('FlateDecode')
  153.     def __init__(self):
  154.         pass
  155.     def encode(self,stream):
  156.         return zlib.compress(stream)
  157.     def decode(self,stream):
  158.         return zlib.decompress(stream)
  159.  
  160. ## PDF Reference 3rd edition:: 3.4 File Structure
  161. ## Simplest file structure...
  162. class PDFDoc():
  163.     def __init__(self,obfuscate=0):
  164.         self.objs=[]
  165.         self.info=None
  166.         self.root=None
  167.     def setRoot(self,root):
  168.         self.root=root
  169.     def getRoot(self):
  170.         return self.root
  171.     def setInfo(self,info):
  172.         self.info=info
  173.     def _add(self,obj):
  174.         if obj.v!=None or obj.n!=None:
  175.             raise "Already added!!!"
  176.         obj.v=0
  177.         obj.n=1+len(self.objs)
  178.         self.objs.append(obj)
  179.     def add(self,obj):
  180.         if type(obj) != type([]):
  181.             self._add(obj);        
  182.         else:
  183.             for o in obj:  
  184.                 self._add(o)
  185.     def _header(self):
  186.         return "%PDF-1.3\n%\xE7\xF3\xCF\xD3\n"
  187.     def __str__(self):
  188.         doc1 = self._header()
  189.         xref = {}
  190.         for obj in self.objs:
  191.             xref[obj.n] = len(doc1)
  192.             doc1+="%d %d obj\n"%(obj.n,obj.v)
  193.             doc1+=obj.__str__()
  194.             doc1+="\nendobj\n"
  195.         posxref=len(doc1)
  196.         doc1+="xref\n"
  197.         doc1+="0 %d\n"%(len(self.objs)+1)
  198.         doc1+="0000000000 65535 f \n"
  199.         for xr in xref.keys():
  200.             doc1+= "%010d %05d n \n"%(xref[xr],0)
  201.         doc1+="trailer\n"
  202.         trailer =  PDFDict()
  203.         trailer.add("Size",len(self.objs)+1)
  204.         trailer.add("Root",PDFRef(self.root))
  205.         if self.info:
  206.             trailer.add("Info",PDFRef(self.info))
  207.         doc1+=trailer.__str__()
  208.         doc1+="\nstartxref\n%d\n"%posxref
  209.         doc1+="%%EOF"
  210.         return doc1
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top