Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

feliam

By: a guest on Feb 11th, 2010  |  syntax: Python  |  size: 6.12 KB  |  views: 782  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. ##########################################################################
  2. ####   Felipe Andres Manzano     *   felipe.andres.manzano@gmail.com  ####
  3. ####   http://twitter.com/feliam *   http://wordpress.com/feliam      ####
  4. ##########################################################################
  5. import struct
  6. import zlib
  7.  
  8. ##TODO
  9. ## getattribute and setattribute must be pythonic!!
  10. ## References may be used to get actual object!
  11.  
  12.  
  13. #For constructing a minimal pdf file
  14. ## PDF REference 3rd edition:: 3.2 Objects
  15. class PDFObject:
  16.     def __init__(self):
  17.         self.n=None
  18.         self.v=None
  19.     def __str__(self):
  20.         raise "Fail"
  21.  
  22. ## PDF REference 3rd edition:: 3.2.1 Booleans Objects
  23. class PDFBool(PDFObject):
  24.     def __init__(self,s):
  25.         PDFObject.__init__(self)
  26.         self.s=s
  27.     def __str__(self):
  28.         if self.s:
  29.             return "true"
  30.         return "false"
  31.  
  32. ## PDF REference 3rd edition:: 3.2.2 Numeric Objects
  33. class PDFNum(PDFObject):
  34.     def __init__(self,s):
  35.         PDFObject.__init__(self)
  36.         self.s=s
  37.     def __str__(self):
  38.         return "%s"%self.s
  39.  
  40. ## PDF REference 3rd edition:: 3.2.3 String Objects
  41. class PDFString(PDFObject):
  42.     def __init__(self,s):
  43.         PDFObject.__init__(self)
  44.         self.s=s
  45.     def __str__(self):
  46.         return "(%s)"%self.s
  47.  
  48. ## PDF REference 3rd edition:: 3.2.3 String Objects / Hexadecimal Strings
  49. class PDFHexString(PDFObject):
  50.     def __init__(self,s):
  51.         PDFObject.__init__(self)
  52.         self.s=s
  53.     def __str__(self):
  54.         return "<" + "".join(["%02x"%ord(c) for c in self.s]) + ">"
  55.  
  56. ## A convenient type of literal Strings
  57. class PDFOctalString(PDFObject):
  58.     def __init__(self,s):
  59.         PDFObject.__init__(self)
  60.         self.s="".join(["\\%03o"%ord(c) for c in s])
  61.     def __str__(self):
  62.         return "(%s)"%self.s
  63.  
  64. ## PDF REference 3rd edition:: 3.2.4 Name Objects
  65. class PDFName(PDFObject):
  66.     def __init__(self,s):
  67.         PDFObject.__init__(self)
  68.         self.s=s
  69.     def __str__(self):
  70.         return "/%s"%self.s
  71.  
  72. ## PDF REference 3rd edition:: 3.2.5 Array Objects
  73. class PDFArray(PDFObject):
  74.     def __init__(self,s):
  75.         PDFObject.__init__(self)
  76.         self.s=s
  77.     def __str__(self):
  78.         return "[%s]"%(" ".join([ o.__str__() for o in self.s]))
  79.  
  80. ## PDF REference 3rd edition:: 3.2.6 Dictionary Objects
  81. class PDFDict(PDFObject):
  82.     def __init__(self, d={}):
  83.         PDFObject.__init__(self)
  84.         self.dict = {}
  85.         for k in d:
  86.             self.dict[k]=d[k]
  87.  
  88.     def add(self,name,obj):
  89.         self.dict[name] = obj
  90.  
  91.     def __getattr__(self,name):
  92.         try:
  93.             return self.dict[name]
  94.         except:
  95.             raise AttributeError
  96.  
  97.     def __str__(self):
  98.         s="<<"
  99.         for name in self.dict:
  100.             s+="%s %s "%(PDFName(name),self.dict[name])
  101.         s+=">>"
  102.         return s
  103.  
  104. ## PDF REference 3rd edition:: 3.2.7 Stream Objects
  105. class PDFStream(PDFDict):
  106.     def __init__(self,stream=""):
  107.         PDFDict.__init__(self)
  108.         self.stream=stream
  109.         self.filtered=self.stream
  110.         self.filters = []
  111.     def appendFilter(self, filter):
  112.         self.filters.append(filter)
  113.         self._applyFilters() #yeah every time .. so what!
  114.     def _applyFilters(self):
  115.         self.filtered = self.stream
  116.         for f in self.filters:
  117.                 self.filtered = f.encode(self.filtered)
  118.         self.add('Length', len(self.filtered))
  119.         if len(self.filters)>0:
  120.             self.add('Filter', PDFArray([f.name for f in self.filters]))
  121.         #Add Filter parameters ?
  122.     def __str__(self):
  123.         self._applyFilters() #yeah every time .. so what!
  124.         s=""
  125.         s+=PDFDict.__str__(self)
  126.         s+="\nstream\n"
  127.         s+=self.filtered
  128.         s+="\nendstream"
  129.         return s
  130.  
  131. ## PDF REference 3rd edition:: 3.2.8 Null Object
  132. class PDFNull(PDFObject):
  133.     def __init__(self):
  134.         PDFObject.__init__(self)
  135.  
  136.     def __str__(self):
  137.         return "null"
  138.  
  139. ## PDF REference 3rd edition:: 3.2.9 Indirect Objects
  140. class PDFRef(PDFObject):
  141.     def __init__(self,obj):
  142.         PDFObject.__init__(self)
  143.         self.obj=[obj]
  144.     def __str__(self):
  145.         return "%d %d R"%(self.obj[0].n,self.obj[0].v)
  146.     def get(self):
  147.         return self.obj[0]
  148.  
  149. ## PDF REference 3rd edition:: 3.3 Filters
  150. ## Example Filter...
  151. class FlateDecode:
  152.     name = PDFName('FlateDecode')
  153.     def __init__(self):
  154.         pass
  155.     def encode(self,stream):
  156.         return zlib.compress(stream)
  157.     def decode(self,stream):
  158.         return zlib.decompress(stream)
  159.  
  160. ## PDF REference 3rd edition:: 3.4 File Structure
  161. ## Simplest file structure...
  162. class PDFDoc():
  163.     def __init__(self,obfuscate=0):
  164.         self.objs=[]
  165.         self.info=None
  166.         self.root=None
  167.     def setRoot(self,root):
  168.         self.root=root
  169.     def getRoot(self):
  170.         return self.root
  171.     def setInfo(self,info):
  172.         self.info=info
  173.     def _add(self,obj):
  174.         if obj.v!=None or obj.n!=None:
  175.             raise "Already added!!!"
  176.         obj.v=0
  177.         obj.n=1+len(self.objs)
  178.         self.objs.append(obj)
  179.     def add(self,obj):
  180.         if type(obj) != type([]):
  181.             self._add(obj);        
  182.         else:
  183.             for o in obj:  
  184.                 self._add(o)
  185.     def _header(self):
  186.         return "%PDF-1.3\n%\xE7\xF3\xCF\xD3\n"
  187.     def __str__(self):
  188.         doc1 = self._header()
  189.         xref = {}
  190.         for obj in self.objs:
  191.             xref[obj.n] = len(doc1)
  192.             doc1+="%d %d obj\n"%(obj.n,obj.v)
  193.             doc1+=obj.__str__()
  194.             doc1+="\nendobj\n"
  195.         posxref=len(doc1)
  196.         doc1+="xref\n"
  197.         doc1+="0 %d\n"%(len(self.objs)+1)
  198.         doc1+="0000000000 65535 f \n"
  199.         for xr in xref.keys():
  200.             doc1+= "%010d %05d n \n"%(xref[xr],0)
  201.         doc1+="trailer\n"
  202.         trailer =  PDFDict()
  203.         trailer.add("Size",len(self.objs)+1)
  204.         trailer.add("Root",PDFRef(self.root))
  205.         if self.info:
  206.             trailer.add("Info",PDFRef(self.info))
  207.         doc1+=trailer.__str__()
  208.         doc1+="\nstartxref\n%d\n"%posxref
  209.         doc1+="%%EOF"
  210.         return doc1