Guest User

ODSReader.py patched

a guest
Nov 29th, 2012
1,360
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Copyright 2011 Marco Conti
  2.  
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6.  
  7. #   http://www.apache.org/licenses/LICENSE-2.0
  8.  
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14.  
  15. # Thanks to grt for the fixes
  16.  
  17. import odf.opendocument
  18. from odf.table import *
  19. from odf.text import P
  20.  
  21. class ODSReader:
  22.  
  23.     # loads the file
  24.     def __init__(self, file):
  25.         self.doc = odf.opendocument.load(file)
  26.         self.SHEETS = {}
  27.         for sheet in self.doc.spreadsheet.getElementsByType(Table):
  28.             self.readSheet(sheet)
  29.    
  30.  
  31.     # reads a sheet in the sheet dictionary, storing each sheet as an array (rows) of arrays (columns)
  32.     def readSheet(self, sheet):
  33.         name = sheet.getAttribute("name")
  34.         rows = sheet.getElementsByType(TableRow)
  35.         arrRows = []
  36.        
  37.         # for each row
  38.         for row in rows:
  39.             row_comment = ""
  40.             arrCells = []
  41.             cells = row.getElementsByType(TableCell)
  42.            
  43.             # for each cell
  44.             for cell in cells:
  45.                 # repeated value?
  46.                 repeat = cell.getAttribute("numbercolumnsrepeated")
  47.                 if(not repeat):
  48.                     repeat = 1
  49.                    
  50.                 ps = cell.getElementsByType(P)
  51.                 textContent = ""
  52.                                
  53.                 # for each text/text:span node
  54.                 for p in ps:
  55.                     for n in p.childNodes:
  56.                                                 if (n.nodeType == 1 and n.tagName == "text:span"):
  57.                             for c in n.childNodes:
  58.                                 if (c.nodeType == 3):
  59.                                     textContent = textContent + unicode(c.data)
  60.                            
  61.                         if (n.nodeType == 3):
  62.                             textContent = textContent + unicode(n.data)
  63.                    
  64.                 if(textContent):
  65.                     if(textContent[0] != "#"): # ignore comments cells
  66.                         for rr in range(int(repeat)): # repeated?
  67.                             arrCells.append(textContent)
  68.                     else:
  69.                         row_comment = row_comment + textContent + " ";
  70.                 else:
  71.                     for rr in range(int(repeat)):
  72.                         arrCells.append("")
  73.  
  74.             # if row contained something
  75.             if(len(arrCells)):
  76.                 arrRows.append(arrCells)
  77.                
  78.             #else:
  79.             #   print "Empty or commented row (", row_comment, ")"
  80.        
  81.         self.SHEETS[name] = arrRows
  82.        
  83.     # returns a sheet as an array (rows) of arrays (columns)
  84.     def getSheet(self, name):
  85.         return self.SHEETS[name]
RAW Paste Data