Advertisement
Guest User

.doxy2swig.py

a guest
Dec 17th, 2013
159
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.31 KB | None | 0 0
  1. #!/usr/bin/env python
  2. """Doxygen XML to SWIG docstring converter.
  3.  
  4. Usage:
  5.  
  6.  doxy2swig.py [options] input.xml output.i
  7.  
  8. Converts Doxygen generated XML files into a file containing docstrings
  9. that can be used by SWIG-1.3.x.  Note that you need to get SWIG
  10. version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
  11. the resulting output.
  12.  
  13. input.xml is your doxygen generated XML file and output.i is where the
  14. output will be written (the file will be clobbered).
  15.  
  16. """
  17. ######################################################################
  18. #
  19. # This code is implemented using Mark Pilgrim's code as a guideline:
  20. #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
  21. #
  22. # Author: Prabhu Ramachandran
  23. # License: BSD style
  24. #
  25. # Thanks:
  26. #   Johan Hake:  the include_function_definition feature
  27. #   Bill Spotz:  bug reports and testing.
  28. #
  29. ######################################################################
  30.  
  31. from xml.dom import minidom
  32. import re
  33. import textwrap
  34. import sys
  35. import types
  36. import os.path
  37. import optparse
  38.  
  39.  
  40. def my_open_read(source):
  41.     if hasattr(source, "read"):
  42.         return source
  43.     else:
  44.         return open(source)
  45.  
  46. def my_open_write(dest):
  47.     if hasattr(dest, "write"):
  48.         return dest
  49.     else:
  50.         return open(dest, 'w')
  51.  
  52.  
  53. class Doxy2SWIG:
  54.     """Converts Doxygen generated XML files into a file containing
  55.    docstrings that can be used by SWIG-1.3.x that have support for
  56.    feature("docstring").  Once the data is parsed it is stored in
  57.    self.pieces.
  58.  
  59.    """
  60.  
  61.     def __init__(self, src, include_function_definition=True, quiet=False):
  62.         """Initialize the instance given a source object.  `src` can
  63.        be a file or filename.  If you do not want to include function
  64.        definitions from doxygen then set
  65.        `include_function_definition` to `False`.  This is handy since
  66.        this allows you to use the swig generated function definition
  67.        using %feature("autodoc", [0,1]).
  68.  
  69.        """
  70.         f = my_open_read(src)
  71.         self.my_dir = os.path.dirname(f.name)
  72.         self.xmldoc = minidom.parse(f).documentElement
  73.         f.close()
  74.  
  75.         self.pieces = []
  76.         self.pieces.append('\n// File: %s\n'%\
  77.                            os.path.basename(f.name))
  78.  
  79.         self.space_re = re.compile(r'\s+')
  80.         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
  81.         self.multi = 0
  82.         self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
  83.                         'innerclass', 'name', 'declname', 'incdepgraph',
  84.                         'invincdepgraph', 'programlisting', 'type',
  85.                         'references', 'referencedby', 'location',
  86.                         'collaborationgraph', 'reimplements',
  87.                         'reimplementedby', 'derivedcompoundref',
  88.                         'basecompoundref']
  89.         #self.generics = []
  90.         self.include_function_definition = include_function_definition
  91.         if not include_function_definition:
  92.             self.ignores.append('argsstring')
  93.  
  94.         self.quiet = quiet
  95.  
  96.  
  97.     def generate(self):
  98.         """Parses the file set in the initialization.  The resulting
  99.        data is stored in `self.pieces`.
  100.  
  101.        """
  102.         self.parse(self.xmldoc)
  103.  
  104.     def parse(self, node):
  105.         """Parse a given node.  This function in turn calls the
  106.        `parse_<nodeType>` functions which handle the respective
  107.        nodes.
  108.  
  109.        """
  110.         pm = getattr(self, "parse_%s"%node.__class__.__name__)
  111.         pm(node)
  112.  
  113.     def parse_Document(self, node):
  114.         self.parse(node.documentElement)
  115.  
  116.     def parse_Text(self, node):
  117.         txt = node.data
  118.         txt = txt.replace('\\', r'\\\\')
  119.         txt = txt.replace('"', r'\"')
  120.         # ignore pure whitespace
  121.         m = self.space_re.match(txt)
  122.         if m and len(m.group()) == len(txt):
  123.             pass
  124.         else:
  125.             self.add_text(textwrap.fill(txt, break_long_words=False))
  126.  
  127.     def parse_Element(self, node):
  128.         """Parse an `ELEMENT_NODE`.  This calls specific
  129.        `do_<tagName>` handers for different elements.  If no handler
  130.        is available the `generic_parse` method is called.  All
  131.        tagNames specified in `self.ignores` are simply ignored.
  132.  
  133.        """
  134.         name = node.tagName
  135.         ignores = self.ignores
  136.         if name in ignores:
  137.             return
  138.         attr = "do_%s" % name
  139.         if hasattr(self, attr):
  140.             handlerMethod = getattr(self, attr)
  141.             handlerMethod(node)
  142.         else:
  143.             self.generic_parse(node)
  144.             #if name not in self.generics: self.generics.append(name)
  145.  
  146.     def parse_Comment(self, node):
  147.         """Parse a `COMMENT_NODE`.  This does nothing for now."""
  148.         return
  149.  
  150.     def add_text(self, value):
  151.         """Adds text corresponding to `value` into `self.pieces`."""
  152.         if isinstance(value, list) or isinstance(value, tuple):
  153.             self.pieces.extend(value)
  154.         else:
  155.             self.pieces.append(value)
  156.  
  157.     def get_specific_nodes(self, node, names):
  158.         """Given a node and a sequence of strings in `names`, return a
  159.        dictionary containing the names as keys and child
  160.        `ELEMENT_NODEs`, that have a `tagName` equal to the name.
  161.  
  162.        """
  163.         nodes = [(x.tagName, x) for x in node.childNodes \
  164.                  if x.nodeType == x.ELEMENT_NODE and \
  165.                  x.tagName in names]
  166.         return dict(nodes)
  167.  
  168.     def generic_parse(self, node, pad=0):
  169.         """A Generic parser for arbitrary tags in a node.
  170.  
  171.        Parameters:
  172.  
  173.         - node:  A node in the DOM.
  174.         - pad: `int` (default: 0)
  175.  
  176.           If 0 the node data is not padded with newlines.  If 1 it
  177.           appends a newline after parsing the childNodes.  If 2 it
  178.           pads before and after the nodes are processed.  Defaults to
  179.           0.
  180.  
  181.        """
  182.         npiece = 0
  183.         if pad:
  184.             npiece = len(self.pieces)
  185.             if pad == 2:
  186.                 self.add_text('\n')
  187.         for n in node.childNodes:
  188.             self.parse(n)
  189.         if pad:
  190.             if len(self.pieces) > npiece:
  191.                 self.add_text('\n')
  192.  
  193.     def space_parse(self, node):
  194.         self.add_text(' ')
  195.         self.generic_parse(node)
  196.  
  197.     do_ref = space_parse
  198.     do_emphasis = space_parse
  199.     do_bold = space_parse
  200.     do_computeroutput = space_parse
  201.     do_formula = space_parse
  202.  
  203.     def do_compoundname(self, node):
  204.         self.add_text('\n\n')
  205.         data = node.firstChild.data
  206.         self.add_text('%%feature("docstring") %s "\n'%data)
  207.  
  208.     def do_compounddef(self, node):
  209.         kind = node.attributes['kind'].value
  210.         if kind in ('class', 'struct'):
  211.             prot = node.attributes['prot'].value
  212.             if prot != 'public':
  213.                 return
  214.             names = ('compoundname', 'briefdescription',
  215.                      'detaileddescription', 'includes')
  216.             first = self.get_specific_nodes(node, names)
  217.             for n in names:
  218.                 if n in first:
  219.                     self.parse(first[n])
  220.             self.add_text(['";','\n'])
  221.             for n in node.childNodes:
  222.                 if n not in first.values():
  223.                     self.parse(n)
  224.         elif kind in ('file', 'namespace'):
  225.             nodes = node.getElementsByTagName('sectiondef')
  226.             for n in nodes:
  227.                 self.parse(n)
  228.  
  229.     def do_includes(self, node):
  230.         self.add_text('C++ includes: ')
  231.         self.generic_parse(node, pad=1)
  232.  
  233.     def do_parameterlist(self, node):
  234.         text='unknown'
  235.         for key, val in node.attributes.items():
  236.             if key == 'kind':
  237.                 if val == 'param': text = 'Parameters'
  238.                 elif val == 'exception': text = 'Exceptions'
  239.                 else: text = val
  240.                 break
  241.         self.add_text(['\n', '\n', text, ':', '\n'])
  242.         self.generic_parse(node, pad=1)
  243.  
  244.     def do_para(self, node):
  245.         self.add_text('\n')
  246.         self.generic_parse(node, pad=1)
  247.  
  248.     def do_parametername(self, node):
  249.         self.add_text('\n')
  250.         try:
  251.             data=node.firstChild.data
  252.         except AttributeError: # perhaps a <ref> tag in it
  253.             data=node.firstChild.firstChild.data
  254.         if data.find('Exception') != -1:
  255.             self.add_text(data)
  256.         else:
  257.             self.add_text("%s: "%data)
  258.  
  259.     def do_parameterdefinition(self, node):
  260.         self.generic_parse(node, pad=1)
  261.  
  262.     def do_detaileddescription(self, node):
  263.         self.generic_parse(node, pad=1)
  264.  
  265.     def do_briefdescription(self, node):
  266.         self.generic_parse(node, pad=1)
  267.  
  268.     def do_memberdef(self, node):
  269.         prot = node.attributes['prot'].value
  270.         id = node.attributes['id'].value
  271.         kind = node.attributes['kind'].value
  272.         tmp = node.parentNode.parentNode.parentNode
  273.         compdef = tmp.getElementsByTagName('compounddef')[0]
  274.         cdef_kind = compdef.attributes['kind'].value
  275.  
  276.         if prot == 'public':
  277.             first = self.get_specific_nodes(node, ('definition', 'name'))
  278.             name = first['name'].firstChild.data
  279.             if name[:8] == 'operator': # Don't handle operators yet.
  280.                 return
  281.  
  282.             if not ('definition' in first) or \
  283.                    kind in ['variable', 'typedef']:
  284.                 return
  285.  
  286.             if self.include_function_definition:
  287.                 defn = first['definition'].firstChild.data
  288.             else:
  289.                 defn = ""
  290.             self.add_text('\n')
  291.             self.add_text('%feature("docstring") ')
  292.  
  293.             anc = node.parentNode.parentNode
  294.             if cdef_kind in ('file', 'namespace'):
  295.                 ns_node = anc.getElementsByTagName('innernamespace')
  296.                 if not ns_node and cdef_kind == 'namespace':
  297.                     ns_node = anc.getElementsByTagName('compoundname')
  298.                 if ns_node:
  299.                     ns = ns_node[0].firstChild.data
  300.                     self.add_text(' %s::%s "\n%s'%(ns, name, defn))
  301.                 else:
  302.                     self.add_text(' %s "\n%s'%(name, defn))
  303.             elif cdef_kind in ('class', 'struct'):
  304.                 # Get the full function name.
  305.                 anc_node = anc.getElementsByTagName('compoundname')
  306.                 cname = anc_node[0].firstChild.data
  307.                 self.add_text(' %s::%s "\n%s'%(cname, name, defn))
  308.  
  309.             for n in node.childNodes:
  310.                 if n not in first.values():
  311.                     self.parse(n)
  312.             self.add_text(['";', '\n'])
  313.  
  314.     def do_definition(self, node):
  315.         data = node.firstChild.data
  316.         self.add_text('%s "\n%s'%(data, data))
  317.  
  318.     def do_sectiondef(self, node):
  319.         kind = node.attributes['kind'].value
  320.         if kind in ('public-func', 'func', 'user-defined', ''):
  321.             self.generic_parse(node)
  322.  
  323.     def do_header(self, node):
  324.         """For a user defined section def a header field is present
  325.        which should not be printed as such, so we comment it in the
  326.        output."""
  327.         data = node.firstChild.data
  328.         self.add_text('\n/*\n %s \n*/\n'%data)
  329.         # If our immediate sibling is a 'description' node then we
  330.         # should comment that out also and remove it from the parent
  331.         # node's children.
  332.         parent = node.parentNode
  333.         idx = parent.childNodes.index(node)
  334.         if len(parent.childNodes) >= idx + 2:
  335.             nd = parent.childNodes[idx+2]
  336.             if nd.nodeName == 'description':
  337.                 nd = parent.removeChild(nd)
  338.                 self.add_text('\n/*')
  339.                 self.generic_parse(nd)
  340.                 self.add_text('\n*/\n')
  341.  
  342.     def do_simplesect(self, node):
  343.         kind = node.attributes['kind'].value
  344.         if kind in ('date', 'rcs', 'version'):
  345.             pass
  346.         elif kind == 'warning':
  347.             self.add_text(['\n', 'WARNING: '])
  348.             self.generic_parse(node)
  349.         elif kind == 'see':
  350.             self.add_text('\n')
  351.             self.add_text('See: ')
  352.             self.generic_parse(node)
  353.         else:
  354.             self.generic_parse(node)
  355.  
  356.     def do_argsstring(self, node):
  357.         self.generic_parse(node, pad=1)
  358.  
  359.     def do_member(self, node):
  360.         kind = node.attributes['kind'].value
  361.         refid = node.attributes['refid'].value
  362.         if kind == 'function' and refid[:9] == 'namespace':
  363.             self.generic_parse(node)
  364.  
  365.     def do_doxygenindex(self, node):
  366.         self.multi = 1
  367.         comps = node.getElementsByTagName('compound')
  368.         for c in comps:
  369.             refid = c.attributes['refid'].value
  370.             fname = refid + '.xml'
  371.             if not os.path.exists(fname):
  372.                 fname = os.path.join(self.my_dir,  fname)
  373.             if not self.quiet:
  374.                 print ("parsing file: %s", fname)
  375.             p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
  376.             p.generate()
  377.             self.pieces.extend(self.clean_pieces(p.pieces))
  378.  
  379.     def write(self, fname):
  380.         o = my_open_write(fname)
  381.         if self.multi:
  382.             o.write("".join(self.pieces))
  383.         else:
  384.             o.write("".join(self.clean_pieces(self.pieces)))
  385.         o.close()
  386.  
  387.     def clean_pieces(self, pieces):
  388.         """Cleans the list of strings given as `pieces`.  It replaces
  389.        multiple newlines by a maximum of 2 and returns a new list.
  390.        It also wraps the paragraphs nicely.
  391.  
  392.        """
  393.         ret = []
  394.         count = 0
  395.         for i in pieces:
  396.             if i == '\n':
  397.                 count = count + 1
  398.             else:
  399.                 if i == '";':
  400.                     if count:
  401.                         ret.append('\n')
  402.                 elif count > 2:
  403.                     ret.append('\n\n')
  404.                 elif count:
  405.                     ret.append('\n'*count)
  406.                 count = 0
  407.                 ret.append(i)
  408.  
  409.         _data = "".join(ret)
  410.         ret = []
  411.         for i in _data.split('\n\n'):
  412.             if i == 'Parameters:' or i == 'Exceptions:':
  413.                 ret.extend([i, '\n-----------', '\n\n'])
  414.             elif i.find('// File:') > -1: # leave comments alone.
  415.                 ret.extend([i, '\n'])
  416.             else:
  417.                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
  418.                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
  419.                 ret.extend([_tmp, '\n\n'])
  420.         return ret
  421.  
  422.  
  423. def convert(input, output, include_function_definition=True, quiet=False):
  424.     p = Doxy2SWIG(input, include_function_definition, quiet)
  425.     p.generate()
  426.     p.write(output)
  427.  
  428. def main():
  429.     usage = __doc__
  430.     parser = optparse.OptionParser(usage)
  431.     parser.add_option("-n", '--no-function-definition',
  432.                       action='store_true',
  433.                       default=False,
  434.                       dest='func_def',
  435.                       help='do not include doxygen function definitions')
  436.     parser.add_option("-q", '--quiet',
  437.                       action='store_true',
  438.                       default=False,
  439.                       dest='quiet',
  440.                       help='be quiet and minimise output')
  441.  
  442.     options, args = parser.parse_args()
  443.     if len(args) != 2:
  444.         parser.error("error: no input and output specified")
  445.  
  446.     convert(args[0], args[1], not options.func_def, options.quiet)
  447.  
  448.  
  449. if __name__ == '__main__':
  450.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement