Advertisement
aboes

Python Parser - Newick to JSON

Jun 3rd, 2013
271
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.25 KB | None | 0 0
  1. #--------------------------------------------------------------------------------
  2. # Name:        Python Newick to JSON Parser
  3. # Purpose:     Reads the Newick-Topology/-Phylogeny Format into a JSON Structure
  4. #              JSON = { "lab":node_label, "dist":distance, "tree":[{ "lab":leaf_label, "dist":distance }] }
  5. # Author:      aboes (minor modifications to code by Damian Kao - http://www.biostars.org/p/48424/#48442)
  6. #
  7. # Created:     03.06.2013
  8. # Licence:     CC-BY 3.0
  9. #--------------------------------------------------------------------------------
  10. # Some valid Newick-Topologies (but ALL Semicolons will be removed!)
  11. #--------------------------------------------------------------------------------
  12. a = "(,(,));"                           # Minimal Newick-Tree
  13. b = "(A,((B,C),D));"                    # Kladogramm, Leaves mit Labels
  14. c = "(A,((B,C),D,E),F,G);"              # Einige Polytomien
  15. d = "(A,((B,C)node1,D)node2)root;"      # Interne Nodes mit Labels
  16. e = "(A:0.1,((B:0.2,C:0.3),D:0.4));"    # Leaves mit Distanzen
  17. f = "(A,((B,C):0.5,D):0.8):0.9;"        # Interne Nodes mit Distanzen
  18. g = "(A,((B,C)node1:0.3,D)node2:0.8);"  # Interne Nodes mit Labels und Distanzen
  19. h = "(A:1,(B:2,C:3)E:4)F:5;"            # Interne Nodes und Leaves mit Labels und Distanzen
  20. i = "(:0.3,:0.5,(0.2,0.9));"            # Interne Nodes und Leaves nur mit Distanzen
  21. long = "(Ceratophyllales,(Ranunculales,(Sabiales,(Proteales,(Trochodendrales,(Buxales,(Gunnerales,((Dilleniales,(Saxifragales,(Vitales,((Zygophyllales,((Celastrales,(Oxalidales,Malpighiales)),(Fabales,(Rosales,(Cucurbitales,Fagales))))),((Geraniales,Myrtales),(Crossosomatales,(Picraminales,(Sapindales,(Huerteales,(Malvales,Brassicales)))))))))),(Berberidopsidales,(Santalales,(Caryophyllales,(Cornales,(Ericales,((Garryales,(Gentianales,(Lamiales,(Solanales,Boraginales)))),(Aquifoliales,(Asterales,(Escalloniales,(Bruniales,(Apiales,(Paracryphiales,Dipsacales))))))))))))))))))));" # It's a cladogram of the Orders of Eudicots according to apgIII
  22. spaces = "(aa bb, (cc_dd, ee-ff));"
  23. brackets = "(a[3],(b<12>,c{3}));"
  24.  
  25. test = brackets
  26. #--------------------------------------------------------------------------------
  27. # actual code
  28. #--------------------------------------------------------------------------------
  29. def parseNode(nwString):
  30.     parenCount = 0
  31.  
  32.     tree = ''
  33.     processed = ''
  34.     index = 0
  35.     for char in nwString:
  36.         if char == "(":
  37.             parenCount += 1
  38.             if parenCount == 1:
  39.                 continue
  40.         elif char == ")":
  41.             parenCount -= 1
  42.             if parenCount == 0:
  43.                 if index + 2 > len(nwString):
  44.                     break
  45.                 else:
  46.                     tree = nwString[index + 2:]
  47.                     break
  48.  
  49.         if char == ",":
  50.             if parenCount != 1:
  51.                 processed += "|"
  52.             else:
  53.                 processed += ","
  54.         else:
  55.             processed += char
  56.  
  57.         index += 1
  58.  
  59.     data = processed.split(',')
  60.  
  61.     for i in range(len(data)):
  62.         data[i] = data[i].replace('|',',')
  63.  
  64.     t = tree.strip()
  65.     if t.find(":") == -1:
  66.         label = t
  67.         dist = ""
  68.     else:
  69.         label = t[:t.find(":")]
  70.         dist = t[t.find(":")+1:]
  71.  
  72.     return (label, dist, data)
  73.  
  74. def recurseBuild(nwString):
  75.     nwString = nwString.replace(";","")
  76.     if nwString.find('(') == -1:
  77.         if len(nwString.split(',')) == 1:
  78.             if nwString.find(":") == -1:
  79.                 label = nwString
  80.                 dist = ""
  81.             else:
  82.                 label = nwString[:nwString.find(":")]
  83.                 dist = float(nwString[nwString.find(":")+1:])
  84.             return {"lab":label,"dist":dist}
  85.         else:
  86.             return nwString.split(',')
  87.     else:
  88.         label, dist, data = parseNode(nwString)
  89.  
  90.         dataArray = []
  91.         for item in data:
  92.             dataArray.append(recurseBuild(item))
  93.  
  94.         return {"lab":label,"dist":dist,"tree":dataArray}
  95. #--------------------------------------------------------------------------------
  96. # to show some results from the test-topologies
  97. #--------------------------------------------------------------------------------
  98. result = recurseBuild(test)
  99. print(result)
  100. import json
  101. print(json.dumps(result, sort_keys=True, indent=1, separators=(",",":")))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement