Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #--------------------------------------------------------------------------------
- # Name: Python Newick to JSON Parser
- # Purpose: Reads the Newick-Topology/-Phylogeny Format into a JSON Structure
- # JSON = { "lab":node_label, "dist":distance, "tree":[{ "lab":leaf_label, "dist":distance }] }
- # Author: aboes (a few modifications to code by Damian Kao - http://www.biostars.org/p/48424/#48442)
- #
- # Created: 03.06.2013
- # Licence: CC-BY 3.0
- #--------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
# Sample Newick topologies (note: the parser removes ALL semicolons)
#--------------------------------------------------------------------------------
a = "(,(,));"                               # minimal Newick tree
b = "(A,((B,C),D));"                        # cladogram, leaves with labels
c = "(A,((B,C),D,E),F,G);"                  # some polytomies
d = "(A,((B,C)node1,D)node2)root;"          # internal nodes with labels
e = "(A:0.1,((B:0.2,C:0.3),D:0.4));"        # leaves with distances
f = "(A,((B,C):0.5,D):0.8):0.9;"            # internal nodes with distances
g = "(A,((B,C)node1:0.3,D)node2:0.8);"      # internal nodes with labels and distances
h = "(A:1,(B:2,C:3)E:4)F:5;"                # internal nodes and leaves with labels and distances
# Internal nodes and leaves with distances only
# (0.2 and 0.9 have no ':' in front of them).
i = "(:0.3,:0.5,(0.2,0.9));"
# Cladogram of the orders of Eudicots according to APG III; the literal is
# split over several lines purely for readability.
long = (
    "(Ceratophyllales,(Ranunculales,(Sabiales,(Proteales,"
    "(Trochodendrales,(Buxales,(Gunnerales,((Dilleniales,"
    "(Saxifragales,(Vitales,((Zygophyllales,((Celastrales,"
    "(Oxalidales,Malpighiales)),(Fabales,(Rosales,"
    "(Cucurbitales,Fagales))))),((Geraniales,Myrtales),"
    "(Crossosomatales,(Picraminales,(Sapindales,(Huerteales,"
    "(Malvales,Brassicales)))))))))),(Berberidopsidales,"
    "(Santalales,(Caryophyllales,(Cornales,(Ericales,"
    "((Garryales,(Gentianales,(Lamiales,(Solanales,Boraginales)))),"
    "(Aquifoliales,(Asterales,(Escalloniales,(Bruniales,"
    "(Apiales,(Paracryphiales,Dipsacales))))))))))))))))))));"
)
spaces = "(aa bb, (cc_dd, ee-ff));"         # labels with blanks, underscores and hyphens
brackets = "(a[3],(b<12>,c{3}));"           # labels containing bracket characters
test = brackets                             # topology used by the demo code
- #--------------------------------------------------------------------------------
- # actual code
- #--------------------------------------------------------------------------------
def parseNode(nwString):
    """Split one parenthesised Newick node into label, distance and children.

    The scan skips the first "(" that opens the node and stops at the ")"
    that closes it.  The text in between is cut at the commas that belong
    directly to this node (nesting depth 1); commas inside nested groups
    stay inside their child substring.  Whatever follows the closing ")"
    is read as "label:distance".

    Args:
        nwString: Newick text for a single node, expected to contain "(".

    Returns:
        A tuple (label, dist, data) where
        label is the text after the closing ")" up to the first ":",
            with surrounding whitespace of the trailer removed ("" if none);
        dist is the text after that ":" as a string, not converted to a
            number ("" when there is no ":");
        data is the list of unparsed child substrings in input order.
    """
    depth = 0
    children = []
    current = []          # characters of the child currently being collected
    trailer = ''          # text after the node's closing parenthesis
    for position, char in enumerate(nwString):
        if char == "(":
            depth += 1
            if depth == 1:
                # The parenthesis that opens this node belongs to no child.
                continue
        elif char == ")":
            depth -= 1
            if depth == 0:
                # Matching close of this node: the rest is "label:distance".
                trailer = nwString[position + 1:]
                break
        if char == "," and depth == 1:
            # A comma at depth 1 separates two direct children.  Splitting
            # here directly (instead of masking nested commas with a
            # placeholder character) keeps every label character intact,
            # including "|".
            children.append("".join(current))
            current = []
        else:
            current.append(char)
    children.append("".join(current))

    label, _, dist = trailer.strip().partition(":")
    return (label, dist, children)
def recurseBuild(nwString):
    """Recursively convert Newick text into nested dicts and lists.

    All semicolons are removed and surrounding whitespace is stripped before
    the text is inspected, so blanks after a separating comma do not end up
    inside labels.

    Args:
        nwString: Newick text for a whole tree, a subtree or a single leaf.

    Returns:
        * Text containing "(": a dict {"lab", "dist", "tree"} where "lab" and
          "dist" are passed through unchanged from parseNode and "tree" is
          the list of recursively converted children.
        * Text without "(" and without ",": a leaf dict {"lab", "dist"};
          "dist" is a float when the text contains ":", otherwise "".
        * Text without "(" but with ",": the plain list of comma-separated
          pieces.

    Raises:
        ValueError: if a leaf has a ":" that is not followed by text that
            float() accepts.
    """
    nwString = nwString.replace(";", "").strip()

    if "(" in nwString:
        label, dist, data = parseNode(nwString)
        subtrees = [recurseBuild(item) for item in data]
        return {"lab": label, "dist": dist, "tree": subtrees}

    pieces = nwString.split(',')
    if len(pieces) > 1:
        # No parentheses but several comma-separated entries: hand the
        # pieces back as they are.
        return pieces

    label, colon, length = nwString.partition(":")
    dist = float(length) if colon else ""
    return {"lab": label.strip(), "dist": dist}
- #--------------------------------------------------------------------------------
- # to show some results from the test-topologies
- #--------------------------------------------------------------------------------
import json

# Parse the selected sample topology, then show it twice: once as the raw
# Python structure and once as compact, key-sorted JSON.
result = recurseBuild(test)
print(result)
print(json.dumps(result, sort_keys=True, indent=1, separators=(",", ":")))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement