
Untitled
By: a guest on
May 4th, 2012 | syntax:
None | size: 1.45 KB | hits: 12 | expires: Never
Context dependent split of a string in python
s = '2-Methyl-3-phythyl-1,4-naphthochinon,Vitamin, K1,Antihemorrhagic vitamin'
splitS = ['2-Methyl-3-phythyl-1,4-naphthochinon', 'Vitamin, K1', 'Antihemorrhagic vitamin']
>>> s = '2-Methyl-3-phythyl-1,4-naphthochinon,Vitamin, K1,Antihemorrhagic vitamin'
>>> pat = re.compile(r"([^\d\s],[^\d\s])|([^\s],[^\d\s])|([^\d\s],[^\s])")
>>> re.split(pat, s)
['2-Methyl-3-phythyl-1,4-naphthochino', 'n,V', None, None, 'itamin, K', None, '1,A', None, 'ntihemorrhagic vitamin']
(?<!\d),(?! )|(?<=\d),(?![\d ])
>>> re.split(r'(?<!\d),(?! )|(?<=\d),(?![\d ])', s)
['2-Methyl-3-phythyl-1,4-naphthochinon', 'Vitamin, K1', 'Antihemorrhagic vitamin']
(?<!\d), # match a comma that is not preceded by a digit...
(?! ) # ... as long as it is not followed by a space
| # OR
(?<=\d), # match a comma that is preceded by a digit...
(?![\d ]) # ... as long as it is not followed by a digit or a space
(?<!\d),(?! )|,(?![\d ])
# Split `s` on commas, except commas that sit between two digits (as in
# "1,4") or that are followed by whitespace (as in "Vitamin, K1").
import re

s = '2-Methyl-3-phythyl-1,4-naphthochinon,Vitamin, K1,Antihemorrhagic vitamin'

# Index of every comma in the string.
all_commas = [match.start() for match in re.finditer(r',', s)]

# Indices of commas that must NOT be split on. Lookarounds match only the
# comma itself, so adjacent cases such as "1,2,3" are all detected (a
# consuming pattern like \d,\d would swallow the shared digit and miss the
# second comma).
special_commas = [
    match.start() for match in re.finditer(r'(?<=\d),(?=\d)|,(?=\s)', s)
]

# Whatever remains are the real separators.
split_commas = set(all_commas) - set(special_commas)

splitS = []
start = -1  # so that the first slice begins at index 0
# The trailing None makes the last slice run to the end of the string.
for end in sorted(split_commas) + [None]:
    splitS.append(s[start+1:end])
    start = end
>>> splitS
['2-Methyl-3-phythyl-1,4-naphthochinon', 'Vitamin, K1', 'Antihemorrhagic vitamin']