Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def splite_sentence(text):
    """Split *text* into long sentences, each a list of short clauses.

    The text is scanned character by character. A character found in
    ``short_sep`` closes the current clause; a character found in
    ``long_sep`` closes the current clause and the current sentence.
    The separator character stays attached to the end of the clause it
    closes, so concatenating every clause in order reproduces *text*.

    Args:
        text: The string to split.

    Returns:
        A list of sentences, where each sentence is a list of clause
        strings. An empty *text* yields an empty list. Consecutive
        separators yield clauses that consist of the separator alone.
    """
    long_sep = u'。!?;!?;'
    short_sep = u',, '  # note: a plain space also ends a clause

    long_sents = []
    short_sents = []
    offset_begin = 0  # index of the first character of the open clause

    for i, e in enumerate(text):
        if e in short_sep:
            closes_sentence = False
        elif e in long_sep:
            closes_sentence = True
        else:
            continue

        # Close the clause, keeping the separator on its tail.
        offset_end = i + 1
        short_sents.append(text[offset_begin:offset_end])
        offset_begin = offset_end

        if closes_sentence:
            long_sents.append(short_sents)
            short_sents = []

    # Trailing text with no terminating separator is still a clause.
    if offset_begin != len(text):
        short_sents.append(text[offset_begin:])

    # Flush a sentence that never met a long separator.
    if short_sents:
        long_sents.append(short_sents)

    return long_sents
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement