Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # encoding: utf8
- """
- Created on 2017.08.17
- @author: yalei
- """
- from __future__ import unicode_literals
- import pinyin
- from Pinyin2Hanzi import DefaultDagParams
- from Pinyin2Hanzi import dag
- # Pinyin2Hanzi DAG parameters, built once at import time and passed to
- # every dag() call made by fulanhua() below.
- dagParams = DefaultDagParams()
def fulanhua(string, verbose=True):
    """Rewrite text as "Fulanhua"-accented pinyin and look up matching Hanzi.

    ``string`` is converted with ``pinyin.get(..., format="strip",
    delimiter=" ")`` and split on whitespace. Each resulting syllable is
    then rewritten:

    * a leading ``hu`` is replaced by ``f`` (``hua`` -> ``fa``);
    * otherwise a leading ``h`` is replaced by ``f``;
    * otherwise a leading ``n`` is replaced by ``l``;
    * afterwards every ``ong`` is replaced by ``eng``.

    The rewritten syllables are handed to ``dag`` together with the
    module-level ``dagParams`` to obtain candidate Hanzi renderings.

    Args:
        string: Text to convert.
        verbose: When true, print ``original -> rewritten`` for every
            syllable.

    Returns:
        A ``(pinyin, candidates)`` tuple: the rewritten syllables joined by
        single spaces, and the value returned by
        ``dag(dagParams, syllables, path_num=10, log=False)``.
    """
    syllables = pinyin.get(string, format="strip", delimiter=" ").split()
    rules = {'ong': 'eng'}
    res = []
    for word in syllables:
        # Stay in text (str) throughout: encoding to bytes here makes the
        # startswith()/replace() calls below fail on Python 3, where bytes
        # and str cannot be mixed.
        new_word = word
        if new_word.startswith('hu'):
            # NOTE(review): a bare "hu" collapses to just "f" -- confirm
            # that dag() resolves this as intended.
            new_word = 'f' + new_word[2:]
        elif new_word.startswith('h'):
            new_word = 'f' + new_word[1:]
        elif new_word.startswith('n'):
            new_word = 'l' + new_word[1:]
        # Distinct loop names: reusing the outer loop's index variable here
        # would leave it bound to a rule key instead of a position.
        for old, new in rules.items():
            new_word = new_word.replace(old, new)
        res.append(new_word)
        if verbose:
            print('%s -> %s' % (word, new_word))
    result = dag(dagParams, res, path_num=10, log=False)
    return ' '.join(res), result
if __name__ == '__main__':
    # Command-line entry point: convert the first argument and print every
    # candidate rendering with its score.
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError traceback.
        sys.exit('usage: %s TEXT' % sys.argv[0])

    py, hz = fulanhua(sys.argv[1], verbose=False)
    for item in hz:
        print(' %s %s' % (item.score, ''.join(item.path)))
    if not hz:
        # No candidates came back: fall back to the rewritten pinyin.
        print(py)
- """
- $ python fulanhua.py 你能不能别说你是湖南人
- 3.27469370593e-05 理冷不冷别说历史辅懒人
- $ python fulanhua.py 你能不能别说普通话
- 0.000149049266441 理冷不冷别说扑腾发
- $ python fulanhua.py 黄花机场
- 0.0794434315813 方法机场
- """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement