Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import regex
- from calibre import replace_entities, prepare_string_for_xml
def replace(match, number, file_name, metadata, dictionaries, data, functions, *args, **kwargs):
    """Re-insert missing spaces between words that were run together.

    Written with the signature of a calibre editor "function mode"
    search-and-replace callback.  Only ``match`` and ``dictionaries`` are
    used; the remaining parameters are accepted and ignored.

    The return value is wrapped as ``'>' + text + '<'``, so the search
    expression is presumably of the form ``>(...)<`` with the text between
    two tags in group 1 -- confirm against the search expression used.

    Args:
        match: Match object; ``match.group(1)`` is the text to repair.
        dictionaries: Object exposing ``recognized(word)``, used to decide
            whether a word (or each half of a candidate split) is known.

    Returns:
        The repaired text, passed through ``prepare_string_for_xml`` and
        surrounded by ``>`` and ``<``.
    """
    def fix_word(m):
        # Called by regex.sub for every word-like token found in the text.
        word = m.group()
        if dictionaries.recognized(word):
            return word
        # Try each split point that leaves at least one character on the
        # left and at least two on the right; the first split whose halves
        # are both recognized wins.
        # `range` instead of `xrange`: the latter does not exist on Python 3.
        for i in range(1, len(word) - 1):
            a, b = word[:i], word[i:]
            if dictionaries.recognized(a) and dictionaries.recognized(b):
                return a + ' ' + b
        # Fallback without a dictionary lookup: a word glued to a following
        # elided form (d', l', n', m', t', s', qu', quoiqu', quelqu',
        # puisqu', lorsqu', jusqu' + word) is split right before the elision.
        elided = regex.match(r"(\w+)((?:[dlnmts]|qu(?:oi|el)qu|puisqu|lorsqu|jusqu|qu)[’'`]\w+)", word)
        if elided:
            return elided.group(1) + " " + elided.group(2)
        return word

    # Work on the text with entities replaced so that words are matched as
    # plain characters (presumably what replace_entities does -- it is a
    # calibre helper not shown here).
    text = replace_entities(match.group(1))
    # Tokens of two or more word characters, optionally containing
    # apostrophes, backticks or hyphens in the middle.
    text = regex.sub(r"\b\w(?:[\w’'`-]*\w|\w+)\b", fix_word, text, flags=regex.VERSION1)
    # Make the text safe to put back into the markup before returning it.
    text = prepare_string_for_xml(text)
    return '>' + text + '<'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement