Advertisement
Guest User

Untitled

a guest
May 24th, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.34 KB | None | 0 0
  1. import Levenshtein
  2. from math import log
  3. import sqlite3
  4. import interactive_spellchecker
  5. import os
  6.  
  7. r'''
  8. def another_init():
  9. conn = sqlite3.connect(r'C:\Users\pervu
  10. \Desktop\levenshtein_speller\speller.db')
  11. cursor = conn.cursor()
  12. cursor.execute("CREATE VIRTUAL TABLE freq USING fts5(names)")
  13. with open(r'C:\Users\pervu\Desktop\Python\hagen_freq_desc\freq.txt') as f:
  14. for l in f:
  15. parts = l.split(" | ")
  16. lem = parts[3]
  17. cursor.execute("insert into freq values(?)", (lem,))
  18. conn.commit()
  19. conn.close()
  20. '''
  21.  
  22.  
  23. def main():
  24. # не хочу просто перебор
  25. # реализация закона Цифра
  26. # (частота слова обратно пропорциональна его порядковому номеру)
  27. # (можно не хранить частоту)
  28. # st='шикарноможноделитьноиногданельзяаявообщекошечка'
  29. st = input()
  30. sp = Splitter()
  31. res = sp.infer_spaces(st)
  32. print(res)
  33. return res
  34.  
  35.  
  36. class Splitter():
  37. def __init__(self):
  38. st = r'speller.db'
  39. conn = sqlite3.connect(os.path.join(
  40. os.path.dirname(os.path.abspath(__file__)), st))
  41. cursor = conn.cursor()
  42. cursor.execute('SELECT * FROM freq')
  43. list = cursor.fetchall()
  44. conn.commit()
  45. conn.close()
  46. words = interactive_spellchecker.appropriate_list(list)
  47. self.wordcost = dict((k, log((i + 1) * log(len(words))))
  48. for i, k in enumerate(words))
  49. self.maxword = max(len(x) for x in words)
  50.  
  51. def infer_spaces(self, s):
  52. def best_match(i):
  53. candidates = enumerate(reversed(cost[max(0, i - self.maxword):i]))
  54. return min(
  55. (c + self.wordcost.get(s[i - k - 1:i],
  56. 9e999), k + 1) for k, c in candidates)
  57. #
  58. cost = [0]
  59. for i in range(1, len(s) + 1):
  60. c, k = best_match(i)
  61. cost.append(c)
  62. #
  63. out = []
  64. i = len(s)
  65. while i > 0:
  66. c, k = best_match(i)
  67. assert c == cost[i]
  68. out.append(s[i - k:i])
  69. i -= k
  70. return " ".join(reversed(out))
  71.  
  72.  
# Run the interactive splitter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement