Advertisement
Guest User

pywin

a guest
Nov 24th, 2015
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.58 KB | None | 0 0
  1. # Nikola Jovanovic 0081/15
  2.  
  3. from string import ascii_lowercase
  4. from string import ascii_uppercase
  5. ascii_letters = ascii_lowercase + ascii_uppercase   ## Bukvalno vec postoji ascii_letters u string :D
  6.  
  7. def import_standard_etaoin(dat_name=''):    ## Podrazumevani argument bas i nije primeren (-0)
  8.     std_freq = {}
  9.     try:
  10.         letterfreq = open(dat_name+'.txt.', 'r')
  11.         for row in letterfreq:
  12.             row = row.replace('\n', '')
  13.             std_freq[row[0]] = float(row.split('\t')[1][:-1])/100
  14.         print ('Fajl ucitan.')
  15.         return std_freq
  16.     except:
  17.         print ('Koriscenje predefinisanih ucestalosti.')
  18.     return {'e': 0.12702,
  19.             't': 0.09056,
  20.             'a': 0.08167,
  21.             'o': 0.07507,
  22.             'i': 0.06966,
  23.             'n': 0.06749,
  24.             's': 0.06327,
  25.             'h': 0.06094,
  26.             'r': 0.05987,
  27.             'd': 0.04253,
  28.             'l': 0.04025,
  29.             'u': 0.02782,
  30.             'c': 0.02758,
  31.             'm': 0.02406,
  32.             'w': 0.02361,
  33.             'f': 0.02228,
  34.             'g': 0.02015,
  35.             'y': 0.01974,
  36.             'p': 0.01929,
  37.             'b': 0.01492,
  38.             'v': 0.00978,
  39.             'k': 0.00772,
  40.             'j': 0.00153,
  41.             'x': 0.00150,
  42.             'q': 0.00095,
  43.             'z': 0.00074}    
  44.  
  45. def m_o_succ(succs):
  46.     succ_list = [(apps, l) for l, apps in succs.items()]
  47.     maxv = max(succ_list)
  48.     return maxv[1]
  49.  
  50. def precess_file(dat_name):
  51.     global ascii_lowercase, ascii_letters
  52.     letts_in_dat= 0
  53.     dat_lett_count = {c: 0 for c in ascii_lowercase}
  54.     lett_succs = {k: {c: 0 for c in ascii_lowercase} for k in ascii_lowercase}
  55.     # raw_input('Unesite ime datoteke (bez .txt): ')
  56.     try:
  57.         text_dat = open(dat_name+'.txt')
  58.     except:
  59.         print ('Greska prilikom ucitavanja fajla')
  60.         exit()
  61.     pc = ''
  62.     for i, c in enumerate(text_dat.read()):
  63.         if c in ascii_letters:
  64.             c = c.lower()
  65.             dat_lett_count[c] += 1
  66.             letts_in_dat += 1
  67.             if len(pc) > 0: lett_succs[pc][c] += 1
  68.             pc = c
  69.         else:
  70.             pc = ''
  71.     text_dat.close()
  72.     return {c: [float(dat_lett_count[c])/letts_in_dat, m_o_succ(lett_succs[c])] for c in ascii_lowercase}
  73.  
  74. def compare_freq(std_etaoin, dat_data):
  75.     diff = {}
  76.     for c in ascii_lowercase:
  77.         diff[c] = std_etaoin[c] / round(dat_data[c][0], 5)
  78.     return diff
  79.  
  80. def print_results(std_etaoin, dat_data, comp):  ## Sjajan ispis, bolji nego sa pprint
  81.     lett_list = [(dat_data[x][0], x, dat_data[x][1]) for x in dat_data.keys()]
  82.     print ('slovo | dobijeni % | ocekivani % | odnos % | sledbenik')
  83.     for x in sorted(lett_list)[::-1]:
  84.         print (x[1]+'     | {:9.3f}% | {:10.3f}% | {:7.3f} | '.format(round(x[0]*100, 3), std_etaoin[x[1]]*100, comp[x[1]])+x[2])
  85.  
  86. def save_results(dat_name, diff):
  87.     out_dat = open(dat_name+'-etaoin.txt', 'w')
  88.     out = []
  89.     for c in ascii_lowercase:
  90.         out.append(c + '\t{:.3f}'.format(round(diff[c], 3)))
  91.     out_dat.write('\n'.join(out))
  92.     out_dat.close()
  93.  
  94. std_etaoin_dat_name = raw_input ('Unesite ime datoteke sa ucestanoscu slova (bez .txt): ')
  95.  
  96. standard_freq = import_standard_etaoin(std_etaoin_dat_name)
  97.  
  98. dat_name = raw_input ('Unesite ime tekstulane datoteke (bez .txt): ')
  99.  
  100. file_data = precess_file(dat_name)
  101.  
  102. comp = compare_freq(standard_freq, file_data)
  103.  
  104. print_results(standard_freq, file_data, comp)
  105. ## Ne ispisuje se broj ponavljanja slova, ali se racuna, pa je ok (-0)
  106.  
  107. save_results(dat_name, comp)
  108.  
  109. ## Sve je u potpunosti ispravno
  110. ## Poeni: 80/80
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement