Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/awk -f
# Set up field separators and the global state shared by the rules and
# functions in this script.
BEGIN {
    OFS = "\t"      # tab-separated output ...
    FS = OFS        # ... and tab-separated input
    lastch = "|"    # previous character seen by putch(); "|" is the boundary marker
    changes = 0     # number of swaps accepted by search()
    coef = 1.00     # scale applied to d[] when it is compared against p2[]
}
# Load the reference letter statistics from every input file whose base name
# contains "bigram".  The first line of each such file is skipped as a header;
# FNR (per-file record number) is used rather than NR so that the header is
# skipped regardless of where the file appears on the command line.
#
# Rows are dispatched on the length of the first field:
#   > 2 characters : the alphabet string; fills alphi[] (letter -> index),
#                    alphs[] (index -> letter) and no (alphabet size)
#   1 character    : p1[index] is taken from field 5
#   2 characters   : p2[i1,i2] = p1[i1] * field 6
#                    (presumably field 6 is the conditional probability of the
#                    second letter given the first -- confirm against the
#                    table format)
# Single-letter rows must precede the two-letter rows that use them, because
# p2 is built from the p1 value already stored.
FILENAME ~ /bigram[^/]*$/ && FNR > 1 {
    if (!bigram) { print "bigram"; bigram = 1 }    # one-time progress marker
    if (length($1) > 2) {
        alph_str = $1
        no = length(alph_str)
        for (i = 1; i <= no; i++) {
            alphi[substr(alph_str, i, 1)] = i - 1
            alphs[i - 1] = substr(alph_str, i, 1)
        }
        print no, alph_str
    } else if (length($1) == 1) {
        p1[alphi[$1]] = $5
    } else if (length($1) == 2) {
        ci1 = alphi[substr($1, 1, 1)]
        ci2 = alphi[substr($1, 2, 1)]
        p2[ci1, ci2] = p1[ci1] * $6
        #print ci1,ci2,p2[ci1,ci2], $1, alphs[ci1], alphs[ci2]
    }
}
# Record one more character of the text in the running counters.
#   c - the character that follows the previously recorded one (lastch)
# Updates:
#   count2[prev,c] - occurrences of the pair prev -> c
#   count1[prev]   - occurrences of prev as the first element of a pair
#   count0         - total number of calls (pairs recorded)
#   lastch         - becomes c, ready for the next call
function putch(c) {
    count2[lastch, c] += 1
    count1[lastch] += 1
    count0 += 1
    lastch = c
}
# Read the text to be analysed from every input file whose base name contains
# "text".  Spaces and tabs are removed from each line, the remainder is
# appended to thetext, and every remaining character is fed to putch().
FILENAME ~ /text[^/]*$/ {
    if (!got_text) {        # one-time progress marker
        print "text"
        got_text = 1
    }
    gsub(/[ \t]/, "")       # no target given: operates on $0
    thetext = thetext $0
    i = 1
    while (i <= length()) {
        putch(substr($0, i, 1))
        i++
    }
}
# Magnitude of a: a itself when it is positive, its negation otherwise.
# Used as the per-cell error measure by recalc_cv() and fix_v_1().
function abs(a) {
    if (a > 0)
        return a
    return -a
}
# Alternative error measure kept for reference:
#function abs(a) { return a^2 }
# Recompute the total cost cv from scratch and return it.
#   rebuild_d - when true, first rebuild d[] by accumulating the observed pair
#               frequencies pc2[c1,c2] into d[ckey[c1],ckey[c2]], i.e. mapped
#               through the current key
# The cost is the sum over all index pairs below `no` of
# abs(p2[c1,c2] - coef*d[c1,c2]); each term is also cached in ed[c1,c2] so
# that fix_v_1() can later update cv incrementally.
# Note: c1 and c2 are global loop variables.
function recalc_cv(rebuild_d) {
    if (rebuild_d) {
        delete d
        c1 = 0
        while (c1 < nc2) {
            c2 = 0
            while (c2 < nc2) {
                d[ckey[c1], ckey[c2]] += pc2[c1, c2]
                c2++
            }
            c1++
        }
    }

    cv = 0
    c1 = 0
    while (c1 < no) {
        c2 = 0
        while (c2 < no) {
            ed[c1, c2] = abs(p2[c1, c2] - coef * d[c1, c2])
            cv += ed[c1, c2]
            c2++
        }
        c1++
    }
    return cv
}
# Build the initial key and the matching cost tables.
#   nc2  - number of key slots: the larger of no and nc
#   ckey - maps a slot index (closed letter) to an open-letter index
# Slot i starts at i % (no-1), so the initial key only uses indices
# 0 .. no-2 and wraps around for higher slots.
# NOTE(review): this never assigns index no-1 initially; an earlier variant
# (ckey[i]=i for i<no, otherwise no-1) did -- confirm which is intended.
function init_perm() {
    nc2 = (nc > no) ? nc : no
    i = 0
    while (i < nc2) {
        ckey[i] = i % (no - 1)
        i++
    }
    recalc_cv(1)    # rebuild d[] for this key and compute cv
}
# Refresh the cached error of a single cell (c1,c2) and adjust the running
# total cv by the difference between the new and the previously cached value.
function fix_v_1(c1, c2) {
    fix_v_ed2 = p2[c1, c2] - coef * d[c1, c2]
    fix_v_ed2 = abs(fix_v_ed2)
    cv = cv + (fix_v_ed2 - ed[c1, c2])
    ed[c1, c2] = fix_v_ed2
}
# Bring cv and ed[] up to date after d[] changed in the rows and columns of
# the two indices co1 and co2.  Every cell in those two rows and two columns
# is refreshed; cells visited more than once are harmless because
# fix_v_1() applies only the difference to the cached value.
function fix_v(co1, co2) {
    fix_v_co = 0
    while (fix_v_co < no) {
        fix_v_1(co1, fix_v_co)      # row co1
        fix_v_1(co2, fix_v_co)      # row co2
        fix_v_1(fix_v_co, co1)      # column co1
        fix_v_1(fix_v_co, co2)      # column co2
        fix_v_co++
    }
}
# Modify the d matrix by adding the mapping cc -> co with weight k
# (k=1 adds the contribution, k=-1 removes it).
#   cc - slot index whose pair frequencies (pc2) are applied
#   co - index in d[] that receives them
#   k  - multiplier for every contribution
# Pairs of cc with another slot i go to d[co,ckey[i]] and d[ckey[i],co],
# using the current key of i; the pair of cc with itself goes to d[co,co].
function add_c(cc, co, k) {
    for (add_c_i = 0; add_c_i < nc; add_c_i++) {
        if (add_c_i == cc) {
            d[co, co] += k * pc2[cc, cc]
            continue
        }
        d[co, ckey[add_c_i]] += k * pc2[cc, add_c_i]
        d[ckey[add_c_i], co] += k * pc2[add_c_i, cc]
    }
}
# Exchange the key entries of slots cc1 and cc2, updating d[], ed[] and cv
# incrementally.  Returns the change in cv caused by the exchange (negative
# means the cost went down).  Calling it again with the same arguments
# exchanges the two entries back.
function swap(cc1, cc2) {
    swap_co1 = ckey[cc1]
    swap_co2 = ckey[cc2]
    swap_oldv = cv

    # Move cc1 from swap_co1 to swap_co2.
    add_c(cc1, swap_co1, -1)
    add_c(cc1, swap_co2, 1)
    ckey[cc1] = swap_co2

    # Move cc2 from swap_co2 to swap_co1 (cc1 is already at its new place).
    add_c(cc2, swap_co2, -1)
    add_c(cc2, swap_co1, 1)
    ckey[cc2] = swap_co1

    fix_v(swap_co1, swap_co2)
    return cv - swap_oldv
}
# Translate thetext through the current key and return the result.
# Each character is looked up as: character -> alphci (slot index)
# -> ckey (open-letter index) -> alphs (open letter).
function decrypt() {
    decrypt_d = ""
    decrypt_i = 1
    while (decrypt_i <= length(thetext)) {
        decrypt_c = substr(thetext, decrypt_i, 1)
        decrypt_d = decrypt_d alphs[ckey[alphci[decrypt_c]]]
        decrypt_i++
    }
    return decrypt_d
}
# Hill-climbing search over the key.
#
# Slot pairs (a, a+b) are tried in sweeps: for each distance b = 1 .. nc2-1
# every start slot a is tried in turn.  A swap is kept only when it lowers cv
# (swap() returns a negative difference); otherwise it is undone by swapping
# again.  After each distance has been exhausted a progress line
# "changes <TAB> cv <TAB> decrypted text" is printed and flushed.
#
# The search stops once a complete sweep over all distances has accepted no
# swap: the key is then a local optimum and further sweeps would only repeat
# the same rejected attempts.  With fewer than two slots there is no pair to
# swap, so the current state is printed once and the function returns.
function search() {
    if (nc2 < 2) {
        print changes, cv, decrypt(); fflush()
        return
    }
    a = 0; b = 1
    search_sweep_changes = changes      # value of `changes` when this sweep began
    while (1) {
        if (a + b >= nc2) {
            # Distance b exhausted: report progress, move to the next distance.
            a = 0; b++
            print changes, cv, decrypt(); fflush()
        }
        if (a + b >= nc2) {
            # All distances done: one full sweep is complete.
            if (changes == search_sweep_changes)
                break                   # nothing improved -> converged
            search_sweep_changes = changes
            a = 0; b = 1
        }
        dif = swap(a, a + b)
        # print "try ",alphcs[a] alphs[ckey[a]],alphcs[a+b] alphs[ckey[a+b]], "dif=",dif, "cv=",cv
        if (dif >= 0)
            swap(a, a + b)              # no improvement: undo
        else
            changes++
        a++
    }
}
# After all input has been read: validate it, turn the counters collected by
# putch() into frequency tables, then initialise the key and start the search.
# Requires gawk (PROCINFO["sorted_in"]).
END {
    # init_perm() computes i % (no-1); with fewer than two reference letters
    # that modulus is zero or negative and the search cannot work.
    if (no < 2) {
        print "error: no reference alphabet with at least 2 letters was loaded" > "/dev/stderr"
        exit 1
    }

    sum_diag = 0
    for (i = 0; i < no; i++) sum_diag += p1[i]^2
    print "sum_diag=" sum_diag

    putch("|")      # now sum_i count2[a,i] = sum_i count2[i,a]

    # Number the characters of the text by descending count1 value; "|" is
    # the boundary marker and gets no index.
    alphc = ""
    PROCINFO["sorted_in"] = "@val_num_desc"
    nc = 0
    for (c in count1) if (c != "|") {
        alphc = alphc c
        alphci[c] = nc
        alphcs[nc] = c
        nc++
    }

    count0 -= 2     # remove "|x" and "x|"
    # count0 is the divisor of every pair frequency below; it is only
    # positive when the text holds at least two characters.
    if (count0 < 1) {
        print "error: the text must contain at least 2 characters" > "/dev/stderr"
        exit 1
    }
    print nc, alphc

    # Add-one smoothed single-character frequencies, mixed with a uniform
    # share p_any.  p_any is not assigned in this script, so it evaluates to 0
    # unless supplied externally (e.g. with -v p_any=...).
    for (c in count1) {
        pc1[alphci[c]] = (count1[c] + 1) / (count0 + length(alphc)) * (1 - p_any) + p_any / length(alphc)
    }

    # Observed pair frequencies, indexed by the numbers assigned above.
    # Alternative estimate kept for reference:
    #   pc2_c1_c2 = (1+count2[c1,c2])/(1/p1[c2]+count1[c1])*(1-p_resync) + p_resync*p1[c2]
    #   pc2[alphci[c1],alphci[c2]] = pc1[c1] * pc2_c1_c2
    for (c1 in count1) for (c2 in count1) {
        pc2[alphci[c1], alphci[c2]] = count2[c1, c2] / count0
    }
    PROCINFO["sorted_in"] = "@unsorted"

    init_perm()
    search()
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement