Advertisement
Guest User

jakobsen.awk

a guest
Oct 24th, 2013
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Awk 4.10 KB | None | 0 0
  1. #! /usr/bin/awk -f
  2.  
  3. BEGIN {  # one-time setup, executed before any input line is read
  4.   changes=0; coef=1.00  # number of swaps kept by search(); factor applied to d[] in the error term
  5.   lastch="|"  # "|" is the boundary marker: it acts as the predecessor of the first text character
  6.   OFS="\t"  # tab-separated output
  7.   FS=OFS  # tab-separated input as well
  8. }
  9.  
  10. FILENAME ~ /bigram[^/]*$/ && FNR>1 {  # Reference statistics: lines of any input file whose base name contains "bigram". FNR (per-file line number) skips that file's own first line wherever the file appears on the command line.
  11.   if(!bigram) { print "bigram"; bigram=1 }  # one-time section marker on stdout
  12.   if(length($1)>2) {  # first field longer than two characters: taken as the alphabet string
  13.     alph_str=$1
  14.     no=length(alph_str)  # size of the open (reference) alphabet
  15.     for(i=1; i<=no; i++) {
  16.       alphi[substr(alph_str,i,1)]=i-1  # letter -> 0-based index
  17.       alphs[i-1]=substr(alph_str,i,1)  # 0-based index -> letter
  18.     }
  19.     print no,alph_str
  20.   } else if(length($1)==1) {  # single-letter row
  21.     p1[alphi[$1]]=$5  # presumably column 5 holds the letter's probability -- confirm against the data file
  22.   } else if(length($1)==2) {  # letter-pair row
  23.     ci1=alphi[substr($1,1,1)]
  24.     ci2=alphi[substr($1,2,1)]
  25.     p2[ci1,ci2]=p1[ci1]*$6  # column 6 scaled by the first letter's p1 (presumably conditional -> joint probability); relies on the single-letter row having been read earlier
  26.     #print ci1,ci2,p2[ci1,ci2], $1, alphs[ci1], alphs[ci2]
  27.   }
  28. }
  29.  
  30. function putch(c) {  # feed one character c into the running single-character and pair tallies
  31.   # (disabled) if(!index(alphc, c)) { alphc=alphc c; alphcs[c]="" }
  32.   count2[lastch,c] += 1  # the pair (previous character, c)
  33.   count1[lastch] += 1  # the previous character, counted as first element of a pair
  34.   count0 += 1  # total number of pairs recorded
  35.   lastch=c  # c becomes the predecessor for the next call
  36. }
  37.  
  38. FILENAME ~ /text[^/]*$/ {  # every line of any input file whose base name contains "text"
  39.   if(!got_text) { got_text=1; print "text" }  # one-time section marker on stdout
  40.   gsub(/[ \t]/,"")  # strip blanks and tabs from the record (gsub's default target is $0)
  41.   for(i=1; i<=length($0); i++) putch(substr($0,i,1))  # tally each remaining character in order
  42.   thetext = thetext $0  # keep the stripped text for decrypt()
  43. }
  44.  
  45. function abs(a) { if(a>0) return a; return -a }  # magnitude of a; used as the per-cell error measure
  46. #function abs(a) { return a^2 }
  47.  
  48. function recalc_cv(rebuild_d) {  # Recompute the total error cv from scratch and return it; when rebuild_d is true, d[] is rebuilt from the current key first. Uses the globals c1 and c2 as loop counters.
  49.   if(rebuild_d) {
  50.     delete d
  51.     for(c1=0; c1<nc2; c1++) for(c2=0; c2<nc2; c2++)  # every ordered pair of closed-letter indices
  52.       d[ckey[c1],ckey[c2]] += pc2[c1,c2]  # add the pair's text frequency to the cell of the open letters the pair currently maps to
  53.   }
  54.   cv=0
  55.   for(c1=0; c1<no; c1++) for(c2=0; c2<no; c2++)  # every ordered pair of open-letter indices
  56.     cv += (ed[c1,c2]=abs(p2[c1,c2]-coef*d[c1,c2]))  # per-cell error is cached in ed[] so fix_v_1() can later update cv incrementally
  57.   return cv
  58. }
  59.  
  60. function init_perm() {  # Build the initial key ckey[] and compute the starting d[], ed[] and cv. Uses the global i as loop counter.
  61.   nc2=no; if(nc2<nc) nc2=nc  # nc2 = max(no, nc), the number of key entries
  62.   for(i=0; i<nc2; i++) {
  63. #    if(i<no) ckey[i]=i
  64. #    else ckey[i]=no-1
  65.     ckey[i]=i%(no-1)  # NOTE(review): values stay in 0..no-2, so open letter no-1 is never assigned (swap() only exchanges existing values), and no==1 makes this a modulo by zero -- confirm intent
  66.   }
  67.   # ckey: closed letters -> open letters
  68.  
  69.   recalc_cv(1)
  70. }
  71.  
  72. function fix_v_1(c1,c2) {  # Refresh the cached error of cell (c1,c2) after d[c1,c2] may have changed, and adjust cv by the difference. c1 and c2 are parameters here, so the globals of the same name are untouched.
  73.   fix_v_ed2 = abs(p2[c1,c2]-coef*d[c1,c2])  # new error of this cell (fix_v_ed2 is a global scratch variable)
  74.   cv += fix_v_ed2-ed[c1,c2]  # swap the old contribution for the new one
  75.   ed[c1,c2] = fix_v_ed2
  76. }
  77.  
  78. function fix_v(co1,co2) {  # Re-evaluate every cell in rows co1, co2 and columns co1, co2 of the error table. Uses the global fix_v_co as loop counter.
  79.   for(fix_v_co=0; fix_v_co<no; fix_v_co++) {
  80.     fix_v_1(co1,fix_v_co)  # row co1
  81.     fix_v_1(co2,fix_v_co)  # row co2
  82.     fix_v_1(fix_v_co,co1)  # column co1
  83.     fix_v_1(fix_v_co,co2)  # column co2; cells lying in both a row and a column are visited twice, the second visit changes cv by 0
  84.   }
  85. }
  86.  
  87. function add_c(cc,co,k) { # Modify the d matrix: add the contribution of the mapping cc->co, weighted by k (k=1 -- add, k=-1 -- remove). Uses the global add_c_i as loop counter.
  88.   for(add_c_i=0; add_c_i<nc; add_c_i++) {  # every closed letter that was indexed from the text
  89.     if(add_c_i!=cc) {
  90.       d[co,ckey[add_c_i]] += k*pc2[cc,add_c_i]  # pairs in which cc comes first
  91.       d[ckey[add_c_i],co] += k*pc2[add_c_i,cc]  # pairs in which cc comes second
  92.     } else
  93.       d[co,co] += k*pc2[cc,cc]  # the pair cc,cc is applied once and without reading ckey[cc], so the caller may change ckey[cc] afterwards
  94.   }
  95. }
  96.  
  97. function swap(cc1,cc2) {  # Exchange the open letters assigned to closed letters cc1 and cc2, updating d[], ed[] and cv incrementally. Returns the change in cv; search() keeps the swap only when this is negative. A second call with the same arguments restores ckey[].
  98.   swap_oldv=cv
  99.   swap_co1=ckey[cc1]
  100.   swap_co2=ckey[cc2]
  101.   add_c(cc1,swap_co1,-1); add_c(cc1,swap_co2,1); ckey[cc1]=swap_co2  # move cc1 from its old open letter to cc2's
  102.   add_c(cc2,swap_co2,-1); add_c(cc2,swap_co1,1); ckey[cc2]=swap_co1  # move cc2 the other way; this must follow the line above because add_c() reads ckey[]
  103.   fix_v(swap_co1,swap_co2)  # add_c() only touched rows and columns of these two open letters
  104.   return cv-swap_oldv
  105. }
  106.  
  107. function decrypt() {  # Return thetext translated through the current key. Uses the globals decrypt_d, decrypt_i and decrypt_c as scratch variables.
  108.   decrypt_d=""
  109.   for(decrypt_i=1; decrypt_i<=length(thetext); decrypt_i++) {
  110.     decrypt_c=substr(thetext,decrypt_i,1)
  111.     decrypt_d = decrypt_d alphs[ckey[alphci[decrypt_c]]]  # text character -> closed index -> open index -> open letter
  112.   }
  113.   return decrypt_d
  114. }
  115.  
  116. function search() {  # Hill-climb over pairwise exchanges of key entries. The loop has no exit condition (the break is commented out), so it runs until the process is stopped. Uses the globals a, b and dif.
  117.   a=0; b=1  # a = first key index, b = distance to the second key index
  118.  
  119.   while(1) {
  120.     if(a+b>=nc2) {  # all pairs at the current distance have been tried
  121.       a=0; b++
  122.       print changes,cv,decrypt(); fflush()  # progress line: swaps kept so far, current error, current decryption
  123.     }
  124.     if(a+b>=nc2) {  # the new distance does not fit either: start over with adjacent pairs
  125.       a=0; b=1
  126.       #break
  127.     }
  128.     dif=swap(a,a+b)  # apply the exchange tentatively
  129.     # print "try ",alphcs[a] alphs[ckey[a]],alphcs[a+b] alphs[ckey[a+b]], "dif=",dif, "cv=",cv
  130.     if(dif>=0) { swap(a,a+b) }  # error did not decrease: exchange back
  131.     else {
  132.       changes++  # error decreased: keep the exchange
  133.     #  a=0; b=1
  134.     #  print cv,decrypt()
  135.     ## else print "good:"
  136.     }
  137.     a++
  138.   }
  139. }
  140.  
  141. END {  # After all input: index the characters seen in the text, turn the pair counts into frequencies, then start the search (which does not return).
  142.   sum_diag=0
  143.   for(i=0; i<no; i++)  sum_diag += p1[i]^2  # sum of squared p1 values over the open alphabet; only printed
  144.   print "sum_diag=" sum_diag
  145.  
  146.   putch("|")  # now sum_i count2[a,i] = sum_i count2[i,a]
  147.  
  148.   alphc=""
  149.   PROCINFO["sorted_in"]="@val_num_desc"  # gawk-specific: traverse arrays by numeric value, largest first, so closed letters are indexed by decreasing count
  150.   nc=0
  151.   for(c in count1) if(c!="|") {  # every distinct text character except the boundary marker
  152.     alphc=alphc c
  153.     alphci[c]=nc  # character -> closed index
  154.     alphcs[nc]=c  # closed index -> character
  155.     nc++
  156.   }
  157.   count0-=2  # remove "|x" and "x|"
  158.  
  159.   print nc,alphc
  160.  
  161.   for(c in count1) {  # includes "|", whose alphci entry is empty
  162.     pc1[alphci[c]]=(count1[c]+1)/(count0+length(alphc))*(1-p_any) + p_any/length(alphc)  # NOTE(review): p_any is not assigned anywhere in the visible source (so it evaluates as 0) and pc1 is not read by any active code -- confirm
  163.     #print c, count1[c], count0, count1[c]/count0, pc1[c]
  164.   }
  165.  
  166.   for(c1 in count1) for(c2 in count1) {
  167. #    pc2_c1_c2 = (1+count2[c1,c2])/(1/p1[c2]+count1[c1])*(1-p_resync) + p_resync*p1[c2]
  168. #    pc2[alphci[c1],alphci[c2]] = pc1[c1] * pc2_c1_c2
  169.     pc2[alphci[c1],alphci[c2]] = count2[c1,c2]/count0  # relative frequency of the pair; NOTE(review): count0 is 0 after the adjustment above when the text holds exactly one character -- confirm such input cannot occur
  170.     #print c1 c2, count2[c1,c2]+0, count1[c1], count0, (0+count2[c1,c2])/count1[c1], p2[c1,c2], (0+count2[c1,c2])/count1[c1]/(count1[c2]+1e-40)*count0, pc2[c1,c2]/pc1[c2]
  171.   }
  172.   PROCINFO["sorted_in"]="@unsorted"  # back to the default traversal order
  173.  
  174.   init_perm()
  175.   search()
  176. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement