Advertisement
Guest User

run_vecalign

a guest
Feb 29th, 2020
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.86 KB | None | 0 0
  1. import os
  2. import subprocess
  3. import sys
  4.  
  5. def run_vecalign(name,volume):
  6.     j_file = f"./all_volumes/{name}_jp_{volume}.txt"
  7.     k_file = f"./all_volumes/{name}_ko_{volume}.txt"
  8.     j_overlap = f"./all_volumes/{name}_jp_{volume}.overlap.txt"
  9.     k_overlap = f"./all_volumes/{name}_ko_{volume}.overlap.txt"
  10.     j_embed = f"./all_volumes/{name}_jp_{volume}.embed"
  11.     k_embed = f"./all_volumes/{name}_ko_{volume}.embed"
  12.     alignment = f"./alignments/{name}_{volume}.txt"
  13.     alignment_v = f"./alignments/{name}_{volume}.verbose.txt"
  14.  
  15.     subprocess.run(f"./vecalign/overlap.py -i {j_file} -o {j_overlap} -n 3",shell=True)
  16.     subprocess.run(f"./vecalign/overlap.py -i {k_file} -o {k_overlap} -n 3",shell=True)
  17.     subprocess.run(f"./LASER/tasks/embed/embed.sh {j_overlap} ja {j_embed}",shell=True)
  18.     subprocess.run(f"./LASER/tasks/embed/embed.sh {k_overlap} ko {k_embed}",shell=True)
  19.     subprocess.run(f"./vecalign/vecalign.py --alignment_max_size 3 --src {j_file} --tgt {k_file} --src_embed {j_overlap} {j_embed} --tgt_embed {k_overlap} {k_embed} > {alignment}",shell=True)
  20.     # with open(alignment,'r',encoding='utf-8') as file:
  21.     #     lines = file.readlines()
  22.     # with open(j_file,'r',encoding='utf-8') as f:
  23.     #     jp = f.read().split('\n')
  24.     # with open(k_file,'r',encoding='utf-8') as f:
  25.     #     ko = f.read().split('\n')
  26.     # with open(alignment_v,'w',encoding='utf-8') as destfile:
  27.     #     for line in lines:
  28.     #         ret = ""
  29.     #         t=line.split(':')
  30.     #         tgt,dst,score = t[0],t[1],t[2]
  31.     #         tgt = tgt.replace('[','').replace(']','').split(',')
  32.     #         dst = dst.replace('[','').replace(']','').split(',')
  33.     #         if len(tgt[0])==0:
  34.     #             continue
  35.     #         if len(dst[0])==0:
  36.     #             continue
  37.     #         for no in tgt:
  38.     #             ret += jp[int(no)]
  39.     #         for no in dst:
  40.     #             ret += ko[int(no)]
  41.     #         ret += score
  42.     #         destfile.write(ret)
  43.     os.remove(j_overlap)
  44.     os.remove(k_overlap)
  45.     os.remove(j_embed)
  46.     os.remove(k_embed)
  47.  
  48. def parse_infofile(infofile):
  49.     ret={}
  50.     with open(infofile,'r') as file:
  51.         t = file.read()
  52.     l = t.split('\n')
  53.     current_key = ""
  54.     for content in l:
  55.         if content.strip().isalpha() and len(content)>1:
  56.             current_key = content.strip()
  57.             ret[current_key] = []
  58.         else:
  59.             for num in content.split():
  60.                 ret[current_key].append(num)
  61.     return ret
  62.  
  63. if __name__ == "__main__":
  64.     info = parse_infofile("# volumes.txt")
  65.     print("start.")
  66.     for name in info.keys():
  67.         for vol in info[name]:
  68.             try:
  69.                 run_vecalign(name,vol)
  70.             except KeyboardInterrupt:
  71.                 raise KeyboardInterrupt
  72.             except:
  73.                 print(f"error with {name} {vol}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement