Advertisement
msoutopico

merge_2lang_tmx

Apr 12th, 2019
491
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Groovy 3.82 KB | None | 0 0
  1. /* :name = Merge 2 TMX with different source  :description=
  2.  * Purpose: To merge two TMX files in the /tm folder, taking the source text of the first one and the target text of the second one, and combining them in one third TMX file.
  3.  * #Files:  Expects the first file to be called Translator1.tmx and the second one to be called Translator2.tmx
  4.             Renames Translator2.tmx as Translator2.tmx.fra-bak and outputs new Translator2.tmx
  5.  * #Format: TMX v.1.4
  6.  *
  7.  * @author  Manuel Souto Pico, Kos Ivantsov
  8.  * @date    2019-04-12
  9.  * @version 0.2
  10.  */
  11.  
  12. import groovy.swing.SwingBuilder
  13. import groovy.util.XmlSlurper
  14. import groovy.util.GroovyCollections
  15. // import groovy.xml.XmlUtil
  16. import java.awt.FlowLayout
  17. import java.nio.file.Files
  18. import java.nio.file.Paths
  19. import java.util.zip.ZipFile
  20. import javax.swing.JOptionPane
  21. import javax.swing.WindowConstants as WC
  22. import org.apache.commons.io.FileUtils
  23. import org.omegat.util.Preferences
  24. import org.omegat.util.StaticUtils
  25. import org.omegat.util.StringUtil
  26. import static javax.swing.JOptionPane.*
  27. import static org.omegat.util.Platform.*
  28.  
  // Use StringUtil when its method list mentions makeValidXML, otherwise fall back to StaticUtils.
  utils = StringUtil.getMethods().toString().findAll("makeValidXML") ? StringUtil : StaticUtils

  // Settings of the currently open project: target language and the /tm folder.
  def prop = project.getProjectProperties()
  def targetLang = prop.targetLanguage
  def tmDir = prop.getTMRoot()

  // The two input memories are looked up by fixed name inside the /tm folder.
  tmxOne = new File("${tmDir}${File.separator}Translator1.tmx")
  tmxTwo = new File("${tmDir}${File.separator}Translator2.tmx")

  // Tell the user which files are about to be combined.
  console.println("Merging source text from\n${tmxOne}\nwith target text from\n${tmxTwo}...\n")
  40.  
  // One slurper is shared by both input files. The DOCTYPE declaration is allowed
  // (the disallow feature is switched off) while loading of the external DTD is disabled.
  parser = new XmlSlurper()
  parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false)
  parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)

  // Both memories are required. Without this check a missing file would leave
  // tmxOneEntries/tmxTwoEntries unassigned and the script would fail further down
  // with an unrelated "missing property" error.
  def missingTmx = [tmxOne, tmxTwo].findAll { !it.exists() }
  if (missingTmx) {
      console.println('Cannot merge, file(s) not found:\n' + missingTmx.join('\n'))
      return
  }

  tmxOneEntries = parser.parse(tmxOne)
  tmxTwoEntries = parser.parse(tmxTwo)
  53. //console.println(tmxOneEntries)    // outputs long string
  54. // console.println tmxOneEntries.getClass()
  55.  
  // Buffer that collects the generated <tu> elements.
  finalTMX_contents = new StringWriter()

  // Opening part of the output document, up to and including the <body> start tag.
  // The lines are joined with '\n' and there is no trailing newline.
  preamble = [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<!DOCTYPE tmx SYSTEM "tmx11.dtd">',
      '<tmx version="1.1">',
      ' <header creationtool="OmegaT" o-tmf="OmegaT TMX" adminlang="EN-US" datatype="plaintext" creationtoolversion="4.1.5_3_10485" segtype="sentence" srclang="en-ZZ"/>',
      ' <body>'
  ].join('\n')

  // Closing part of the output document.
  end = ['  </body>', '</tmx>'].join('\n')
  64.  
  // Build one <tu> per pair of translation units, matched by position:
  // the source segment is the first <tuv> of the unit in Translator1.tmx,
  // the target segment is the second <tuv> of the unit in Translator2.tmx.
  // (Previously the target was read from Translator1.tmx as well, so the output
  // contained the source text twice.)

  // Materialise the unit lists once instead of re-evaluating the GPath on every iteration.
  def srcUnits = tmxOneEntries.body.tu.list()
  def tgtUnits = tmxTwoEntries.body.tu.list()

  // Units can only be paired while both files have one at the same position.
  def pairCount = Math.min(srcUnits.size(), tgtUnits.size())
  if (srcUnits.size() != tgtUnits.size()) {
      console.println("Warning: Translator1.tmx has ${srcUnits.size()} translation units but Translator2.tmx has ${tgtUnits.size()}; only the first ${pairCount} will be merged.")
  }

  // Binding a node and casting to String serialises the <seg> element as markup,
  // which also escapes <, > and & (but not ' and ").
  def markup = new groovy.xml.StreamingMarkupBuilder()

  for (int count = 0; count < pairCount; count++) {
      def src_str = markup.bindNode(srcUnits[count].tuv[0].seg) as String
      def tgt_str = markup.bindNode(tgtUnits[count].tuv[1].seg) as String

      finalTMX_contents << """    <tu>
     <tuv lang="en-ZZ">
       ${src_str}
     </tuv>
     <tuv lang="${targetLang}">
       ${tgt_str}
     </tuv>
   </tu>
"""
  }
  93.  
  94. //console.println(finalTMX_contents)
  95.  
  // Move the original second memory out of the way before its name is reused.
  // The rename result is checked: if the backup cannot be made, stop here rather
  // than overwrite the only copy of Translator2.tmx.
  def backupPath = tmxTwo.toString() + ".fra-bak"
  if (tmxTwo.exists() && !tmxTwo.renameTo(backupPath)) {
      console.println("Could not rename " + tmxTwo + " to " + backupPath + "; Translator2.tmx was left untouched.")
      return
  }

  // Write the merged memory under the name the second file had.
  finalTMX = new File(tmDir.toString() + File.separator + "Translator2.tmx")
  finalTMX.write(preamble + finalTMX_contents + end, "UTF-8")

  console.println("Translator2.tmx has been re-built now with English as the source language.")
  103.  
  104. //return console.println(tmxOneEntries.body.tu[0].tuv[0].seg)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement