Advertisement
KosIvantsov

write_TMX_with_usefulTU.groovy

Sep 10th, 2013
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Groovy 8.08 KB | None | 0 0
  1. /* Development of this script has been sponsored by Qabiria - www.qabiria.com
  2.  *  
  3.  * Purpose: Export only those TU's that are relevant for the current project
  4.  *  from TMX files in /tm into a new TMX file
  5.  * #Files:  Writes 'exported_relevant.tmx'
  6.  *  in subfolder 'tmx_export' in current project's root
  7.  * #File format:    TMX v.1.4
  8.  * #Details:    http://wp.me/p3fHEs-7x
  9.  *
  10.  * @author  Kos Ivantsov
  11.  * @date    2013-09-10
  12.  * @version 0.1
  13.  */
  14.  
  15. /*
  16.  * Set "select_files" to 'yes' if you want to use file selector
  17.  * to specify files for export. If anything else is specified, the script
  18.  * will work with the complete project.
  19.  */
  20. select_files = 'no'
  21.  
  22. /*
  23.  * Specify similarity threshold for found matches. Only the ones
  24.  * above it will make it into the exported TMX file
  25.  */
  26. int similarity = 75
  27. /*
  28.  * Specify wait time (in milliseconds) for each segment. It's the time
  29.  * the script will wait for the match pane to update. You may experiment with it,
  30.  * keeping in mind that if it's too low, you may end up having wrong TU's
  31.  * (i.e. from previous segments) exported.
  32.  */
  33. int sleeptime = 500
  34.  
  35. import javax.swing.JFileChooser
  36. import org.omegat.core.Core
  37. import org.omegat.util.StaticUtils
  38. import org.omegat.util.TMXReader
  39. import static javax.swing.JOptionPane.*
  40. import static org.omegat.util.Platform.*
  41.  
  42. def prop = project.projectProperties // properties of the project currently open in OmegaT
  43. if (!prop) { // nothing to work on: tell the user and stop the script
  44.     final def title = 'Export relevant TU\'s'
  45.     final def msg   = 'Please try again after you open a project.'
  46.     showMessageDialog null, msg, title, INFORMATION_MESSAGE
  47.     return
  48. }
  49.  
  50. // The TMX header's segtype mirrors the project's segmentation setting
  51. segmenting = prop.isSentenceSegmentingEnabled() ?
  52.     TMXReader.SEG_SENTENCE :
  53.     TMXReader.SEG_PARAGRAPH
  54.  
  55. def sourceLocale = prop.getSourceLanguage().toString() // written to srclang and the source <tuv>
  56. def targetLocale = prop.getTargetLanguage().toString() // written to the target <tuv>
  57. def folder = prop.projectRoot+'/tmx_export' // export folder inside the project root
  58. def fileloc = folder+'/exported_relevant.tmx'
  59. relevant_mem = new File(fileloc) // the TMX file this script writes
  60. sourceroot = prop.getSourceRoot().toString() as String // stripped from selected file paths further down
  61.  
  62. // Create the export folder if it is not there yet
  63. def exportDir = new File(folder)
  64. if (!exportDir.exists())
  65.     exportDir.mkdir()
  66.  
  67. relevant_mem.write("",'UTF-8') // start from an empty file; everything below is appended
  68. relevant_mem.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 'UTF-8')
  69. relevant_mem.append("<!DOCTYPE tmx SYSTEM \"tmx14.dtd\">\n", 'UTF-8') // DTD has to agree with version="1.4" on the next line
  70. relevant_mem.append("<tmx version=\"1.4\">\n", 'UTF-8')
  71. relevant_mem.append(" <header\n", 'UTF-8')
  72. relevant_mem.append("  creationtool=\"OmegaTScripting\"\n", 'UTF-8')
  73. relevant_mem.append("  segtype=\"" + segmenting + "\"\n", 'UTF-8') // sentence or paragraph, taken from the project
  74. relevant_mem.append("  o-tmf=\"OmegaT TMX\"\n", 'UTF-8')
  75. relevant_mem.append("  adminlang=\"EN-US\"\n", 'UTF-8')
  76. relevant_mem.append("  srclang=\"" + sourceLocale + "\"\n", 'UTF-8')
  77. relevant_mem.append("  datatype=\"plaintext\"\n", 'UTF-8')
  78. relevant_mem.append(" >\n", 'UTF-8')
  79. relevant_mem.append(" </header>\n", 'UTF-8')
  80. relevant_mem.append("  <body>\n", 'UTF-8') // <tu> elements follow; body and tmx are closed by the worker thread
  81.  
  82. if ((select_files == 'yes')) { // user picks the files; otherwise every project file is processed
  83.     srcroot = new File(prop.getSourceRoot())
  84.     JFileChooser fc = new JFileChooser( // browsing starts in the project's source folder
  85.     currentDirectory: srcroot,
  86.     dialogTitle: "Choose files to export",
  87.     fileSelectionMode: JFileChooser.FILES_ONLY,
  88.     //the file filter must show also directories, in order to be able to look into them
  89.     multiSelectionEnabled: true)
  90.     if(fc.showOpenDialog() != JFileChooser.APPROVE_OPTION) {
  91.     console.println "Canceled"
  92.     relevant_mem.delete() // only the header was written so far: don't leave a malformed TMX behind
  93.     return
  94.     }
  95.     def outside = fc.selectedFiles.findAll { !it.absolutePath.startsWith(srcroot.absolutePath + File.separator) } // plain prefix test on every file, no regex
  96.     if (outside) { // at least one selected file is not under the source folder
  97.         console.println "Selection outside of ${prop.getSourceRoot()} folder"
  98.         final def title = 'Wrong file(s) selected'
  99.         final def msg   = "Files must be in ${prop.getSourceRoot()} folder."
  100.         console.println msg
  101.         showMessageDialog null, msg, title, INFORMATION_MESSAGE
  102.         relevant_mem.delete() // same cleanup as on cancel
  103.         return
  104.     }
  105.     files = fc.selectedFiles
  106. }else{
  107.     files = project.projectFiles.filePath}
  108.  
  109. active_segment = editor.currentEntry.entryNum() // segment active before the export starts
  110. count = 0 // segments visited
  111. hitcount = 0 // TUs written to the export file
  112.  
  113. def match_find_write = Thread.start { // worker: visits every segment of the chosen files and writes the TUs
  114.         files.each{
  115.         fl = "${it.toString()}" - "$sourceroot" // drop the source-folder prefix so it compares with filePath
  116.         proj_files = project.projectFiles
  117.         proj_files.each{
  118.             if ( "${it.filePath}" != "$fl" ) {
  119.             /*ignore*/
  120.             //console.println "file \"$fl\" is not supported by OmegaT"
  121.             }else{
  122.             it.entries.each {
  123.             count++
  124.             editor.gotoEntry(it.entryNum()) // activate the segment in the editor
  125.             info = project.getTranslationInfo(it)
  126.             if (info.isTranslated()) { // already translated: export the project's own translation
  127.                 hitcount++
  128.                 changeId = info.changer
  129.                 changeDate = info.changeDate
  130.                 creationId = info.creator
  131.                 creationDate = info.creationDate
  132.                 alt = 'unknown' // placeholder for missing ids and dates
  133.                 source = StaticUtils.makeValidXML(it.srcText)
  134.                 target = StaticUtils.makeValidXML(info.translation)
  135.                 relevant_mem.append("    <tu>\n", 'UTF-8')
  136.                 relevant_mem.append("      <tuv xml:lang=\"" + sourceLocale + "\">\n", 'UTF-8')
  137.                 relevant_mem.append("        <seg>" + "$source" + "</seg>\n", 'UTF-8')
  138.                 relevant_mem.append("      </tuv>\n", 'UTF-8')
  139.                 relevant_mem.append("      <tuv xml:lang=\"" + targetLocale + "\"", 'UTF-8')
  140.                 relevant_mem.append(" changeid=\"${changeId ?: alt }\"", 'UTF-8')
  141.                 relevant_mem.append(" changedate=\"${ changeDate > 0 ? new Date(changeDate).format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone('UTC')) : alt }\"", 'UTF-8') // the literal Z means UTC, so format in UTC
  142.                 relevant_mem.append(" creationid=\"${creationId ?: alt }\"", 'UTF-8')
  143.                 relevant_mem.append(" creationdate=\"${ creationDate > 0 ? new Date(creationDate).format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone('UTC')) : alt }\"", 'UTF-8')
  144.                 relevant_mem.append(">\n", 'UTF-8')
  145.                 relevant_mem.append("        <seg>" + "$target" + "</seg>\n", 'UTF-8')
  146.                 relevant_mem.append("      </tuv>\n", 'UTF-8')
  147.                 relevant_mem.append("    </tu>\n", 'UTF-8')
  148.                 console.println "-------\nFound translation for segment ${it.entryNum()}. Exporting"
  149.                 }else{
  150.             sleep sleeptime // give the match pane time to update for this segment
  151.             near = Core.getMatcher().getActiveMatch()
  152.             if (near != null) {
  153.                 if (near.scores[0].score > similarity) { // export only matches scoring above the threshold
  154.                     hitcount++
  155.                     changeId = near.changer
  156.                     changeDate = near.changedDate
  157.                     creationId = near.creator
  158.                     creationDate = near.creationDate
  159.                     alt = 'unknown' // placeholder for missing ids and dates
  160.                     source = StaticUtils.makeValidXML(near.source)
  161.                     target = StaticUtils.makeValidXML(near.translation)
  162.                     relevant_mem.append("    <tu>\n", 'UTF-8')
  163.                     relevant_mem.append("      <tuv xml:lang=\"" + sourceLocale + "\">\n", 'UTF-8')
  164.                     relevant_mem.append("        <seg>" + "$source" + "</seg>\n", 'UTF-8')
  165.                     relevant_mem.append("      </tuv>\n", 'UTF-8')
  166.                     relevant_mem.append("      <tuv xml:lang=\"" + targetLocale + "\"", 'UTF-8')
  167.                     relevant_mem.append(" changeid=\"${changeId ?: alt }\"", 'UTF-8')
  168.                     relevant_mem.append(" changedate=\"${ changeDate > 0 ? new Date(changeDate).format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone('UTC')) : alt }\"", 'UTF-8') // the literal Z means UTC, so format in UTC
  169.                     relevant_mem.append(" creationid=\"${creationId ?: alt }\"", 'UTF-8')
  170.                     relevant_mem.append(" creationdate=\"${ creationDate > 0 ? new Date(creationDate).format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone('UTC')) : alt }\"", 'UTF-8')
  171.                     relevant_mem.append(">\n", 'UTF-8')
  172.                     relevant_mem.append("        <seg>" + "$target" + "</seg>\n", 'UTF-8')
  173.                     relevant_mem.append("      </tuv>\n", 'UTF-8')
  174.                     relevant_mem.append("    </tu>\n", 'UTF-8')
  175.                     console.println "-------\nFound good match for segment ${it.entryNum()}"
  176.                     console.println "Segment source text is: \n${editor.currentEntry.getSrcText()}"
  177.                     console.println "\nMatch source is: \n$near.source"
  178.                     console.println "Match translation is: \n$near.translation\n"
  179.                     }else{
  180.                         console.println "-------\nNo good match found for segment ${it.entryNum()}"
  181.                         }
  182.             }else{
  183.                 console.println "-------\nNo match found for segment ${it.entryNum()}"
  184.                     }
  185.                 }
  186.             }
  187.             }
  188.         }
  189.     }
  190.         editor.gotoEntry(active_segment) // return to the segment that was active before the export
  191.         relevant_mem.append("  </body>\n", 'UTF-8')
  192.         relevant_mem.append("</tmx>", 'UTF-8')
  193.        
  194.         if (hitcount == 0){ // nothing exported: remove the empty TMX and report
  195.         relevant_mem.delete()
  196.         final def msg   = """\
  197. The script has processed $count segments.
  198. 0 TU were exported.
  199. Empty file $relevant_mem has been deleted.\
  200. """
  201.         final def title = 'Export result'
  202.         console.println msg
  203.         showMessageDialog null, msg, title, INFORMATION_MESSAGE
  204.         }else{
  205.         final def msg = """\
  206. The script has processed $count segments.
  207. $hitcount TU were exported to $relevant_mem.\
  208. """
  209.         final def title = 'Export result'
  210.         console.println msg
  211.         showMessageDialog null, msg, title, INFORMATION_MESSAGE
  212.         }
  213. }
  214.  
  215. return
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement