Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/* Development of this script has been sponsored by Qabiria - www.qabiria.com
 *
 * Purpose:      Export only those TUs that are relevant for the current project
 *               into a new TMX file. A segment contributes a TU when it is
 *               already translated, or when the match currently shown by
 *               OmegaT's matcher scores above the similarity threshold.
 * #Files:       Writes 'exported_relevant.tmx'
 *               in subfolder 'tmx_export' in current project's root
 * #File format: TMX v.1.4
 * #Details:     http://wp.me/p3fHEs-7x
 *
 * @author  Kos Ivantsov
 * @date    2013-09-10
 * @version 0.1
 */

import javax.swing.JFileChooser
import org.omegat.core.Core
import org.omegat.util.StaticUtils
import org.omegat.util.TMXReader
import static javax.swing.JOptionPane.*
import static org.omegat.util.Platform.*

/*
 * Set "select_files" to 'yes' if you want to use the file selector
 * to specify files for export. If anything else is specified, the script
 * will work with the complete project.
 */
select_files = 'no'

/*
 * Specify the similarity threshold for found matches. Only matches
 * scoring above it will make it into the exported TMX file.
 */
int similarity = 75

/*
 * Specify the wait time (in milliseconds) for each untranslated segment.
 * It's the time the script will wait for the match pane to update. You may
 * experiment with it, keeping in mind that if it's too low, you may end up
 * having wrong TUs (i.e. from previous segments) exported.
 */
int sleeptime = 500

def prop = project.projectProperties
if (!prop) {
    final def title = 'Export relevant TU\'s'
    final def msg = 'Please try again after you open a project.'
    showMessageDialog null, msg, title, INFORMATION_MESSAGE
    return
}

def segmenting = prop.isSentenceSegmentingEnabled() ? TMXReader.SEG_SENTENCE : TMXReader.SEG_PARAGRAPH
def sourceLocale = prop.getSourceLanguage().toString()
def targetLocale = prop.getTargetLanguage().toString()
def sourceroot = prop.getSourceRoot().toString()
def folder = new File(prop.projectRoot, 'tmx_export')
def relevantMem = new File(folder, 'exported_relevant.tmx')

// Paths are compared with forward slashes only, so that a path coming from the
// file chooser and a path coming from the project agree whatever separator
// either of them uses.
def normalize = { String path -> path.replace('\\', '/') }

// Decide which project files to walk BEFORE touching the output file, so that
// cancelling the dialog or picking a wrong file never leaves a truncated TMX.
def exportFiles
if (select_files == 'yes') {
    JFileChooser fc = new JFileChooser(
        currentDirectory: new File(sourceroot),
        dialogTitle: "Choose files to export",
        fileSelectionMode: JFileChooser.FILES_ONLY,
        multiSelectionEnabled: true)
    if (fc.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
        console.println "Canceled"
        return
    }
    def rootPrefix = normalize(sourceroot)
    if (!rootPrefix.endsWith('/')) {
        rootPrefix += '/'
    }
    def chosen = fc.selectedFiles.collect { normalize(it.path) }
    // Plain prefix comparison: the source root is a path, not a regular expression.
    if (!chosen.every { it.startsWith(rootPrefix) }) {
        console.println "Selection outside of ${prop.getSourceRoot()} folder"
        final def title = 'Wrong file(s) selected'
        final def msg = "Files must be in ${prop.getSourceRoot()} folder."
        console.println msg
        showMessageDialog null, msg, title, INFORMATION_MESSAGE
        return
    }
    // Selected files that are not project files simply match nothing and are ignored.
    def wanted = chosen.collect { it.substring(rootPrefix.length()) } as Set
    exportFiles = project.projectFiles.findAll { wanted.contains(normalize(it.filePath)) }
} else {
    exportFiles = project.projectFiles
}

final String unknown = 'unknown'
final String tmxDatePattern = "yyyyMMdd'T'HHmmss'Z'"
def utc = TimeZone.getTimeZone('UTC')

def escape = { String text -> StaticUtils.makeValidXML(text) }

// Returns the timestamp as a TMX date, or null when no date is available.
// The pattern ends in a literal 'Z', so the value has to be rendered in UTC.
def tmxStamp = { millis ->
    millis > 0 ? new Date(millis).format(tmxDatePattern, utc) : null
}

// Writes one <tu> holding the source/target pair. Ids fall back to 'unknown';
// date attributes are left out when there is no date to report.
def writeTu = { Writer out, String src, String tgt, changer, changed, creator, created ->
    def attrs = new StringBuilder()
    attrs << ' changeid="' << escape(changer ?: unknown) << '"'
    def changedStamp = tmxStamp(changed)
    if (changedStamp) {
        attrs << ' changedate="' << changedStamp << '"'
    }
    attrs << ' creationid="' << escape(creator ?: unknown) << '"'
    def createdStamp = tmxStamp(created)
    if (createdStamp) {
        attrs << ' creationdate="' << createdStamp << '"'
    }
    out << ' <tu>\n'
    out << ' <tuv xml:lang="' << sourceLocale << '">\n'
    out << ' <seg>' << escape(src) << '</seg>\n'
    out << ' </tuv>\n'
    out << ' <tuv xml:lang="' << targetLocale << '"' << attrs << '>\n'
    out << ' <seg>' << escape(tgt) << '</seg>\n'
    out << ' </tuv>\n'
    out << ' </tu>\n'
}

// The DOCTYPE names the 1.4 DTD to agree with version="1.4" and xml:lang below.
def header = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<!DOCTYPE tmx SYSTEM "tmx14.dtd">',
    '<tmx version="1.4">',
    ' <header',
    ' creationtool="OmegaTScripting"',
    ' creationtoolversion="0.1"',
    ' segtype="' + segmenting + '"',
    ' o-tmf="OmegaT TMX"',
    ' adminlang="EN-US"',
    ' srclang="' + sourceLocale + '"',
    ' datatype="plaintext"',
    ' >',
    ' </header>',
    ' <body>'
].join('\n') + '\n'

// Remember where the user was so the editor can be put back afterwards.
def activeSegment = editor.currentEntry?.entryNum()

// The export sleeps once per untranslated segment, so it runs on its own thread.
Thread.start {
    int count = 0
    int hitcount = 0
    try {
        folder.mkdirs()
        // One writer for the whole export; it overwrites any previous file
        // and is closed when the closure exits, even on error.
        relevantMem.withWriter('UTF-8') { out ->
            out << header
            exportFiles.each { file ->
                file.entries.each { entry ->
                    count++
                    // Activating the segment is what makes the matcher look up matches for it.
                    editor.gotoEntry(entry.entryNum())
                    def info = project.getTranslationInfo(entry)
                    if (info.isTranslated()) {
                        hitcount++
                        writeTu(out, entry.srcText, info.translation,
                                info.changer, info.changeDate, info.creator, info.creationDate)
                        console.println "-------\nFound translation for segment ${entry.entryNum()}. Exporting"
                    } else {
                        // Give the match pane time to update for this segment.
                        sleep sleeptime
                        def near = Core.getMatcher().getActiveMatch()
                        if (near == null) {
                            console.println "-------\nNo match found for segment ${entry.entryNum()}"
                        } else if (near.scores[0].score > similarity) {
                            hitcount++
                            writeTu(out, near.source, near.translation,
                                    near.changer, near.changedDate, near.creator, near.creationDate)
                            console.println "-------\nFound good match for segment ${entry.entryNum()}"
                            console.println "Segment source text is: \n${editor.currentEntry.getSrcText()}"
                            console.println "\nMatch source is: \n$near.source"
                            console.println "Match translation is: \n$near.translation\n"
                        } else {
                            console.println "-------\nNo good match found for segment ${entry.entryNum()}"
                        }
                    }
                }
            }
            out << ' </body>\n'
            out << '</tmx>'
        }
    } finally {
        // Put the editor back even if the export failed half-way.
        if (activeSegment != null) {
            editor.gotoEntry(activeSegment)
        }
    }

    String msg = "The script has processed $count segments.\n"
    if (hitcount == 0) {
        // Nothing was exported: do not leave a TMX without any TU behind.
        relevantMem.delete()
        msg += "0 TU were exported.\nEmpty file $relevantMem has been deleted."
    } else {
        msg += "$hitcount TU were exported to $relevantMem."
    }
    final def title = 'Export result'
    console.println msg
    showMessageDialog null, msg, title, INFORMATION_MESSAGE
}
return
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement