################################################
### CONFIGURATION FILE FOR AN SMT EXPERIMENT ###
################################################

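# NOTE: an EMS configuration like this is not run directly; it is handed to
# experiment.perl. A minimal sketch of the usual invocation (assuming this
# file were saved as, e.g., config.small -- the actual file name is not part
# of this paste):
#
#   /home/nlp/opt/moses/scripts/ems/experiment.perl -config config.small -exec
#
# Without -exec, experiment.perl only plans the steps and does not run them.
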
[GENERAL]

thisFile = /home/nlp/Downloads/SMALL

### directory in which experiment is run
#
working-dir = $thisFile/
opt-dir = /home/nlp/opt

# specification of the language pair
input-extension = pl
output-extension = en
pair-extension = pl-en

### directories that contain tools and data
#
# moses
moses-src-dir = $opt-dir/moses
#
# moses binaries
moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
# directory where GIZA++/MGIZA programs reside
external-bin-dir = $opt-dir/mgizapp/bin
#
# srilm
srilm-dir = $opt-dir/srilm/bin/i686-m64
#
# irstlm
#irstlm-dir = $opt-dir/irstlm/bin
#
# randlm
#randlm-dir = $opt-dir/randlm/bin
#
# data
wmt12-data = $thisFile

### basic tools
#
# moses decoder
decoder = $moses-bin-dir/moses

# conversion of phrase table into binary on-disk format
#ttable-binarizer = $moses-bin-dir/processPhraseTable
filter-settings = "-MinScore 2:0.0001,0:0.0001"

# conversion of rule table into binary on-disk format
ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"

input-tokenizer = "/home/nlp/opt/moses/scripts/tokenizer/normalize-punctuation.perl $input-extension | $moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "/home/nlp/opt/moses/scripts/tokenizer/normalize-punctuation.perl $output-extension | $moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
#input-truecaser = $moses-script-dir/recaser/truecase.perl
#output-truecaser = $moses-script-dir/recaser/truecase.perl
#detruecaser = $moses-script-dir/recaser/detruecase.perl

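# The input-/output-tokenizer commands above are applied by EMS to every
# corpus side. Run standalone on a hypothetical raw file, the Polish side
# would look roughly like:
#
#   /home/nlp/opt/moses/scripts/tokenizer/normalize-punctuation.perl pl < train.pl \
#     | /home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -a -l pl > train.tok.pl
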
### generic parallelizer for cluster and multi-core machines
# you may specify a script that allows the parallel execution of
# parallelizable steps (see meta file). you also need to specify
# the number of jobs (cluster) or cores (multi-core)
#
#generic-parallelizer = $moses-script-dir/ems/support/generic-parallelizer.perl
generic-parallelizer = $moses-script-dir/ems/support/generic-multicore-parallelizer.perl

### cluster settings (if run on a cluster machine)
# number of jobs to be submitted in parallel
#
#jobs = 10

# arguments to qsub when scheduling a job
#qsub-settings = ""

# project for privileges and usage accounting
#qsub-project = iccs_smt

# memory and time
#qsub-memory = 4
#qsub-hours = 48

### multi-core settings
# when the generic parallelizer is used, the number of cores
# is specified here
cores = 2
#input-splitter = $moses-script-dir/generic/compound-splitter.perl


#################################################################
### PARALLEL CORPUS PREPARATION:
### create a tokenized, sentence-aligned corpus, ready for training
#################################################################

[CORPUS]

### long sentences are filtered out, since they slow down GIZA++
# and are a less reliable source of data. set here the maximum
# length of a sentence
#
max-sentence-length = 80

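# For the length filter above, EMS relies on the standard corpus cleaning
# step from the Moses scripts; a rough standalone equivalent (hypothetical
# file stems) would be:
#
#   /home/nlp/opt/moses/scripts/training/clean-corpus-n.perl train pl en train.clean 1 80
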
[CORPUS:tags]

### command to run to get raw corpus files
#
# get-corpus-script =

### raw corpus files (untokenized, but sentence aligned)
#
raw-stem = $wmt12-data/train

### tokenized corpus files (may contain long sentences)
#
#tokenized-stem =

### if sentence filtering should be skipped,
# point to the clean training data
#
#clean-stem =

### if corpus preparation should be skipped,
# point to the prepared training data
#
#lowercased-stem =

#[CORPUS:inne]

### command to run to get raw corpus files
#
# get-corpus-script =

### raw corpus files (untokenized, but sentence aligned)
#
#raw-stem = $wmt12-data/inne



#################################################################
### LANGUAGE MODEL TRAINING
#################################################################

[LM]

### tool to be used for language model training
# srilm
lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk"

# irstlm training
# msb = modified kneser ney; p=0 no singleton pruning
#lm-training = "$moses-script-dir/generic/trainlm-irst2.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/tmp"
#settings = "-s msb -p 0"

# order of the language model
order = 3

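# With the SRILM settings above, the LM training step amounts to an
# ngram-count call roughly like the following (file names are hypothetical):
#
#   $srilm-dir/ngram-count -order 3 -interpolate -kndiscount -unk \
#     -text lm.tok.en -lm lm.en.arpa
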
### tool to be used for training randomized language model from scratch
# (more commonly, a SRILM is trained)
#
#rlm-training = "$randlm-dir/buildlm -falsepos 8 -values 8"

### script to use for binary table format for irstlm or kenlm
# (default: no binarization)

# irstlm
#lm-binarizer = $irstlm-dir/compile-lm

# kenlm, also set type to 8
lm-binarizer = $moses-bin-dir/build_binary
type = 8

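# With type = 8 (KenLM), the ARPA file is then compiled into a binary model
# with KenLM's build_binary; a rough standalone equivalent (hypothetical
# file names):
#
#   $moses-bin-dir/build_binary lm.en.arpa lm.en.binlm
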
### script to create quantized language model format (irstlm)
# (default: no quantization)
#
#lm-quantizer = $irstlm-dir/quantize-lm

### script to use for converting into randomized table format
# (default: no randomization)
#
#lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"

### each language model to be used has its own section here


[LM:traintags]

### command to run to get raw corpus files
#
#get-corpus-script = ""

### raw corpus (untokenized)
#
raw-corpus = $wmt12-data/lm.$output-extension

### tokenized corpus files (may contain long sentences)
#
#tokenized-corpus =

### if corpus preparation should be skipped,
# point to the prepared language model
#
#lm =

#[LM:trainfr]
#raw-corpus = $wmt12-data/trainfr.$output-extension


#[LM:traintags=pos]
#factors = "pos"
#order = 6
#settings = "-interpolate -unk"
#raw-corpus = $wmt12-data/trainbigfr.$output-extension



#################################################################
### FACTOR DEFINITION
#################################################################

#[INPUT-FACTOR]

# also used for output factors
#temp-dir = $working-dir/training/factor


#[OUTPUT-FACTOR:pos]

### script that generates this factor
#
#mxpost = $opt-dir/moses/contrib/jmx
#factor-script = "$moses-script-dir/training/wrappers/make-factor-en-pos.mxpost.perl -mxpost -mx8096m $mxpost"



#################################################################
### INTERPOLATING LANGUAGE MODELS
#################################################################

#[INTERPOLATED-LM]

# if multiple language models are used, these may be combined
# by optimizing perplexity on a tuning set
# see, for instance [Koehn and Schwenk, IJCNLP 2008]

### script to interpolate language models
# if commented out, no interpolation is performed
#
#script = $moses-script-dir/ems/support/interpolate-lm.perl

### tuning set
# you may use the same set that is used for mert tuning (reference set)
#
#tuning-sgm = $wmt12-data/dev.$pair-extension.$output-extension.xml
#raw-tuning =
#tokenized-tuning =
#factored-tuning =
#lowercased-tuning =
#split-tuning =

### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"

### script to use for binary table format for irstlm or kenlm
# (default: no binarization)

# irstlm
#lm-binarizer = $irstlm-dir/compile-lm

# kenlm, also set type to 8
lm-binarizer = $moses-bin-dir/build_binary
type = 8

### script to create quantized language model format (irstlm)
# (default: no quantization)
#
#lm-quantizer = $irstlm-dir/quantize-lm

### script to use for converting into randomized table format
# (default: no randomization)
#
#lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"



#################################################################
### MODIFIED MOORE LEWIS FILTERING
#################################################################

#[MML]

### specifications for language models to be trained
#
#lm-training = $srilm-dir/ngram-count
#lm-settings = "-interpolate -kndiscount -unk"
#lm-binarizer = $moses-src-dir/bin/build_binary
#lm-query = $moses-src-dir/bin/query
#order = 5
#type = 8

### in-/out-of-domain source/target corpora to train the 4 language models
#
#in-domain = [CORPUS:tags]
#outdomain-stem = [CORPUS:inne]

# ... or to two separate monolingual corpora
#indomain-target = [LM:toy:lowercased-corpus]
#raw-indomain-source = $wmt12-data/train.pl
#raw-indomain-target = $wmt12-data/train.en

# point to out-of-domain parallel corpus
#outdomain-stem = $wmt12-data/inne

# settings: number of lines sampled from the corpora to train each language model on
# (if used at all, should be small as a percentage of corpus)
#settings = "--line-count 100000"



#################################################################
### TRANSLATION MODEL TRAINING
#################################################################

[TRAINING]

### training script to be used: either a legacy script or
# current moses training script (default)
#
script = $moses-script-dir/training/train-model.perl

### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
training-options = "-mgiza -mgiza-cpus $cores -sort-buffer-size 2G -sort-compress gzip -sort-parallel $cores -cores $cores"

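# Under the hood, EMS assembles these options into a train-model.perl call.
# A heavily simplified sketch (corpus stem, LM file and paths are
# hypothetical; the real command is generated by EMS with more arguments):
#
#   $moses-script-dir/training/train-model.perl \
#     -root-dir $working-dir/training -corpus corpus.clean -f pl -e en \
#     -alignment grow-diag-final-and -reordering msd-bidirectional-fe \
#     -lm 0:3:/path/to/lm.en.binlm:8 \
#     -external-bin-dir $opt-dir/mgizapp/bin \
#     -mgiza -mgiza-cpus 2 -cores 2
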
### factored training: specify here which factors are used
# if none specified, single factor training is assumed
# (one translation step, surface to surface)
#
#input-factors = word
#output-factors = word pos
#alignment-factors = "word -> word"
#translation-factors = "word -> word+pos"
#reordering-factors = "word -> word"
#generation-factors =
#decoding-steps = "t0"

### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes

### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts
#
#run-giza-in-parts = 5

### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and)
#
alignment-symmetrization-method = grow-diag-final-and

### use of berkeley aligner for word alignment
#
#use-berkeley = true
#alignment-symmetrization-method = berkeley
#berkeley-train = $moses-script-dir/ems/support/berkeley-train.sh
#berkeley-process = $moses-script-dir/ems/support/berkeley-process.sh
#berkeley-jar = /your/path/to/berkeleyaligner-1.1/berkeleyaligner.jar
#berkeley-java-options = "-server -mx30000m -ea"
#berkeley-training-options = "-Main.iters 5 5 -EMWordAligner.numThreads 8"
#berkeley-process-options = "-EMWordAligner.numThreads 8"
#berkeley-posterior = 0.5

### use of baseline alignment model (incremental training)
#
#baseline = 68
#baseline-alignment-model = "$working-dir/training/prepared.$baseline/$input-extension.vcb \
# $working-dir/training/prepared.$baseline/$output-extension.vcb \
# $working-dir/training/giza.$baseline/${output-extension}-$input-extension.cooc \
# $working-dir/training/giza-inverse.$baseline/${input-extension}-$output-extension.cooc \
# $working-dir/training/giza.$baseline/${output-extension}-$input-extension.thmm.5 \
# $working-dir/training/giza.$baseline/${output-extension}-$input-extension.hhmm.5 \
# $working-dir/training/giza-inverse.$baseline/${input-extension}-$output-extension.thmm.5 \
# $working-dir/training/giza-inverse.$baseline/${input-extension}-$output-extension.hhmm.5"

### if word alignment should be skipped,
# point to word alignment files
#
#word-alignment = $working-dir/model/aligned.1

### filtering some corpora with modified Moore-Lewis
# specify corpora to be filtered and ratio to be kept, either before or after word alignment
#mml-filter-corpora = inne
#mml-before-wa = "-proportion 0.8"
#mml-after-wa = "-proportion 0.8"

### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor

### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
#
lexicalized-reordering = msd-bidirectional-fe

#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = ""

### hierarchical rule set
#
#hierarchical-rule-set = true

domain-features = "sparse indicator"

### settings for rule extraction
#
#extract-settings = ""
max-phrase-length = 5

### add extracted phrases from baseline model
#
#baseline-extract = $working-dir/model/extract.$baseline
#
# requires aligned parallel corpus for re-estimating lexical translation probabilities
#baseline-corpus = $working-dir/training/corpus.$baseline
#baseline-alignment = $working-dir/model/aligned.$baseline.$alignment-symmetrization-method

### unknown word labels (target syntax only)
# enables use of unknown word labels during decoding
# label file is generated during rule extraction
#
#use-unknown-word-labels = true

### if phrase extraction should be skipped,
# point to stem for extract files
#
# extracted-phrases =

### settings for rule scoring
#
score-settings = "--GoodTuring --MinScore 2:0.0001"

### include word alignment in phrase table
#
#include-word-alignment-in-rules = yes

### sparse lexical features
#
sparse-lexical-features = "target-word-insertion all, source-word-deletion all, word-translation all, phrase-length"

#"target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"

### domain adaptation settings
# options: sparse, any of: indicator, subset, ratio
#domain-features = "subset"

### if phrase table training should be skipped,
# point to phrase translation table
#
# phrase-translation-table =

### if reordering table training should be skipped,
# point to reordering table
#
# reordering-table =

### filtering the phrase table based on significance tests
# Johnson, Martin, Foster and Kuhn. (2007): "Improving Translation Quality by Discarding Most of the Phrasetable"
# options: -n number of translations; -l 'a+e', 'a-e', or a positive real value -log prob threshold
#salm-index = /path/to/project/salm/Bin/Linux/Index/IndexSA.O64
#sigtest-filter = "-l a+e -n 50"

### if training should be skipped,
# point to a configuration file that contains
# pointers to all relevant model files
#
#config-with-reused-weights =



#################################################################
### TUNING: finding good weights for model components
#################################################################

[TUNING]

### instead of tuning with this setting, old weights may be recycled
# specify here an old configuration file with matching weights
#
#weight-config = $working-dir/tuning/moses.weight-reused.ini.1

### tuning script to be used
#
tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/bin --batch-mira --return-best-dev -maximum-iterations 2"

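# EMS wraps these settings into a mert-moses.pl call; a rough sketch of the
# equivalent standalone invocation (tuning files and moses.ini paths are
# hypothetical, EMS fills in the real ones):
#
#   $moses-script-dir/training/mert-moses.pl \
#     dev.tok.pl dev.tok.en $moses-bin-dir/moses model/moses.ini \
#     --mertdir $moses-src-dir/bin --batch-mira --return-best-dev \
#     --maximum-iterations 2 --working-dir tuning
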
### specify the corpus used for tuning
# it should contain 1000s of sentences
#
input-sgm = $wmt12-data/dev.$pair-extension.$input-extension.xml
#raw-input =
#tokenized-input =
#factorized-input =
#input =
#
reference-sgm = $wmt12-data/dev.$pair-extension.$output-extension.xml
#raw-reference =
#tokenized-reference =
#factorized-reference =
#reference =

### size of n-best list used (typically 100)
#
nbest = 100

### ranges for weights for random initialization
# if not specified, the tuning script will use generic ranges
# it is not clear if this matters
#
# lambda =

### additional flags for the filter script
#
filter-settings = ""

### additional flags for the decoder
#
decoder-settings = "-threads $cores -mp -search-algorithm 1 -cube-pruning-pop-limit 1000 -s 1000 -feature-overwrite 'TranslationModel0 table-limit=100' -max-trans-opt-per-coverage 100"

### if tuning should be skipped, specify this here
# and also point to a configuration file that contains
# pointers to all relevant model files
#
#config =



#################################################################
### RECASER: restore case, this part only trains the model
#################################################################

#[RECASING]

#decoder = $moses-bin-dir/moses

### training data
# raw input still needs to be tokenized;
# alternatively, tokenized input may be specified
#
#tokenized = [LM:traintags:tokenized-corpus]

# recase-config =

#lm-training = $srilm-dir/ngram-count



#################################################################
### TRUECASER: train model to truecase corpora and input
#################################################################

#[TRUECASER]

### script to train truecaser models
#
#trainer = $moses-script-dir/recaser/train-truecaser.perl

### training data
# data on which truecaser is trained
# if no training data is specified, parallel corpus is used
#
# raw-stem =
# tokenized-stem =

### trained model
#
# truecase-model =

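# If the truecaser is enabled, the trainer above and truecase.perl from the
# recaser directory are used roughly as follows (hypothetical file names):
#
#   $moses-script-dir/recaser/train-truecaser.perl --model truecase-model.en --corpus corpus.tok.en
#   $moses-script-dir/recaser/truecase.perl --model truecase-model.en < corpus.tok.en > corpus.tc.en
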


##########################################################################
### EVALUATION: translating a test set using the tuned system and scoring it
##########################################################################

[EVALUATION]

### number of jobs (if parallel execution on cluster)
#
#jobs = 10

### additional flags for the filter script
#
#filter-settings = ""

### additional decoder settings
# switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
#
decoder-settings = "-mbr -threads $cores -mp -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 -feature-overwrite 'TranslationModel0 table-limit=100' -max-trans-opt-per-coverage 100"
#
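# Outside of EMS, the same settings correspond roughly to a direct decoder
# call on a tokenized, truecased test file (moses.ini path and file names
# are hypothetical):
#
#   $moses-bin-dir/moses -f moses.ini -threads 2 -mbr \
#     -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \
#     < test.tok.pl > test.out.en
#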
### specify size of n-best list, if produced
#
#nbest = 100

### multiple reference translations
#
#multiref = yes

### prepare system output for scoring
# this may include detokenization and wrapping output in sgm
# (needed for nist-bleu, ter, meteor)
#
detokenizer = "$moses-script-dir/tokenizer/detokenizer.perl -l $output-extension"
#recaser = $moses-script-dir/recaser/recase.perl
wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension"
#output-sgm =

### BLEU
#
nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu =

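# For a quick score outside of EMS, multi-bleu.perl can be run directly on
# tokenized output against a tokenized reference (hypothetical file names):
#
#   $moses-script-dir/generic/multi-bleu.perl test.tok.en < test.out.en
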
### TER: translation error rate (BBN metric) based on edit distance
# not yet integrated
#
# ter =

### METEOR: gives credit to stem / wordnet synonym matches
# not yet integrated
#
# meteor =

### Analysis: carry out various forms of analysis on the output
#
analysis = $moses-script-dir/ems/support/analysis.perl
#
# also report on input coverage
analyze-coverage = yes
#
# also report on phrase mappings used
report-segmentation = yes
#
# report precision of translations for each input word, broken down by
# count of input word in corpus and model
#report-precision-by-coverage = yes
#
# further precision breakdown by factor
#precision-by-coverage-factor = pos
#
# visualization of the search graph in tree-based models
#analyze-search-graph = yes


[EVALUATION:traintags]

### input data
#
input-sgm = $wmt12-data/test.$pair-extension.$input-extension.xml
# raw-input =
# tokenized-input =
# factorized-input =
# input =

### reference data
#
reference-sgm = $wmt12-data/test.$pair-extension.$output-extension.xml
# raw-reference =
# tokenized-reference =
# reference =

### analysis settings
# may contain any of the general evaluation analysis settings
# specific setting: base coverage statistics on earlier run
#
#precision-by-coverage-base = $working-dir/evaluation/test.analysis.5

### wrapping frame
# for nist-bleu and other scoring scripts, the output needs to be wrapped
# in sgm markup (typically like the input sgm)
#
wrapping-frame = $input-sgm



#################################################################
### REPORTING: summarize evaluation scores
#################################################################

[REPORTING]

### currently no parameters for reporting section