Guest User

Untitled

a guest
Jul 26th, 2013
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.45 KB | None | 0 0
  1. # Configuration script for sphinx trainer -*-mode:Perl-*-
  2.  
  3. $CFG_VERBOSE = 1; # Determines how much goes to the screen.
  4.  
  5. # These are filled in at configuration time
  6. $CFG_DB_NAME = "all";
  7. # Experiment name, will be used to name model files and log files
  8. $CFG_EXPTNAME = "$CFG_DB_NAME";
  9.  
  10. # Base directory of this experiment; the feat, log, model and etc paths below are built from it
  11. $CFG_BASE_DIR = "/tmp/ppaikens_files_20130709_tr_tr_filtred"; # NOTE(review): under /tmp - confirm outputs are meant to live there
  12. # Locations of the SphinxTrain installation (library, executables, scripts)
  13. $CFG_SPHINXTRAIN_DIR = "/home/ppaikens/runa/tools/sphinx/lib/sphinxtrain";
  14. $CFG_BIN_DIR = "/home/ppaikens/runa/tools/sphinx/libexec/sphinxtrain";
  15. $CFG_SCRIPT_DIR = "/home/ppaikens/runa/tools/sphinx/lib/sphinxtrain/scripts";
  16.  
  17.  
  18. # Audio waveform and feature file information
  19. $CFG_WAVFILES_DIR = "/storage/ppaikens/runa/corpus/files_20130709/audio";
  20. $CFG_WAVFILE_EXTENSION = 'wav';
  21. $CFG_WAVFILE_TYPE = 'mswav'; # one of nist, mswav, raw
  22. $CFG_FEATFILES_DIR = "$CFG_BASE_DIR/feat";
  23. $CFG_FEATFILE_EXTENSION = 'mfc'; # no leading dot here (the decoder setting further down uses '.mfc')
  24. $CFG_VECTOR_LENGTH = 13;
  25.  
  26. # Feature extraction parameters
  27. $CFG_WAVFILE_SRATE = 16000.0;
  28. $CFG_NUM_FILT = 40; # For wideband speech it's 40, for telephone 8khz reasonable value is 31
  29. $CFG_LO_FILT = 133.3334; # For telephone 8kHz speech value is 200
  30. $CFG_HI_FILT = 6855.4976; # For telephone 8kHz speech value is 3500
  31.  
  32. $CFG_MIN_ITERATIONS = 1; # BW Iterate at least this many times
  33. $CFG_MAX_ITERATIONS = 10; # BW Don't iterate more than this, something's likely wrong.
  34.  
  35. # (none/max) Type of AGC to apply to input files
  36. $CFG_AGC = 'none';
  37. # (current/none) Type of cepstral mean subtraction/normalization
  38. # to apply to input files
  39. $CFG_CMN = 'current';
  40. # (yes/no) Normalize variance of input files to 1.0
  41. $CFG_VARNORM = 'no';
  42. # (yes/no) Train full covariance matrices
  43. $CFG_FULLVAR = 'no';
  44. # (yes/no) Use diagonals only of full covariance matrices for
  45. # Forward-Backward evaluation (recommended if CFG_FULLVAR is yes)
  46. $CFG_DIAGFULL = 'no';
  47.  
  48. # (yes/no) Perform vocal tract length normalization in training. This
  49. # will result in a "normalized" model which requires VTLN to be done
  50. # during decoding as well.
  51. $CFG_VTLN = 'no';
  52. # Starting warp factor for VTLN
  53. $CFG_VTLN_START = 0.80;
  54. # Ending warp factor for VTLN
  55. $CFG_VTLN_END = 1.40;
  56. # Step size of warping factors
  57. $CFG_VTLN_STEP = 0.05;
  58.  
  59. # Directory to write queue manager logs to
  60. $CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";
  61. # Directory to write training logs to
  62. $CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";
  63. # Directory for re-estimation counts
  64. $CFG_BWACCUM_DIR = "$CFG_BASE_DIR/bwaccumdir";
  65. # Directory to write model parameter files to
  66. $CFG_MODEL_DIR = "$CFG_BASE_DIR/model_parameters";
  67.  
  68. # Directory containing transcripts and control files for
  69. # speaker-adaptive training; the language model, dictionary, phone, filler, file-list and feat.params paths below are also built from it
  70. $CFG_LIST_DIR = "$CFG_BASE_DIR/etc";
  71.  
  72. # Decoding variables for MMIE training
  73. $CFG_LANGUAGEWEIGHT = "11.5";
  74. $CFG_BEAMWIDTH = "1e-100";
  75. $CFG_WORDBEAM = "1e-80";
  76. $CFG_LANGUAGEMODEL = "$CFG_LIST_DIR/all.lm.DMP"; # NOTE: file name hard-codes "all" instead of using $CFG_DB_NAME
  77. $CFG_WORDPENALTY = "0.2";
  78.  
  79. # Lattice pruning variables
  80. $CFG_ABEAM = "1e-50";
  81. $CFG_NBEAM = "1e-10";
  82. $CFG_PRUNED_DENLAT_DIR = "$CFG_BASE_DIR/pruned_denlat";
  83.  
  84. # MMIE training related variables
  85. $CFG_MMIE = "no";
  86. $CFG_MMIE_MAX_ITERATIONS = 5;
  87. $CFG_LATTICE_DIR = "$CFG_BASE_DIR/lattice";
  88. $CFG_MMIE_TYPE = "rand"; # Valid values are "rand", "best" or "ci"
  89. $CFG_MMIE_CONSTE = "3.0";
  90. $CFG_NUMLAT_DIR = "$CFG_BASE_DIR/numlat";
  91. $CFG_DENLAT_DIR = "$CFG_BASE_DIR/denlat";
  92.  
  93. # Variables used in main training of models (all files live in $CFG_LIST_DIR)
  94. $CFG_DICTIONARY = "$CFG_LIST_DIR/${CFG_DB_NAME}.dic";
  95. $CFG_RAWPHONEFILE = "$CFG_LIST_DIR/$CFG_DB_NAME.phone";
  96. $CFG_FILLERDICT = "$CFG_LIST_DIR/$CFG_DB_NAME.filler";
  97. $CFG_LISTOFFILES = "$CFG_LIST_DIR/${CFG_DB_NAME}_filtred.fileids"; # NOTE(review): no "_train" in the name, unlike the transcript on the next line, and the decoder section points at this same list - confirm
  98. $CFG_TRANSCRIPTFILE = "$CFG_LIST_DIR/${CFG_DB_NAME}_train_filtred.transcription";
  99. $CFG_FEATPARAMS = "$CFG_LIST_DIR/feat.params";
  100.  
  101. # Variables used in characterizing models
  102.  
  103. $CFG_HMM_TYPE = '.cont.'; # Sphinx 4, PocketSphinx
  104. #$CFG_HMM_TYPE = '.semi.'; # PocketSphinx
  105. #$CFG_HMM_TYPE = '.ptm.'; # PocketSphinx (larger data sets)
  106. # Abort with an error unless CFG_HMM_TYPE is one of the three supported values
  107. if (($CFG_HMM_TYPE ne ".semi.")
  108. and ($CFG_HMM_TYPE ne ".ptm.")
  109. and ($CFG_HMM_TYPE ne ".cont.")) {
  110. die "Please choose one CFG_HMM_TYPE out of '.cont.', '.ptm.', or '.semi.', " .
  111. "currently $CFG_HMM_TYPE\n";
  112. }
  113.  
  114. # This configuration is fastest and best for most acoustic models in
  115. # PocketSphinx and Sphinx-III. See below for Sphinx-II.
  116. $CFG_STATESPERHMM = 3;
  117. $CFG_SKIPSTATE = 'no';
  118. # Per-model-type settings: directory label, feature type, stream count and density counts
  119. if ($CFG_HMM_TYPE eq '.semi.') {
  120. $CFG_DIRLABEL = 'semi';
  121. # Four stream features for PocketSphinx
  122. $CFG_FEATURE = "s2_4x";
  123. $CFG_NUM_STREAMS = 4;
  124. $CFG_INITIAL_NUM_DENSITIES = 256;
  125. $CFG_FINAL_NUM_DENSITIES = 256;
  126. die "For semi continuous models, the initial and final models have the same density"
  127. if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES); # only fires if the two values above are edited to differ
  128. } elsif ($CFG_HMM_TYPE eq '.ptm.') {
  129. $CFG_DIRLABEL = 'ptm';
  130. # Four stream features for PocketSphinx
  131. $CFG_FEATURE = "s2_4x";
  132. $CFG_NUM_STREAMS = 4;
  133. $CFG_INITIAL_NUM_DENSITIES = 64;
  134. $CFG_FINAL_NUM_DENSITIES = 64;
  135. die "For phonetically tied models, the initial and final models have the same density"
  136. if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES); # only fires if the two values above are edited to differ
  137. } elsif ($CFG_HMM_TYPE eq '.cont.') { # branch taken with the CFG_HMM_TYPE selected above
  138. $CFG_DIRLABEL = 'cont';
  139. # Single stream features - Sphinx 3
  140. $CFG_FEATURE = "1s_c_d_dd";
  141. $CFG_NUM_STREAMS = 1;
  142. $CFG_INITIAL_NUM_DENSITIES = 1;
  143. $CFG_FINAL_NUM_DENSITIES = 8;
  144. die "The initial has to be less than the final number of densities"
  145. if ($CFG_INITIAL_NUM_DENSITIES > $CFG_FINAL_NUM_DENSITIES); # equal values are accepted; only initial > final aborts
  146. }
  147.  
  148. # Number of top gaussians to score a frame. A little bit less accurate computations
  149. # make training significantly faster. Uncomment to apply this during the training
  150. # For good accuracy make sure you are using the same setting in decoder
  151. # In theory this can be different for various training stages. For example 4 for
  152. # CI stage and 16 for CD stage
  153. # $CFG_CI_TOPN = 4;
  154. # $CFG_CD_TOPN = 16;
  155.  
  156. # (yes/no) Train multiple-gaussian context-independent models (useful
  157. # for alignment, use 'no' otherwise) in the models created
  158. # specifically for forced alignment
  159. $CFG_FALIGN_CI_MGAU = 'no';
  160. # (yes/no) Train multiple-gaussian context-independent models (useful
  161. # for alignment, use 'no' otherwise)
  162. $CFG_CI_MGAU = 'no';
  163. # Number of tied states (senones) to create in decision-tree clustering
  164. $CFG_N_TIED_STATES = 200;
  165. # How many parts to run Forward-Backward estimation in
  166. $CFG_NPART = 1;
  167.  
  168. # (yes/no) Train a single decision tree for all phones (actually one
  169. # per state) (useful for grapheme-based models, use 'no' otherwise)
  170. $CFG_CROSS_PHONE_TREES = 'no';
  171.  
  172. # Use force-aligned transcripts (if available) as input to training
  173. $CFG_FORCEDALIGN = 'no';
  174.  
  175. # Use a specific set of models for force alignment. If not defined,
  176. # context-independent models for the current experiment will be used.
  177. $CFG_FORCE_ALIGN_MDEF = "$CFG_BASE_DIR/model_architecture/$CFG_EXPTNAME.falign_ci.mdef";
  178. $CFG_FORCE_ALIGN_MODELDIR = "$CFG_MODEL_DIR/$CFG_EXPTNAME.falign_ci_$CFG_DIRLABEL"; # $CFG_DIRLABEL is set in the CFG_HMM_TYPE branch above
  179.  
  180. # Use a specific dictionary and filler dictionary for force alignment.
  181. # If these are not defined, a dictionary and filler dictionary will be
  182. # created from $CFG_DICTIONARY and $CFG_FILLERDICT, with noise words
  183. # removed from the filler dictionary and added to the dictionary (this
  184. # is because the force alignment is not very good at inserting them)
  185.  
  186. # $CFG_FORCE_ALIGN_DICTIONARY = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.dict";
  187. # $CFG_FORCE_ALIGN_FILLERDICT = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.fdict";
  188.  
  189. # Use a particular beam width for force alignment. The wider
  190. # (i.e. smaller numerically) the beam, the fewer sentences will be
  191. # rejected for bad alignment.
  192. $CFG_FORCE_ALIGN_BEAM = 1e-60;
  193.  
  194. # Calculate an LDA/MLLT transform?
  195. $CFG_LDA_MLLT = 'no';
  196. # Dimensionality of LDA/MLLT output
  197. $CFG_LDA_DIMENSION = 29;
  198.  
  199. # This is actually just a difference in log space (it doesn't make
  200. # sense otherwise, because different feature parameters have very
  201. # different likelihoods)
  202. $CFG_CONVERGENCE_RATIO = 0.1;
  203. # Queue backend; alternatives to the plain "Queue" configured below:
  204. # Queue::POSIX for multiple CPUs on a local machine
  205. # Queue::PBS to use a PBS/TORQUE queue
  206. $CFG_QUEUE_TYPE = "Queue";
  207.  
  208. # Name of queue to use for PBS/TORQUE
  209. $CFG_QUEUE_NAME = "workq";
  210.  
  211. # (yes/no) Build questions for decision tree clustering automatically
  212. $CFG_MAKE_QUESTS = "yes";
  213. # If CFG_MAKE_QUESTS is yes, questions are written to this file.
  214. # If CFG_MAKE_QUESTS is no, questions are read from this file.
  215. $CFG_QUESTION_SET = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.tree_questions";
  216. #$CFG_QUESTION_SET = "${CFG_BASE_DIR}/linguistic_questions";
  217.  
  218. $CFG_CP_OPERATION = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.cpmeanvar";
  219.  
  220. # Configuration for grapheme-to-phoneme model
  221. $CFG_G2P_MODEL= 'no';
  222.  
  223. # Configuration script for sphinx decoder
  224.  
  225. # Variables starting with $DEC_CFG_ refer to decoder specific
  226. # arguments, those starting with $CFG_ refer to trainer arguments,
  227. # some of them also used by the decoder.
  228.  
  229. $DEC_CFG_VERBOSE = 1; # Determines how much goes to the screen.
  230.  
  231. # These are filled in at configuration time
  232.  
  233. # Name of the decoding script to use (psdecode.pl or s3decode.pl, probably)
  234. $DEC_CFG_SCRIPT = 'psdecode.pl';
  235.  
  236. $DEC_CFG_EXPTNAME = "$CFG_EXPTNAME";
  237. $DEC_CFG_JOBNAME = "$CFG_EXPTNAME"."_job";
  238.  
  239. # Models to use.
  240. $DEC_CFG_MODEL_NAME = "$CFG_EXPTNAME.cd_${CFG_DIRLABEL}_${CFG_N_TIED_STATES}"; # "all.cd_cont_200" with the trainer settings above
  241.  
  242. $DEC_CFG_FEATFILES_DIR = "$CFG_BASE_DIR/feat";
  243. $DEC_CFG_FEATFILE_EXTENSION = '.mfc'; # NOTE(review): leading dot here, but $CFG_FEATFILE_EXTENSION is 'mfc' without one - confirm the decoder script expects the dot
  244. $DEC_CFG_VECTOR_LENGTH = $CFG_VECTOR_LENGTH;
  245. $DEC_CFG_AGC = $CFG_AGC;
  246. $DEC_CFG_CMN = $CFG_CMN;
  247. $DEC_CFG_VARNORM = $CFG_VARNORM;
  248.  
  249. $DEC_CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";
  250. $DEC_CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";
  251. $DEC_CFG_MODEL_DIR = "$CFG_MODEL_DIR";
  252.  
  253. $DEC_CFG_DICTIONARY = "$CFG_BASE_DIR/etc/$CFG_DB_NAME.dic";
  254. $DEC_CFG_FILLERDICT = "$CFG_BASE_DIR/etc/$CFG_DB_NAME.filler";
  255. $DEC_CFG_LISTOFFILES = "$CFG_BASE_DIR/etc/${CFG_DB_NAME}_filtred.fileids"; # NOTE(review): same file list as the trainer's $CFG_LISTOFFILES, while the transcript below is the "_test_" one - confirm
  256. $DEC_CFG_TRANSCRIPTFILE = "$CFG_BASE_DIR/etc/${CFG_DB_NAME}_test_filtred.transcription";
  257. $DEC_CFG_RESULT_DIR = "$CFG_BASE_DIR/result";
  258.  
  259. # These variables, used by the decoder, have to be user defined, and
  260. # may affect the decoder output
  261.  
  262. $DEC_CFG_LANGUAGEMODEL = "$CFG_BASE_DIR/etc/all.lm.DMP"; # NOTE: file name hard-codes "all" instead of using $CFG_DB_NAME
  263. $DEC_CFG_LANGUAGEWEIGHT = "10";
  264. $DEC_CFG_BEAMWIDTH = "1e-80";
  265. $DEC_CFG_WORDBEAM = "1e-40";
  266.  
  267. $DEC_CFG_ALIGN = "builtin";
  268.  
  269. $DEC_CFG_NPART = 1; # Define how many pieces to split decode in
  270.  
  271. # This variable has to be defined, otherwise utils.pl will not load.
  272. $CFG_DONE = 1;
  273.  
  274. return 1; # hand a true value back to whatever loads this file
Advertisement
Add Comment
Please, Sign In to add comment