Guest User

Untitled

a guest
Feb 18th, 2013
40
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.35 KB | None | 0 0
  # Configuration script for sphinx trainer -*-mode:Perl-*-
  #
  # All settings are plain package globals (no "my"), so whatever loads
  # this file can read them after evaluation.

  $CFG_VERBOSE = 1; # Determines how much goes to the screen.

  # These are filled in at configuration time
  $CFG_DB_NAME = "an4";
  # Experiment name, will be used to name model files and log files
  $CFG_EXPTNAME = "$CFG_DB_NAME";

  # Base directory of this experiment; the other paths in this file are
  # built from it.
  $CFG_BASE_DIR = "/cygdrive/c/Downloads/src/cmusphinx-code/an4";
  # Directories containing the SphinxTrain installation, binaries and scripts
  $CFG_SPHINXTRAIN_DIR = "/usr/local/lib/sphinxtrain";
  $CFG_BIN_DIR = "/usr/local/libexec/sphinxtrain";
  $CFG_SCRIPT_DIR = "/usr/local/lib/sphinxtrain/scripts";

  # Audio waveform and feature file information
  $CFG_WAVFILES_DIR = "$CFG_BASE_DIR/wav";
  $CFG_WAVFILE_EXTENSION = 'sph';
  $CFG_WAVFILE_TYPE = 'nist'; # one of nist, mswav, raw
  $CFG_FEATFILES_DIR = "$CFG_BASE_DIR/feat";
  $CFG_FEATFILE_EXTENSION = 'mfc';
  $CFG_VECTOR_LENGTH = 13;

  # Feature extraction parameters
  $CFG_WAVFILE_SRATE = 16000.0;
  $CFG_NUM_FILT = 40; # For wideband speech it's 40, for telephone 8kHz a reasonable value is 31
  $CFG_LO_FILT = 133.3334; # For telephone 8kHz speech value is 200
  $CFG_HI_FILT = 6855.4976; # For telephone 8kHz speech value is 3500

  $CFG_MIN_ITERATIONS = 1; # BW Iterate at least this many times
  $CFG_MAX_ITERATIONS = 10; # BW Don't iterate more than this, something's likely wrong.

  # (none/max) Type of AGC to apply to input files
  $CFG_AGC = 'none';
  # (current/none) Type of cepstral mean subtraction/normalization
  # to apply to input files
  $CFG_CMN = 'current';
  # (yes/no) Normalize variance of input files to 1.0
  $CFG_VARNORM = 'no';
  # (yes/no) Train full covariance matrices
  $CFG_FULLVAR = 'no';
  # (yes/no) Use diagonals only of full covariance matrices for
  # Forward-Backward evaluation (recommended if CFG_FULLVAR is yes)
  $CFG_DIAGFULL = 'no';

  # (yes/no) Perform vocal tract length normalization in training. This
  # will result in a "normalized" model which requires VTLN to be done
  # during decoding as well.
  $CFG_VTLN = 'no';
  # Starting warp factor for VTLN
  $CFG_VTLN_START = 0.80;
  # Ending warp factor for VTLN
  $CFG_VTLN_END = 1.40;
  # Step size of warping factors
  $CFG_VTLN_STEP = 0.05;

  # Working directories; each one lives under $CFG_BASE_DIR.

  # Directory to write queue manager logs to
  $CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";
  # Directory to write training logs to
  $CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";
  # Directory for re-estimation counts
  $CFG_BWACCUM_DIR = "$CFG_BASE_DIR/bwaccumdir";
  # Directory to write model parameter files to
  $CFG_MODEL_DIR = "$CFG_BASE_DIR/model_parameters";

  # Directory containing transcripts and control files for
  # speaker-adaptive training
  $CFG_LIST_DIR = "$CFG_BASE_DIR/etc";

  # Decoding variables for MMIE training
  # (numeric-looking values are deliberately kept as strings)
  $CFG_LANGUAGEWEIGHT = "11.5";
  $CFG_BEAMWIDTH = "1e-100";
  $CFG_WORDBEAM = "1e-80";
  $CFG_LANGUAGEMODEL = "$CFG_LIST_DIR/$CFG_DB_NAME.lm.DMP";
  $CFG_WORDPENALTY = "0.2";

  # Lattice pruning variables
  $CFG_ABEAM = "1e-50";
  $CFG_NBEAM = "1e-10";
  $CFG_PRUNED_DENLAT_DIR = "$CFG_BASE_DIR/pruned_denlat";

  # MMIE training related variables
  $CFG_MMIE = "no";
  $CFG_MMIE_MAX_ITERATIONS = 5;
  $CFG_LATTICE_DIR = "$CFG_BASE_DIR/lattice";
  $CFG_MMIE_TYPE = "rand"; # Valid values are "rand", "best" or "ci"
  $CFG_MMIE_CONSTE = "3.0";
  $CFG_NUMLAT_DIR = "$CFG_BASE_DIR/numlat";
  $CFG_DENLAT_DIR = "$CFG_BASE_DIR/denlat";

  # Variables used in main training of models
  # (all files are looked up in $CFG_LIST_DIR and named after $CFG_DB_NAME,
  # except feat.params)
  $CFG_DICTIONARY = "$CFG_LIST_DIR/$CFG_DB_NAME.dic";
  $CFG_RAWPHONEFILE = "$CFG_LIST_DIR/$CFG_DB_NAME.phone";
  $CFG_FILLERDICT = "$CFG_LIST_DIR/$CFG_DB_NAME.filler";
  $CFG_LISTOFFILES = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.fileids";
  $CFG_TRANSCRIPTFILE = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.transcription";
  $CFG_FEATPARAMS = "$CFG_LIST_DIR/feat.params";

  # Variables used in characterizing models

  # Exactly one of the three assignments below should be active.
  $CFG_HMM_TYPE = '.cont.'; # Sphinx 4, PocketSphinx
  #$CFG_HMM_TYPE = '.semi.'; # PocketSphinx
  #$CFG_HMM_TYPE = '.ptm.'; # PocketSphinx (larger data sets)

  # Abort configuration loading if the model type is not a recognized one.
  if (($CFG_HMM_TYPE ne ".semi.")
      and ($CFG_HMM_TYPE ne ".ptm.")
      and ($CFG_HMM_TYPE ne ".cont.")) {
      die "Please choose one CFG_HMM_TYPE out of '.cont.', '.ptm.', or '.semi.', " .
          "currently $CFG_HMM_TYPE\n";
  }

  # This configuration is fastest and best for most acoustic models in
  # PocketSphinx and Sphinx-III. See below for Sphinx-II.
  $CFG_STATESPERHMM = 3;
  $CFG_SKIPSTATE = 'no';

  # Per-model-type settings: directory label, feature type, stream count
  # and the initial/final number of densities, each followed by a sanity
  # check on the density pair.
  if ($CFG_HMM_TYPE eq '.semi.') {
      $CFG_DIRLABEL = 'semi';
      # Four stream features for PocketSphinx
      $CFG_FEATURE = "s2_4x";
      $CFG_NUM_STREAMS = 4;
      $CFG_INITIAL_NUM_DENSITIES = 256;
      $CFG_FINAL_NUM_DENSITIES = 256;
      die "For semi continuous models, the initial and final models have the same density"
          if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
  } elsif ($CFG_HMM_TYPE eq '.ptm.') {
      $CFG_DIRLABEL = 'ptm';
      # Four stream features for PocketSphinx
      $CFG_FEATURE = "s2_4x";
      $CFG_NUM_STREAMS = 4;
      $CFG_INITIAL_NUM_DENSITIES = 64;
      $CFG_FINAL_NUM_DENSITIES = 64;
      die "For phonetically tied models, the initial and final models have the same density"
          if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
  } elsif ($CFG_HMM_TYPE eq '.cont.') {
      $CFG_DIRLABEL = 'cont';
      # Single stream features - Sphinx 3
      $CFG_FEATURE = "1s_c_d_dd";
      $CFG_NUM_STREAMS = 1;
      $CFG_INITIAL_NUM_DENSITIES = 1;
      $CFG_FINAL_NUM_DENSITIES = 8;
      # Only initial > final is rejected; equal values pass this check.
      die "The initial has to be less than the final number of densities"
          if ($CFG_INITIAL_NUM_DENSITIES > $CFG_FINAL_NUM_DENSITIES);
  }

  # Number of top gaussians to score a frame. A little bit less accurate computations
  # make training significantly faster. Uncomment to apply this during the training
  # For good accuracy make sure you are using the same setting in decoder
  # In theory this can be different for various training stages. For example 4 for
  # CI stage and 16 for CD stage
  # $CFG_CI_TOPN = 4;
  # $CFG_CD_TOPN = 16;

  # (yes/no) Train multiple-gaussian context-independent models (useful
  # for alignment, use 'no' otherwise) in the models created
  # specifically for forced alignment
  $CFG_FALIGN_CI_MGAU = 'no';
  # (yes/no) Train multiple-gaussian context-independent models (useful
  # for alignment, use 'no' otherwise)
  $CFG_CI_MGAU = 'no';
  # Number of tied states (senones) to create in decision-tree clustering
  $CFG_N_TIED_STATES = 200;
  # How many parts to run Forward-Backward estimation in
  $CFG_NPART = 1;

  # (yes/no) Train a single decision tree for all phones (actually one
  # per state) (useful for grapheme-based models, use 'no' otherwise)
  $CFG_CROSS_PHONE_TREES = 'no';

  # Use force-aligned transcripts (if available) as input to training
  $CFG_FORCEDALIGN = 'no';

  # Use a specific set of models for force alignment. If not defined,
  # context-independent models for the current experiment will be used.
  $CFG_FORCE_ALIGN_MDEF = "$CFG_BASE_DIR/model_architecture/$CFG_EXPTNAME.falign_ci.mdef";
  $CFG_FORCE_ALIGN_MODELDIR = "$CFG_MODEL_DIR/$CFG_EXPTNAME.falign_ci_$CFG_DIRLABEL";

  # Use a specific dictionary and filler dictionary for force alignment.
  # If these are not defined, a dictionary and filler dictionary will be
  # created from $CFG_DICTIONARY and $CFG_FILLERDICT, with noise words
  # removed from the filler dictionary and added to the dictionary (this
  # is because the force alignment is not very good at inserting them)

  # $CFG_FORCE_ALIGN_DICTIONARY = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.dict";
  # $CFG_FORCE_ALIGN_FILLERDICT = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.fdict";

  # Use a particular beam width for force alignment. The wider
  # (i.e. smaller numerically) the beam, the fewer sentences will be
  # rejected for bad alignment.
  $CFG_FORCE_ALIGN_BEAM = 1e-60;

  # Calculate an LDA/MLLT transform?
  $CFG_LDA_MLLT = 'no';
  # Dimensionality of LDA/MLLT output
  $CFG_LDA_DIMENSION = 29;

  # This is actually just a difference in log space (it doesn't make
  # sense otherwise, because different feature parameters have very
  # different likelihoods)
  $CFG_CONVERGENCE_RATIO = 0.1;

  # Queue::POSIX for multiple CPUs on a local machine
  # Queue::PBS to use a PBS/TORQUE queue
  $CFG_QUEUE_TYPE = "Queue";

  # Name of queue to use for PBS/TORQUE
  $CFG_QUEUE_NAME = "workq";

  # (yes/no) Build questions for decision tree clustering automatically
  $CFG_MAKE_QUESTS = "yes";
  # If CFG_MAKE_QUESTS is yes, questions are written to this file.
  # If CFG_MAKE_QUESTS is no, questions are read from this file.
  $CFG_QUESTION_SET = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.tree_questions";
  #$CFG_QUESTION_SET = "${CFG_BASE_DIR}/linguistic_questions";

  $CFG_CP_OPERATION = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.cpmeanvar";

  # Configuration for grapheme-to-phoneme model
  $CFG_G2P_MODEL= 'no';

  # Configuration script for sphinx decoder

  # Variables starting with $DEC_CFG_ refer to decoder specific
  # arguments, those starting with $CFG_ refer to trainer arguments,
  # some of them also used by the decoder.

  $DEC_CFG_VERBOSE = 1; # Determines how much goes to the screen.

  # These are filled in at configuration time

  # Name of the decoding script to use (psdecode.pl or s3decode.pl, probably)
  $DEC_CFG_SCRIPT = 'psdecode.pl';

  $DEC_CFG_EXPTNAME = "$CFG_EXPTNAME";
  $DEC_CFG_JOBNAME = "$CFG_EXPTNAME"."_job";

  # Models to use.
  $DEC_CFG_MODEL_NAME = "$CFG_EXPTNAME.cd_${CFG_DIRLABEL}_${CFG_N_TIED_STATES}";

  # Feature settings; vector length and normalization are copied from the
  # trainer values. Note the decoder extension carries a leading dot.
  $DEC_CFG_FEATFILES_DIR = "$CFG_BASE_DIR/feat";
  $DEC_CFG_FEATFILE_EXTENSION = '.mfc';
  $DEC_CFG_VECTOR_LENGTH = $CFG_VECTOR_LENGTH;
  $DEC_CFG_AGC = $CFG_AGC;
  $DEC_CFG_CMN = $CFG_CMN;
  $DEC_CFG_VARNORM = $CFG_VARNORM;

  $DEC_CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";
  $DEC_CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";
  $DEC_CFG_MODEL_DIR = "$CFG_MODEL_DIR";

  # Dictionaries, test-set control/transcript files and result directory
  $DEC_CFG_DICTIONARY = "$CFG_BASE_DIR/etc/$CFG_DB_NAME.dic";
  $DEC_CFG_FILLERDICT = "$CFG_BASE_DIR/etc/$CFG_DB_NAME.filler";
  $DEC_CFG_LISTOFFILES = "$CFG_BASE_DIR/etc/${CFG_DB_NAME}_test.fileids";
  $DEC_CFG_TRANSCRIPTFILE = "$CFG_BASE_DIR/etc/${CFG_DB_NAME}_test.transcription";
  $DEC_CFG_RESULT_DIR = "$CFG_BASE_DIR/result";

  # These variables, used by the decoder, have to be user defined, and
  # may affect the decoder output

  $DEC_CFG_LANGUAGEMODEL = "$CFG_BASE_DIR/etc/${CFG_DB_NAME}.lm.DMP";
  $DEC_CFG_LANGUAGEWEIGHT = "10";
  $DEC_CFG_BEAMWIDTH = "1e-80";
  $DEC_CFG_WORDBEAM = "1e-40";

  $DEC_CFG_ALIGN = "builtin";

  $DEC_CFG_NPART = 1; # Define how many pieces to split decode in

  # This variable has to be defined, otherwise utils.pl will not load.
  $CFG_DONE = 1;

  # Hand a true value back to whatever evaluated this file.
  return 1;
Advertisement
Add Comment
Please, Sign In to add comment