Guest User

sphinxtrain

a guest
Jul 2nd, 2012
245
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 11.05 KB | None | 0 0
  # Configuration script for sphinx trainer                  -*-mode:Perl-*-
  #
  # Every setting in this file is a plain, undeclared package variable
  # rather than a `my` lexical.  Presumably the training scripts read these
  # variables after loading the file -- TODO confirm against the loader
  # before adding `use strict` or `my` declarations here.

  $CFG_VERBOSE = 1;       # Determines how much goes to the screen.

  # These are filled in at configuration time
  $CFG_DB_NAME = "an4";
  $CFG_BASE_DIR = "/home/stvad/source/c++/GSoC/an4";
  $CFG_SPHINXTRAIN_DIR = "/home/stvad/source/c++/GSoC/local/lib/sphinxtrain";

  # Directory containing SphinxTrain binaries
  $CFG_BIN_DIR = "/home/stvad/source/c++/GSoC/local/lib/sphinxtrain/../../libexec/sphinxtrain";
  $CFG_GIF_DIR = "$CFG_BASE_DIR/gifs";
  $CFG_SCRIPT_DIR = "/home/stvad/source/c++/GSoC/local/lib/sphinxtrain/scripts";

  # Experiment name, will be used to name model files and log files.
  # Defaults to the database name.
  $CFG_EXPTNAME = "$CFG_DB_NAME";

  # Audio waveform and feature file information
  $CFG_WAVFILES_DIR = "$CFG_BASE_DIR/wav";
  $CFG_WAVFILE_EXTENSION = 'wav';
  $CFG_WAVFILE_TYPE = 'mswav'; # one of nist, mswav, raw
  $CFG_FEATFILES_DIR = "$CFG_BASE_DIR/feat";
  $CFG_FEATFILE_EXTENSION = 'mfc';
  $CFG_VECTOR_LENGTH = 13;

  # Feature extraction parameters
  $CFG_WAVFILE_SRATE = 16000.0;
  $CFG_NUM_FILT = 40; # For wideband speech it's 40; for telephone 8kHz speech a reasonable value is 31
  $CFG_LO_FILT = 133.3334; # For telephone 8kHz speech value is 200
  $CFG_HI_FILT = 6855.4976; # For telephone 8kHz speech value is 3500

  $CFG_MIN_ITERATIONS = 1;  # BW Iterate at least this many times
  $CFG_MAX_ITERATIONS = 10; # BW Don't iterate more than this, something's likely wrong.

  # (none/max) Type of AGC to apply to input files
  $CFG_AGC = 'none';
  # (current/none) Type of cepstral mean subtraction/normalization
  # to apply to input files
  $CFG_CMN = 'current';
  # (yes/no) Normalize variance of input files to 1.0
  $CFG_VARNORM = 'no';
  # (yes/no) Use letter-to-sound rules to guess pronunciations of
  # unknown words (English, 40-phone specific)
  $CFG_LTSOOV = 'no';
  # (yes/no) Train full covariance matrices
  $CFG_FULLVAR = 'no';
  # (yes/no) Use diagonals only of full covariance matrices for
  # Forward-Backward evaluation (recommended if CFG_FULLVAR is yes)
  $CFG_DIAGFULL = 'no';

  # (yes/no) Perform vocal tract length normalization in training.  This
  # will result in a "normalized" model which requires VTLN to be done
  # during decoding as well.
  $CFG_VTLN = 'no';
  # Starting warp factor for VTLN
  $CFG_VTLN_START = 0.80;
  # Ending warp factor for VTLN
  $CFG_VTLN_END = 1.40;
  # Step size of warping factors
  $CFG_VTLN_STEP = 0.05;

  # Directory to write queue manager logs to
  $CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";
  # Directory to write training logs to
  $CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";
  # Directory for re-estimation counts
  $CFG_BWACCUM_DIR = "$CFG_BASE_DIR/bwaccumdir";
  # Directory to write model parameter files to
  $CFG_MODEL_DIR = "$CFG_BASE_DIR/model_parameters";

  # Directory containing transcripts and control files for
  # speaker-adaptive training
  $CFG_LIST_DIR = "$CFG_BASE_DIR/etc";

  # Decoding variables for MMIE training
  $CFG_LANGUAGEWEIGHT = "11.5";
  $CFG_BEAMWIDTH      = "1e-100";
  $CFG_WORDBEAM       = "1e-80";
  $CFG_LANGUAGEMODEL  = "$CFG_LIST_DIR/$CFG_DB_NAME.lm.DMP";
  $CFG_WORDPENALTY    = "0.2";

  # Lattice pruning variables
  $CFG_ABEAM              = "1e-50";
  $CFG_NBEAM              = "1e-10";
  $CFG_PRUNED_DENLAT_DIR  = "$CFG_BASE_DIR/pruned_denlat";

  # MMIE training related variables
  $CFG_MMIE = "no";
  $CFG_MMIE_MAX_ITERATIONS = 5;
  $CFG_LATTICE_DIR = "$CFG_BASE_DIR/lattice";
  $CFG_MMIE_TYPE   = "rand"; # Valid values are "rand", "best" or "ci"
  $CFG_MMIE_CONSTE = "3.0";
  $CFG_NUMLAT_DIR  = "$CFG_BASE_DIR/numlat";
  $CFG_DENLAT_DIR  = "$CFG_BASE_DIR/denlat";

  # Variables used in main training of models.
  # All of these files live in $CFG_LIST_DIR and are named after the database.
  $CFG_DICTIONARY     = "$CFG_LIST_DIR/$CFG_DB_NAME.dic";
  $CFG_RAWPHONEFILE   = "$CFG_LIST_DIR/$CFG_DB_NAME.phone";
  $CFG_FILLERDICT     = "$CFG_LIST_DIR/$CFG_DB_NAME.filler";
  $CFG_LISTOFFILES    = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.fileids";
  $CFG_TRANSCRIPTFILE = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.transcription";
  $CFG_FEATPARAMS     = "$CFG_LIST_DIR/feat.params";

  # Variables used in characterizing models

  # Acoustic model type.  Exactly one of the three assignments below should
  # be active; the value selects the feature set and density counts further
  # down.
  $CFG_HMM_TYPE = '.cont.'; # Sphinx 4, PocketSphinx
  #$CFG_HMM_TYPE  = '.semi.'; # PocketSphinx
  #$CFG_HMM_TYPE  = '.ptm.'; # PocketSphinx (larger data sets)

  # Reject anything other than the three supported model types up front, so
  # the if/elsif chain below never falls through without configuring a type.
  if (($CFG_HMM_TYPE ne ".semi.")
      and ($CFG_HMM_TYPE ne ".ptm.")
      and ($CFG_HMM_TYPE ne ".cont.")) {
    die "Please choose one CFG_HMM_TYPE out of '.cont.', '.ptm.', or '.semi.', " .
      "currently $CFG_HMM_TYPE\n";
  }

  # This configuration is fastest and best for most acoustic models in
  # PocketSphinx and Sphinx-III.  See below for Sphinx-II.
  $CFG_STATESPERHMM = 3;
  $CFG_SKIPSTATE = 'no';

  # Per-type settings: directory label, feature type, number of feature
  # streams, and the initial/final number of densities.  Each branch ends
  # with a sanity check that fires if the density values are later edited
  # into an unsupported combination.
  if ($CFG_HMM_TYPE eq '.semi.') {
    $CFG_DIRLABEL = 'semi';
    # Four stream features for PocketSphinx
    $CFG_FEATURE = "s2_4x";
    $CFG_NUM_STREAMS = 4;
    $CFG_INITIAL_NUM_DENSITIES = 256;
    $CFG_FINAL_NUM_DENSITIES = 256;
    die "For semi continuous models, the initial and final models have the same density"
      if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
  } elsif ($CFG_HMM_TYPE eq '.ptm.') {
    $CFG_DIRLABEL = 'ptm';
    # Four stream features for PocketSphinx
    $CFG_FEATURE = "s2_4x";
    $CFG_NUM_STREAMS = 4;
    $CFG_INITIAL_NUM_DENSITIES = 64;
    $CFG_FINAL_NUM_DENSITIES = 64;
    die "For phonetically tied models, the initial and final models have the same density"
      if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
  } elsif ($CFG_HMM_TYPE eq '.cont.') {
    $CFG_DIRLABEL = 'cont';
    # Single stream features - Sphinx 3
    $CFG_FEATURE = "1s_c_d_dd";
    $CFG_NUM_STREAMS = 1;
    $CFG_INITIAL_NUM_DENSITIES = 1;
    $CFG_FINAL_NUM_DENSITIES = 8;
    # Only initial > final is rejected; equal values pass this check.
    die "The initial has to be less than the final number of densities"
      if ($CFG_INITIAL_NUM_DENSITIES > $CFG_FINAL_NUM_DENSITIES);
  }

  # Number of top gaussians to score a frame. Slightly less accurate
  # computations make training significantly faster. Uncomment to apply this
  # during training. For good accuracy make sure you are using the same
  # setting in the decoder. In theory this can be different for various
  # training stages, for example 4 for the CI stage and 16 for the CD stage.
  # $CFG_CI_TOPN = 4;
  # $CFG_CD_TOPN = 16;

  # (yes/no) Train multiple-gaussian context-independent models (useful
  # for alignment, use 'no' otherwise) in the models created
  # specifically for forced alignment
  $CFG_FALIGN_CI_MGAU = 'no';
  # (yes/no) Train multiple-gaussian context-independent models (useful
  # for alignment, use 'no' otherwise)
  $CFG_CI_MGAU = 'no';
  # Number of tied states (senones) to create in decision-tree clustering
  $CFG_N_TIED_STATES = 200;
  # How many parts to run Forward-Backward estimation in
  $CFG_NPART = 1;

  # (yes/no) Train a single decision tree for all phones (actually one
  # per state) (useful for grapheme-based models, use 'no' otherwise)
  $CFG_CROSS_PHONE_TREES = 'no';

  # Use force-aligned transcripts (if available) as input to training
  $CFG_FORCEDALIGN = 'no';

  # Use a specific set of models for force alignment.  If not defined,
  # context-independent models for the current experiment will be used.
  $CFG_FORCE_ALIGN_MDEF = "$CFG_BASE_DIR/model_architecture/$CFG_EXPTNAME.falign_ci.mdef";
  $CFG_FORCE_ALIGN_MODELDIR = "$CFG_MODEL_DIR/$CFG_EXPTNAME.falign_ci_$CFG_DIRLABEL";

  # Use a specific dictionary and filler dictionary for force alignment.
  # If these are not defined, a dictionary and filler dictionary will be
  # created from $CFG_DICTIONARY and $CFG_FILLERDICT, with noise words
  # removed from the filler dictionary and added to the dictionary (this
  # is because the force alignment is not very good at inserting them)

  # $CFG_FORCE_ALIGN_DICTIONARY = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.dict";
  # $CFG_FORCE_ALIGN_FILLERDICT = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.fdict";

  # Use a particular beam width for force alignment.  The wider
  # (i.e. smaller numerically) the beam, the fewer sentences will be
  # rejected for bad alignment.
  $CFG_FORCE_ALIGN_BEAM = 1e-60;

  # Calculate an LDA/MLLT transform?
  $CFG_LDA_MLLT = 'no';
  # Dimensionality of LDA/MLLT output
  $CFG_LDA_DIMENSION = 29;

  # This is actually just a difference in log space (it doesn't make
  # sense otherwise, because different feature parameters have very
  # different likelihoods)
  $CFG_CONVERGENCE_RATIO = 0.1;

  # Queue::POSIX for multiple CPUs on a local machine
  # Queue::PBS to use a PBS/TORQUE queue
  $CFG_QUEUE_TYPE = "Queue";

  # Name of queue to use for PBS/TORQUE
  $CFG_QUEUE_NAME = "workq";

  # (yes/no) Build questions for decision tree clustering automatically
  $CFG_MAKE_QUESTS = "yes";
  # If CFG_MAKE_QUESTS is yes, questions are written to this file.
  # If CFG_MAKE_QUESTS is no, questions are read from this file.
  $CFG_QUESTION_SET = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.tree_questions";
  #$CFG_QUESTION_SET = "${CFG_BASE_DIR}/linguistic_questions";

  $CFG_CP_OPERATION = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.cpmeanvar";

  # Configuration script for sphinx decoder                  -*-mode:Perl-*-

  # Variables starting with $DEC_CFG_ refer to decoder specific
  # arguments, those starting with $CFG_ refer to trainer arguments,
  # some of them also used by the decoder.

  $DEC_CFG_VERBOSE = 1;       # Determines how much goes to the screen.

  # These are filled in at configuration time
  $DEC_CFG_DB_NAME = 'an4';
  $DEC_CFG_BASE_DIR = '/home/stvad/source/c++/GSoC/an4';
  $DEC_CFG_SPHINXDECODER_DIR = '/home/stvad/source/c++/GSoC/local/lib/sphinxtrain';

  # Name of the decoding script to use (psdecode.pl or s3decode.pl, probably)
  $DEC_CFG_SCRIPT = 'psdecode.pl';

  $DEC_CFG_BIN_DIR = "/home/stvad/source/c++/GSoC/local/lib/sphinxtrain/../../bin";
  $DEC_CFG_GIF_DIR = "$DEC_CFG_BASE_DIR/gifs";
  $DEC_CFG_SCRIPT_DIR = "$DEC_CFG_BASE_DIR/scripts";

  # The decoder reuses the trainer's experiment name.
  $DEC_CFG_EXPTNAME = "$CFG_EXPTNAME";
  $DEC_CFG_JOBNAME  = "$CFG_EXPTNAME"."_job";

  # Models to use.
  $DEC_CFG_MODEL_NAME = "$CFG_EXPTNAME.cd_${CFG_DIRLABEL}_${CFG_N_TIED_STATES}";

  $DEC_CFG_FEATFILES_DIR = "$DEC_CFG_BASE_DIR/feat";
  # NOTE(review): this value carries a leading dot, whereas the trainer's
  # $CFG_FEATFILE_EXTENSION is 'mfc' without one -- confirm the decoding
  # script expects the dotted form before changing either.
  $DEC_CFG_FEATFILE_EXTENSION = '.mfc';
  # Front-end settings are copied from the trainer so both sides agree.
  $DEC_CFG_VECTOR_LENGTH = $CFG_VECTOR_LENGTH;
  $DEC_CFG_AGC = $CFG_AGC;
  $DEC_CFG_CMN = $CFG_CMN;
  $DEC_CFG_VARNORM = $CFG_VARNORM;

  $DEC_CFG_QMGR_DIR = "$DEC_CFG_BASE_DIR/qmanager";
  $DEC_CFG_LOG_DIR = "$DEC_CFG_BASE_DIR/logdir";
  $DEC_CFG_MODEL_DIR = "$CFG_MODEL_DIR";

  $DEC_CFG_DICTIONARY     = "$DEC_CFG_BASE_DIR/etc/$DEC_CFG_DB_NAME.dic";
  $DEC_CFG_FILLERDICT     = "$DEC_CFG_BASE_DIR/etc/$DEC_CFG_DB_NAME.filler";
  $DEC_CFG_LISTOFFILES    = "$DEC_CFG_BASE_DIR/etc/${DEC_CFG_DB_NAME}_test.fileids";
  $DEC_CFG_TRANSCRIPTFILE = "$DEC_CFG_BASE_DIR/etc/${DEC_CFG_DB_NAME}_test.transcription";
  $DEC_CFG_RESULT_DIR     = "$DEC_CFG_BASE_DIR/result";

  # These variables, used by the decoder, have to be user defined, and
  # may affect the decoder output

  $DEC_CFG_LANGUAGEMODEL_DIR = "$DEC_CFG_BASE_DIR/etc";
  $DEC_CFG_LANGUAGEMODEL  = "$DEC_CFG_LANGUAGEMODEL_DIR/${DEC_CFG_DB_NAME}.lm.DMP";
  $DEC_CFG_LANGUAGEWEIGHT = "10";
  $DEC_CFG_BEAMWIDTH = "1e-80";
  $DEC_CFG_WORDBEAM = "1e-40";

  $DEC_CFG_ALIGN = "builtin";

  $DEC_CFG_HMM_TYPE = $CFG_HMM_TYPE;

  $DEC_CFG_NPART = 1;     #  Define how many pieces to split decode in

  # This variable has to be defined, otherwise utils.pl will not load.
  $CFG_DONE = 1;

  # Hand a true value back to whatever loads this file.  A bare true value
  # as the final statement (rather than a top-level `return 1;`) gives the
  # same result under `do FILE` and `require`, and does not abort with
  # "Can't return outside a subroutine" when the file is run directly.
  1;
Advertisement
Add Comment
Please, Sign In to add comment