Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Configuration script for sphinx trainer                  -*-mode:Perl-*-
#
# All settings are plain (undeclared) package variables; do not add "my"
# to them.  NOTE(review): presumably the loading script reads them by
# package-qualified name -- confirm before enabling "use strict" here.

$CFG_VERBOSE = 1;    # Determines how much goes to the screen.

# These are filled in at configuration time
$CFG_DB_NAME         = "an4";
$CFG_BASE_DIR        = "/home/stvad/source/c++/GSoC/an4";
$CFG_SPHINXTRAIN_DIR = "/home/stvad/source/c++/GSoC/local/lib/sphinxtrain";

# Directory containing SphinxTrain binaries (derived from the install
# directory so the two cannot drift apart)
$CFG_BIN_DIR    = "$CFG_SPHINXTRAIN_DIR/../../libexec/sphinxtrain";
$CFG_GIF_DIR    = "$CFG_BASE_DIR/gifs";
$CFG_SCRIPT_DIR = "$CFG_SPHINXTRAIN_DIR/scripts";

# Experiment name, will be used to name model files and log files
$CFG_EXPTNAME = $CFG_DB_NAME;
# Audio waveform and feature file information
$CFG_WAVFILES_DIR       = "$CFG_BASE_DIR/wav";
$CFG_WAVFILE_EXTENSION  = 'wav';
$CFG_WAVFILE_TYPE       = 'mswav';    # one of nist, mswav, raw
$CFG_FEATFILES_DIR      = "$CFG_BASE_DIR/feat";
$CFG_FEATFILE_EXTENSION = 'mfc';
$CFG_VECTOR_LENGTH      = 13;

# Feature extraction parameters
$CFG_WAVFILE_SRATE = 16000.0;
$CFG_NUM_FILT = 40;          # 40 for wideband speech; 31 is a reasonable value for 8 kHz telephone speech
$CFG_LO_FILT  = 133.3334;    # For 8 kHz telephone speech the value is 200
$CFG_HI_FILT  = 6855.4976;   # For 8 kHz telephone speech the value is 3500
$CFG_MIN_ITERATIONS = 1;     # BW Iterate at least this many times
$CFG_MAX_ITERATIONS = 10;    # BW Don't iterate more than this, something's likely wrong.

# (none/max) Type of AGC to apply to input files
$CFG_AGC = 'none';

# (current/none) Type of cepstral mean subtraction/normalization
# to apply to input files
$CFG_CMN = 'current';

# (yes/no) Normalize variance of input files to 1.0
$CFG_VARNORM = 'no';

# (yes/no) Use letter-to-sound rules to guess pronunciations of
# unknown words (English, 40-phone specific)
$CFG_LTSOOV = 'no';

# (yes/no) Train full covariance matrices
$CFG_FULLVAR = 'no';

# (yes/no) Use diagonals only of full covariance matrices for
# Forward-Backward evaluation (recommended if CFG_FULLVAR is yes)
$CFG_DIAGFULL = 'no';

# (yes/no) Perform vocal tract length normalization in training.  This
# will result in a "normalized" model which requires VTLN to be done
# during decoding as well.
$CFG_VTLN = 'no';

# Starting warp factor for VTLN
$CFG_VTLN_START = 0.80;

# Ending warp factor for VTLN
$CFG_VTLN_END = 1.40;

# Step size of warping factors
$CFG_VTLN_STEP = 0.05;
# Directory to write queue manager logs to
$CFG_QMGR_DIR = "$CFG_BASE_DIR/qmanager";

# Directory to write training logs to
$CFG_LOG_DIR = "$CFG_BASE_DIR/logdir";

# Directory for re-estimation counts
$CFG_BWACCUM_DIR = "$CFG_BASE_DIR/bwaccumdir";

# Directory to write model parameter files to
$CFG_MODEL_DIR = "$CFG_BASE_DIR/model_parameters";

# Directory containing transcripts and control files for
# speaker-adaptive training
$CFG_LIST_DIR = "$CFG_BASE_DIR/etc";

# Decoding variables for MMIE training
$CFG_LANGUAGEWEIGHT = "11.5";
$CFG_BEAMWIDTH      = "1e-100";
$CFG_WORDBEAM       = "1e-80";
$CFG_LANGUAGEMODEL  = "$CFG_LIST_DIR/$CFG_DB_NAME.lm.DMP";
$CFG_WORDPENALTY    = "0.2";

# Lattice pruning variables
$CFG_ABEAM             = "1e-50";
$CFG_NBEAM             = "1e-10";
$CFG_PRUNED_DENLAT_DIR = "$CFG_BASE_DIR/pruned_denlat";

# MMIE training related variables
$CFG_MMIE                = "no";
$CFG_MMIE_MAX_ITERATIONS = 5;
$CFG_LATTICE_DIR         = "$CFG_BASE_DIR/lattice";
$CFG_MMIE_TYPE           = "rand";    # Valid values are "rand", "best" or "ci"
$CFG_MMIE_CONSTE         = "3.0";
$CFG_NUMLAT_DIR          = "$CFG_BASE_DIR/numlat";
$CFG_DENLAT_DIR          = "$CFG_BASE_DIR/denlat";

# Variables used in main training of models
$CFG_DICTIONARY     = "$CFG_LIST_DIR/$CFG_DB_NAME.dic";
$CFG_RAWPHONEFILE   = "$CFG_LIST_DIR/$CFG_DB_NAME.phone";
$CFG_FILLERDICT     = "$CFG_LIST_DIR/$CFG_DB_NAME.filler";
$CFG_LISTOFFILES    = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.fileids";
$CFG_TRANSCRIPTFILE = "$CFG_LIST_DIR/${CFG_DB_NAME}_train.transcription";
$CFG_FEATPARAMS     = "$CFG_LIST_DIR/feat.params";
# Variables used in characterizing models

# Exactly one of the following three lines must be active.
$CFG_HMM_TYPE = '.cont.';     # Sphinx 4, PocketSphinx
#$CFG_HMM_TYPE = '.semi.';    # PocketSphinx
#$CFG_HMM_TYPE = '.ptm.';     # PocketSphinx (larger data sets)

# Stop right away on an unknown model type; otherwise none of the
# per-type branches below would run and the settings they assign
# would be left undefined.
if (    ($CFG_HMM_TYPE ne ".semi.")
    and ($CFG_HMM_TYPE ne ".ptm.")
    and ($CFG_HMM_TYPE ne ".cont."))
{
    die "Please choose one CFG_HMM_TYPE out of '.cont.', '.ptm.', or '.semi.', "
      . "currently $CFG_HMM_TYPE\n";
}

# This configuration is fastest and best for most acoustic models in
# PocketSphinx and Sphinx-III.  See below for Sphinx-II.
$CFG_STATESPERHMM = 3;
$CFG_SKIPSTATE    = 'no';

# Per-model-type settings: directory label, feature type, number of
# streams, and the initial/final number of densities.
if ($CFG_HMM_TYPE eq '.semi.') {
    $CFG_DIRLABEL = 'semi';

    # Four stream features for PocketSphinx
    $CFG_FEATURE               = "s2_4x";
    $CFG_NUM_STREAMS           = 4;
    $CFG_INITIAL_NUM_DENSITIES = 256;
    $CFG_FINAL_NUM_DENSITIES   = 256;
    die "For semi continuous models, the initial and final models have the same density"
      if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
}
elsif ($CFG_HMM_TYPE eq '.ptm.') {
    $CFG_DIRLABEL = 'ptm';

    # Four stream features for PocketSphinx
    $CFG_FEATURE               = "s2_4x";
    $CFG_NUM_STREAMS           = 4;
    $CFG_INITIAL_NUM_DENSITIES = 64;
    $CFG_FINAL_NUM_DENSITIES   = 64;
    die "For phonetically tied models, the initial and final models have the same density"
      if ($CFG_INITIAL_NUM_DENSITIES != $CFG_FINAL_NUM_DENSITIES);
}
elsif ($CFG_HMM_TYPE eq '.cont.') {
    $CFG_DIRLABEL = 'cont';

    # Single stream features - Sphinx 3
    $CFG_FEATURE               = "1s_c_d_dd";
    $CFG_NUM_STREAMS           = 1;
    $CFG_INITIAL_NUM_DENSITIES = 1;
    $CFG_FINAL_NUM_DENSITIES   = 8;

    # Equal values are accepted; only initial > final is rejected.
    die "The initial number of densities must not exceed the final number of densities"
      if ($CFG_INITIAL_NUM_DENSITIES > $CFG_FINAL_NUM_DENSITIES);
}
# Number of top gaussians to score a frame.  Slightly less accurate
# computations make training significantly faster.  Uncomment to apply
# this during the training.
# For good accuracy make sure you are using the same setting in decoder.
# In theory this can be different for various training stages.  For
# example 4 for CI stage and 16 for CD stage
# $CFG_CI_TOPN = 4;
# $CFG_CD_TOPN = 16;

# (yes/no) Train multiple-gaussian context-independent models (useful
# for alignment, use 'no' otherwise) in the models created
# specifically for forced alignment
$CFG_FALIGN_CI_MGAU = 'no';

# (yes/no) Train multiple-gaussian context-independent models (useful
# for alignment, use 'no' otherwise)
$CFG_CI_MGAU = 'no';

# Number of tied states (senones) to create in decision-tree clustering
$CFG_N_TIED_STATES = 200;

# How many parts to run Forward-Backward estimation in
$CFG_NPART = 1;

# (yes/no) Train a single decision tree for all phones (actually one
# per state) (useful for grapheme-based models, use 'no' otherwise)
$CFG_CROSS_PHONE_TREES = 'no';

# Use force-aligned transcripts (if available) as input to training
$CFG_FORCEDALIGN = 'no';
# Use a specific set of models for force alignment.  If not defined,
# context-independent models for the current experiment will be used.
$CFG_FORCE_ALIGN_MDEF = "$CFG_BASE_DIR/model_architecture/$CFG_EXPTNAME.falign_ci.mdef";
$CFG_FORCE_ALIGN_MODELDIR = "$CFG_MODEL_DIR/$CFG_EXPTNAME.falign_ci_$CFG_DIRLABEL";

# Use a specific dictionary and filler dictionary for force alignment.
# If these are not defined, a dictionary and filler dictionary will be
# created from $CFG_DICTIONARY and $CFG_FILLERDICT, with noise words
# removed from the filler dictionary and added to the dictionary (this
# is because the force alignment is not very good at inserting them)
# $CFG_FORCE_ALIGN_DICTIONARY = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.dict";
# $CFG_FORCE_ALIGN_FILLERDICT = "$ST::CFG_BASE_DIR/falignout/$ST::CFG_EXPTNAME.falign.fdict";

# Use a particular beam width for force alignment.  The wider
# (i.e. smaller numerically) the beam, the fewer sentences will be
# rejected for bad alignment.
$CFG_FORCE_ALIGN_BEAM = 1e-60;

# Calculate an LDA/MLLT transform?
$CFG_LDA_MLLT = 'no';

# Dimensionality of LDA/MLLT output
$CFG_LDA_DIMENSION = 29;

# This is actually just a difference in log space (it doesn't make
# sense otherwise, because different feature parameters have very
# different likelihoods)
$CFG_CONVERGENCE_RATIO = 0.1;

# Queue::POSIX for multiple CPUs on a local machine
# Queue::PBS to use a PBS/TORQUE queue
$CFG_QUEUE_TYPE = "Queue";

# Name of queue to use for PBS/TORQUE
$CFG_QUEUE_NAME = "workq";

# (yes/no) Build questions for decision tree clustering automatically
$CFG_MAKE_QUESTS = "yes";

# If CFG_MAKE_QUESTS is yes, questions are written to this file.
# If CFG_MAKE_QUESTS is no, questions are read from this file.
$CFG_QUESTION_SET = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.tree_questions";
#$CFG_QUESTION_SET = "${CFG_BASE_DIR}/linguistic_questions";

$CFG_CP_OPERATION = "${CFG_BASE_DIR}/model_architecture/${CFG_EXPTNAME}.cpmeanvar";
# Configuration script for sphinx decoder                  -*-mode:Perl-*-

# Variables starting with $DEC_CFG_ refer to decoder specific
# arguments, those starting with $CFG_ refer to trainer arguments,
# some of them also used by the decoder.

$DEC_CFG_VERBOSE = 1;    # Determines how much goes to the screen.

# These are filled in at configuration time
$DEC_CFG_DB_NAME           = 'an4';
$DEC_CFG_BASE_DIR          = '/home/stvad/source/c++/GSoC/an4';
$DEC_CFG_SPHINXDECODER_DIR = '/home/stvad/source/c++/GSoC/local/lib/sphinxtrain';

# Name of the decoding script to use (psdecode.pl or s3decode.pl, probably)
$DEC_CFG_SCRIPT = 'psdecode.pl';

# Derived from the decoder install directory so the two cannot drift apart
$DEC_CFG_BIN_DIR    = "$DEC_CFG_SPHINXDECODER_DIR/../../bin";
$DEC_CFG_GIF_DIR    = "$DEC_CFG_BASE_DIR/gifs";
$DEC_CFG_SCRIPT_DIR = "$DEC_CFG_BASE_DIR/scripts";

$DEC_CFG_EXPTNAME = $CFG_EXPTNAME;
$DEC_CFG_JOBNAME  = "${CFG_EXPTNAME}_job";

# Models to use.
$DEC_CFG_MODEL_NAME = "$CFG_EXPTNAME.cd_${CFG_DIRLABEL}_${CFG_N_TIED_STATES}";

$DEC_CFG_FEATFILES_DIR = "$DEC_CFG_BASE_DIR/feat";

# NOTE(review): this value carries a leading dot, while the trainer's
# $CFG_FEATFILE_EXTENSION is 'mfc' without one -- confirm which form the
# decoding script expects.
$DEC_CFG_FEATFILE_EXTENSION = '.mfc';

# Front-end settings are taken over from the trainer configuration
$DEC_CFG_VECTOR_LENGTH = $CFG_VECTOR_LENGTH;
$DEC_CFG_AGC           = $CFG_AGC;
$DEC_CFG_CMN           = $CFG_CMN;
$DEC_CFG_VARNORM       = $CFG_VARNORM;

$DEC_CFG_QMGR_DIR  = "$DEC_CFG_BASE_DIR/qmanager";
$DEC_CFG_LOG_DIR   = "$DEC_CFG_BASE_DIR/logdir";
$DEC_CFG_MODEL_DIR = $CFG_MODEL_DIR;

$DEC_CFG_DICTIONARY     = "$DEC_CFG_BASE_DIR/etc/$DEC_CFG_DB_NAME.dic";
$DEC_CFG_FILLERDICT     = "$DEC_CFG_BASE_DIR/etc/$DEC_CFG_DB_NAME.filler";
$DEC_CFG_LISTOFFILES    = "$DEC_CFG_BASE_DIR/etc/${DEC_CFG_DB_NAME}_test.fileids";
$DEC_CFG_TRANSCRIPTFILE = "$DEC_CFG_BASE_DIR/etc/${DEC_CFG_DB_NAME}_test.transcription";
$DEC_CFG_RESULT_DIR     = "$DEC_CFG_BASE_DIR/result";

# These variables, used by the decoder, have to be user defined, and
# may affect the decoder output
$DEC_CFG_LANGUAGEMODEL_DIR = "$DEC_CFG_BASE_DIR/etc";
$DEC_CFG_LANGUAGEMODEL     = "$DEC_CFG_LANGUAGEMODEL_DIR/${DEC_CFG_DB_NAME}.lm.DMP";
$DEC_CFG_LANGUAGEWEIGHT    = "10";
$DEC_CFG_BEAMWIDTH         = "1e-80";
$DEC_CFG_WORDBEAM          = "1e-40";

$DEC_CFG_ALIGN = "builtin";

$DEC_CFG_HMM_TYPE = $CFG_HMM_TYPE;

$DEC_CFG_NPART = 1;    # Define how many pieces to split decode in
# This variable has to be defined, otherwise utils.pl will not load.
$CFG_DONE = 1;

# Hand a true value back to whatever loaded this file.
return 1;
Advertisement
Add Comment
Please, Sign In to add comment