00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042
00043 #include <cmd_ln.h>
00044 #include <feat.h>
00045 #include <fe.h>
00046
/**
 * Complete list of PocketSphinx command-line option entries.
 *
 * Expands to a comma-separated sequence: first the two function-like macros
 * waveform_to_cepstral_command_line_macro() and
 * cepstral_to_feature_command_line_macro(), then the per-subsystem
 * POCKETSPHINX_*_OPTIONS groups defined further down in this header.  The
 * groups can be defined after this macro because a macro body is only
 * expanded where the macro is used.
 *
 * NOTE(review): the two function-like macros are not defined in this file;
 * presumably they come from the included <fe.h> / <feat.h> -- confirm.
 * NOTE(review): presumably used as the initialiser of an option table that
 * the caller terminates with CMDLN_EMPTY_OPTION -- confirm against callers.
 */
#define POCKETSPHINX_OPTIONS \
    waveform_to_cepstral_command_line_macro(), \
    cepstral_to_feature_command_line_macro(), \
    POCKETSPHINX_ACMOD_OPTIONS, \
    POCKETSPHINX_BEAM_OPTIONS, \
    POCKETSPHINX_SEARCH_OPTIONS, \
    POCKETSPHINX_DICT_OPTIONS, \
    POCKETSPHINX_NGRAM_OPTIONS, \
    POCKETSPHINX_FSG_OPTIONS, \
    POCKETSPHINX_DEBUG_OPTIONS
00058
/**
 * Option entries for logging and debugging output.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string (or NULL when
 * there is no default), help text }.  There is no trailing comma, so the
 * macro can be followed by further entries or by a terminator.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_DEBUG_OPTIONS \
    { "-logfn", ARG_STRING, NULL, \
      "File to write log messages in" }, \
    { "-debug", ARG_INT32, NULL, \
      "Verbosity level for debugging messages" }, \
    { "-mfclogdir", ARG_STRING, NULL, \
      "Directory to log feature files to" }, \
    { "-rawlogdir", ARG_STRING, NULL, \
      "Directory to log raw audio files to" }
00080
/**
 * Option entries for search beam widths and phoneme lookahead.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string, help text }.
 * Every beam is an ARG_FLOAT64 whose default is written in scientific
 * notation; as the "-beam" help text says, smaller values mean a wider
 * beam.  "-pl_window" is the only integer entry.  There is no trailing
 * comma after the last entry.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_BEAM_OPTIONS \
    { "-beam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
    { "-wbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits" }, \
    { "-pbeam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to phone transitions" }, \
    { "-lpbeam", ARG_FLOAT64, "1e-40", \
      "Beam width applied to last phone in words" }, \
    { "-lponlybeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to last phone in single-phone words" }, \
    { "-wend_beam", ARG_FLOAT64, "7e-29", \
      "(tst only) Beam selecting word-final HMMs exiting in each frame [0(widest) .. 1(narrowest)]" }, \
    { "-vhbeam", ARG_FLOAT64, "7e-29", \
      "(tst only) Beam width applied to Viterbi history entries (before -maxhistpf)" }, \
    { "-fwdflatbeam", ARG_FLOAT64, "1e-64", \
      "Beam width applied to every frame in second-pass flat search" }, \
    { "-fwdflatwbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits in second-pass flat search" }, \
    { "-pl_window", ARG_INT32, "0", \
      "Phoneme lookahead window size, in frames" }, \
    { "-pl_beam", ARG_FLOAT64, "1e-10", \
      "Beam width applied to phone loop search for lookahead" }, \
    { "-pl_pbeam", ARG_FLOAT64, "1e-5", \
      "Beam width applied to phone loop transitions for lookahead" }
00131
/**
 * Option entries that select and tune the search passes.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string, help text }.
 * Boolean defaults are spelled "yes" / "no"; entries whose help text starts
 * with "(tst only)" are described as applying only to the "-tst" search.
 * There is no trailing comma after the last entry.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_SEARCH_OPTIONS \
    { "-compallsen", ARG_BOOLEAN, "no", \
      "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
    { "-tst", ARG_BOOLEAN, "no", \
      "Run time-switch tree search (a.k.a. Sphinx3 search)" }, \
    { "-Nlextree", ARG_INT32, "3", \
      "(tst only) No. of lextrees to be instantiated; entries into them staggered in time" }, \
    { "-epl", ARG_INT32, "3", \
      "(tst only) Entries Per Lextree; #successive entries into one lextree before lextree-entries shifted to the next" }, \
    { "-min_endfr", ARG_INT32, "3", \
      "(tst only) Nodes ignored during lattice generation if they persist for fewer than so many end frames" }, \
    { "-maxhistpf", ARG_INT32, "100", \
      "(tst only) Max no. of histories to maintain at each frame" }, \
    { "-hmmhistbinsize", ARG_INT32, "5000", \
      "(tst only) Performance histogram: #frames vs #HMMs active; #HMMs/bin in this histogram" }, \
    { "-fwdtree", ARG_BOOLEAN, "yes", \
      "Run forward lexicon-tree search (1st pass)" }, \
    { "-fwdflat", ARG_BOOLEAN, "yes", \
      "Run forward flat-lexicon search over word lattice (2nd pass)" }, \
    { "-bestpath", ARG_BOOLEAN, "yes", \
      "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \
    { "-backtrace", ARG_BOOLEAN, "no", \
      "Print results and backtraces to log file." }, \
    { "-latsize", ARG_INT32, "5000", \
      "Initial backpointer table size" }, \
    { "-maxwpf", ARG_INT32, "-1", \
      "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \
    { "-maxhmmpf", ARG_INT32, "-1", \
      "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \
    { "-fwdflatefwid", ARG_INT32, "4", \
      "Minimum number of end frames for a word to be searched in fwdflat search" }, \
    { "-fwdflatsfwin", ARG_INT32, "25", \
      "Window of frames in lattice to search for successor words in fwdflat search " }
00198
/**
 * Option entries for finite-state grammar (FSG / JSGF) input.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string (or NULL when
 * there is no default), help text }.  There is no trailing comma after the
 * last entry.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_FSG_OPTIONS \
    { "-fsg", ARG_STRING, NULL, \
      "Sphinx format finite state grammar file"}, \
    { "-jsgf", ARG_STRING, NULL, \
      "JSGF grammar file" }, \
    { "-toprule", ARG_STRING, NULL, \
      "Start rule for JSGF (first public rule is default)" }, \
    { "-fsgusealtpron", ARG_BOOLEAN, "yes", \
      "Add alternate pronunciations to FSG"}, \
    { "-fsgusefiller", ARG_BOOLEAN, "yes", \
      "Insert filler words at each state."}
00221
/**
 * Option entries for N-gram language models and the language-model
 * weights and penalties.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string (or NULL when
 * there is no default), help text }.  Help strings are single-line text
 * with no embedded newline.  There is no trailing comma after the last
 * entry.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_NGRAM_OPTIONS \
    { "-lm", ARG_STRING, NULL, \
      "Word trigram language model input file" }, \
    { "-lmctl", ARG_STRING, NULL, \
      "Specify a set of language model"}, \
    { "-lmname", ARG_STRING, "default", \
      "Which language model in -lmctl to use by default"}, \
    { "-lw", ARG_FLOAT32, "6.5", \
      "Language model probability weight" }, \
    { "-fwdflatlw", ARG_FLOAT32, "8.5", \
      "Language model probability weight for flat lexicon (2nd pass) decoding" }, \
    { "-bestpathlw", ARG_FLOAT32, "9.5", \
      "Language model probability weight for bestpath search" }, \
    { "-ascale", ARG_FLOAT32, "20.0", \
      "Inverse of acoustic model scale for confidence score calculation" }, \
    { "-wip", ARG_FLOAT32, "0.65", \
      "Word insertion penalty" }, \
    { "-nwpen", ARG_FLOAT32, "1.0", \
      "New word transition penalty" }, \
    { "-pip", ARG_FLOAT32, "1.0", \
      "Phone insertion penalty" }, \
    { "-uw", ARG_FLOAT32, "1.0", \
      "Unigram weight" }, \
    { "-silprob", ARG_FLOAT32, "0.005", \
      "Silence word transition probability" }, \
    { "-fillprob", ARG_FLOAT32, "1e-8", \
      "Filler word transition probability" }, \
    { "-bghist", ARG_BOOLEAN, "no", \
      "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \
    { "-lextreedump", ARG_INT32, "0", \
      "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" }
00284
/**
 * Option entries for the pronunciation dictionaries.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string (or NULL when
 * there is no default), help text }.  "-dict" is the only entry in this
 * header tagged REQARG_STRING rather than ARG_STRING.  There is no trailing
 * comma after the last entry.
 *
 * NOTE(review): REQARG_* presumably marks an option the user must supply,
 * and the field layout is presumably that of arg_t from <cmd_ln.h> --
 * confirm.
 */
#define POCKETSPHINX_DICT_OPTIONS \
    { "-dict", REQARG_STRING, NULL, \
      "Main pronunciation dictionary (lexicon) input file" }, \
    { "-fdict", ARG_STRING, NULL, \
      "Noise word pronunciation dictionary input file" }, \
    { "-dictcase", ARG_BOOLEAN, "no", \
      "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" }, \
    { "-maxnewoov", ARG_INT32, "20", \
      "Maximum new OOVs that can be added at run time" }, \
    { "-usewdphones", ARG_BOOLEAN, "no", \
      "Use within-word phones only" }
00307
/**
 * Option entries for acoustic model files and Gaussian scoring.
 *
 * Expands to a comma-separated list of four-field initialisers of the form
 * { option name, ARG_* type tag, default value as a string (or NULL when
 * there is no default), help text }.  "-topn_beam" is tagged ARG_STRING
 * even though its default is "0": its help text allows a per-feature list
 * instead of a single value.  There is no trailing comma after the last
 * entry.
 *
 * NOTE(review): the field layout is presumably that of arg_t from
 * <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_ACMOD_OPTIONS \
    { "-hmm", ARG_STRING, NULL, \
      "Directory containing acoustic model files."}, \
    { "-featparams", ARG_STRING, NULL, \
      "File containing feature extraction parameters."}, \
    { "-mdef", ARG_STRING, NULL, \
      "Model definition input file" }, \
    { "-tmat", ARG_STRING, NULL, \
      "HMM state transition matrix input file" }, \
    { "-tmatfloor", ARG_FLOAT32, "0.0001", \
      "HMM state transition probability floor (applied to -tmat file)" }, \
    { "-mean", ARG_STRING, NULL, \
      "Mixture gaussian means input file" }, \
    { "-var", ARG_STRING, NULL, \
      "Mixture gaussian variances input file" }, \
    { "-varfloor", ARG_FLOAT32, "0.0001", \
      "Mixture gaussian variance floor (applied to data from -var file)" }, \
    { "-mixw", ARG_STRING, NULL, \
      "Senone mixture weights input file (uncompressed)" }, \
    { "-mixwfloor", ARG_FLOAT32, "0.0000001", \
      "Senone mixture weights floor (applied to data from -mixw file)" }, \
    { "-sendump", ARG_STRING, NULL, \
      "Senone dump (compressed mixture weights) input file" }, \
    { "-mllr", ARG_STRING, NULL, \
      "MLLR transformation to apply to means and variances" }, \
    { "-mmap", ARG_BOOLEAN, "yes", \
      "Use memory-mapped I/O (if possible) for model files" }, \
    { "-ds", ARG_INT32, "1", \
      "Frame GMM computation downsampling ratio" }, \
    { "-topn", ARG_INT32, "4", \
      "Maximum number of top Gaussians to use in scoring." }, \
    { "-topn_beam", ARG_STRING, "0", \
      "Beam width used to determine top-N Gaussians (or a list, per-feature)" }, \
    { "-kdtree", ARG_STRING, NULL, \
      "kd-Tree file for Gaussian selection" }, \
    { "-kdmaxdepth", ARG_INT32, "0", \
      "Maximum depth of kd-Trees to use" }, \
    { "-kdmaxbbi", ARG_INT32, "-1", \
      "Maximum number of Gaussians per leaf node in kd-Trees" }, \
    { "-logbase", ARG_FLOAT32, "1.0001", \
      "Base in which all log-likelihoods calculated" }
00390
/**
 * All-empty option entry: NULL name, type 0, NULL default, NULL help text.
 *
 * NOTE(review): presumably appended after the POCKETSPHINX_*_OPTIONS
 * entries as the sentinel that terminates an option table -- confirm
 * against the table consumers in <cmd_ln.h>.
 */
#define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00392
00393 #endif