include/cmdln_macro.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2006 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* cmdln_macro.h - Command line definitions for PocketSphinx */
00039 
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042 
00043 #include <cmd_ln.h>
00044 #include <feat.h>
00045 #include <fe.h>
00046 
/**
 * Complete PocketSphinx option table, as a comma-separated list of entries.
 *
 * Expands to the two front-end macro invocations followed by the acoustic
 * model, beam, search, dictionary, n-gram, FSG and debug option groups.
 * No terminating entry is included in this expansion.
 *
 * NOTE(review): the two *_command_line_macro() helpers are not defined in
 * this header; presumably they come from <fe.h> and <feat.h> -- confirm.
 */
00048 #define POCKETSPHINX_OPTIONS \
00049     waveform_to_cepstral_command_line_macro(), \
00050     cepstral_to_feature_command_line_macro(), \
00051     POCKETSPHINX_ACMOD_OPTIONS, \
00052         POCKETSPHINX_BEAM_OPTIONS,   \
00053         POCKETSPHINX_SEARCH_OPTIONS, \
00054         POCKETSPHINX_DICT_OPTIONS, \
00055         POCKETSPHINX_NGRAM_OPTIONS, \
00056         POCKETSPHINX_FSG_OPTIONS, \
00057         POCKETSPHINX_DEBUG_OPTIONS
00058 
/**
 * Logging and debugging options: -logfn, -debug, -mfclogdir, -rawlogdir.
 *
 * Each entry is { option name, ARG_* type, default, help text }.  Every
 * entry in this group has a NULL default, including the integer -debug.
 */
00060 #define POCKETSPHINX_DEBUG_OPTIONS                      \
00061     { "-logfn",                                         \
00062             ARG_STRING,                                 \
00063             NULL,                                       \
00064             "File to write log messages in"             \
00065      },                                                 \
00066     { "-debug",                                         \
00067             ARG_INT32,                                  \
00068             NULL,                                       \
00069             "Verbosity level for debugging messages"    \
00070      },                                                 \
00071      { "-mfclogdir",                                    \
00072              ARG_STRING,                                \
00073              NULL,                                      \
00074              "Directory to log feature files to"        \
00075              },                                         \
00076     { "-rawlogdir",                                     \
00077             ARG_STRING,                                 \
00078             NULL,                                       \
00079             "Directory to log raw audio files to" }
00080 
/**
 * Beam-width and phoneme-lookahead options used to tune search pruning.
 *
 * Each entry is { option name, ARG_* type, default, help text }; defaults
 * are written as strings.  All entries are ARG_FLOAT64 except -pl_window,
 * which is ARG_INT32.  -wend_beam and -vhbeam are marked "(tst only)" in
 * their help text.
 */
00082 #define POCKETSPHINX_BEAM_OPTIONS                                       \
00083 { "-beam",                                                              \
00084       ARG_FLOAT64,                                                      \
00085       "1e-48",                                                          \
00086       "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
00087 { "-wbeam",                                                             \
00088       ARG_FLOAT64,                                                      \
00089       "7e-29",                                                          \
00090       "Beam width applied to word exits" },                             \
00091 { "-pbeam",                                                             \
00092       ARG_FLOAT64,                                                      \
00093       "1e-48",                                                          \
00094       "Beam width applied to phone transitions" },                      \
00095 { "-lpbeam",                                                            \
00096       ARG_FLOAT64,                                                      \
00097       "1e-40",                                                          \
00098       "Beam width applied to last phone in words" },                    \
00099 { "-lponlybeam",                                                        \
00100       ARG_FLOAT64,                                                      \
00101       "7e-29",                                                          \
00102       "Beam width applied to last phone in single-phone words" },       \
00103 { "-wend_beam", \
00104       ARG_FLOAT64, \
00105       "7e-29", \
00106       "(tst only) Beam selecting word-final HMMs exiting in each frame [0(widest) .. 1(narrowest)]" }, \
00107 { "-vhbeam", \
00108       ARG_FLOAT64, \
00109       "7e-29", \
00110       "(tst only) Beam width applied to Viterbi history entries (before -maxhistpf)" }, \
00111 { "-fwdflatbeam",                                                       \
00112       ARG_FLOAT64,                                                      \
00113       "1e-64",                                                          \
00114       "Beam width applied to every frame in second-pass flat search" }, \
00115 { "-fwdflatwbeam",                                                      \
00116       ARG_FLOAT64,                                                      \
00117       "7e-29",                                                          \
00118       "Beam width applied to word exits in second-pass flat search" },  \
00119 { "-pl_window",                                                         \
00120       ARG_INT32,                                                        \
00121       "0",                                                              \
00122       "Phoneme lookahead window size, in frames" },                     \
00123 { "-pl_beam",                                                           \
00124       ARG_FLOAT64,                                                      \
00125       "1e-10",                                                          \
00126       "Beam width applied to phone loop search for lookahead" },        \
00127 { "-pl_pbeam",                                                          \
00128       ARG_FLOAT64,                                                      \
00129       "1e-5",                                                           \
00130       "Beam width applied to phone loop transitions for lookahead" }
00131 
/**
 * Options selecting which search passes run and bounding their resources.
 *
 * Each entry is { option name, ARG_* type, default, help text }; defaults
 * are written as strings ("yes"/"no" for ARG_BOOLEAN entries).  Entries
 * whose help text starts with "(tst only)" are described there as applying
 * only to the -tst search.  For -maxwpf and -maxhmmpf a value of -1 means
 * no pruning, per their help text.
 */
00133 #define POCKETSPHINX_SEARCH_OPTIONS \
00134 { "-compallsen",                                                                                \
00135       ARG_BOOLEAN,                                                                              \
00136       "no",                                                                                     \
00137       "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
00138 { "-tst",                                                                                       \
00139       ARG_BOOLEAN,                                                                              \
00140       "no",                                                                                     \
00141       "Run time-switch tree search (a.k.a. Sphinx3 search)" },                                  \
00142 { "-Nlextree", \
00143       ARG_INT32, \
00144       "3", \
00145       "(tst only) No. of lextrees to be instantiated; entries into them staggered in time" }, \
00146 { "-epl", \
00147       ARG_INT32, \
00148       "3", \
00149       "(tst only) Entries Per Lextree; #successive entries into one lextree before lextree-entries shifted to the next" }, \
00150 { "-min_endfr", \
00151       ARG_INT32, \
00152       "3", \
00153       "(tst only) Nodes ignored during lattice generation if they persist for fewer than so many end frames" }, \
00154 { "-maxhistpf", \
00155       ARG_INT32, \
00156       "100", \
00157       "(tst only) Max no. of histories to maintain at each frame" }, \
00158 { "-hmmhistbinsize", \
00159       ARG_INT32, \
00160       "5000", \
00161       "(tst only) Performance histogram: #frames vs #HMMs active; #HMMs/bin in this histogram" }, \
00162 { "-fwdtree",                                                                                   \
00163       ARG_BOOLEAN,                                                                              \
00164       "yes",                                                                                    \
00165       "Run forward lexicon-tree search (1st pass)" },                                           \
00166 { "-fwdflat",                                                                                   \
00167       ARG_BOOLEAN,                                                                              \
00168       "yes",                                                                                    \
00169       "Run forward flat-lexicon search over word lattice (2nd pass)" },                         \
00170 { "-bestpath",                                                                                  \
00171       ARG_BOOLEAN,                                                                              \
00172       "yes",                                                                                    \
00173       "Run bestpath (Dijkstra) search over word lattice (3rd pass)" },                          \
00174 { "-backtrace",                                                                                 \
00175       ARG_BOOLEAN,                                                                              \
00176       "no",                                                                                     \
00177       "Print results and backtraces to log file." },                                            \
00178 { "-latsize",                                                                                   \
00179       ARG_INT32,                                                                                \
00180       "5000",                                                                                   \
00181       "Initial backpointer table size" },                                                       \
00182 { "-maxwpf",                                                                                    \
00183       ARG_INT32,                                                                                \
00184       "-1",                                                                                     \
00185       "Maximum number of distinct word exits at each frame (or -1 for no pruning)" },           \
00186 { "-maxhmmpf",                                                                                  \
00187       ARG_INT32,                                                                                \
00188       "-1",                                                                                     \
00189       "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" },       \
00190 { "-fwdflatefwid",                                                                              \
00191       ARG_INT32,                                                                                \
00192       "4",                                                                                      \
00193       "Minimum number of end frames for a word to be searched in fwdflat search" },             \
00194 { "-fwdflatsfwin",                                                                              \
00195       ARG_INT32,                                                                                \
00196       "25",                                                                                     \
00197       "Window of frames in lattice to search for successor words in fwdflat search " }
00198 
/**
 * Options for finite state grammar input (-fsg, -jsgf, -toprule) and for
 * how FSGs are expanded (-fsgusealtpron, -fsgusefiller).
 *
 * Each entry is { option name, ARG_* type, default, help text }.  The three
 * string options default to NULL; the two boolean options default to "yes".
 */
00200 #define POCKETSPHINX_FSG_OPTIONS \
00201     { "-fsg",                                                   \
00202             ARG_STRING,                                         \
00203             NULL,                                               \
00204             "Sphinx format finite state grammar file"},         \
00205 { "-jsgf",                                                      \
00206         ARG_STRING,                                             \
00207         NULL,                                                   \
00208         "JSGF grammar file" },                                  \
00209 { "-toprule",                                                   \
00210         ARG_STRING,                                             \
00211         NULL,                                                   \
00212         "Start rule for JSGF (first public rule is default)" }, \
00213 { "-fsgusealtpron",                                             \
00214         ARG_BOOLEAN,                                            \
00215         "yes",                                                  \
00216         "Add alternate pronunciations to FSG"},                 \
00217 { "-fsgusefiller",                                              \
00218         ARG_BOOLEAN,                                            \
00219         "yes",                                                  \
00220         "Insert filler words at each state."}
00221 
/**
 * Options for statistical (n-gram) language models: model selection
 * (-lm, -lmctl, -lmname), language weights for each pass, insertion
 * penalties, silence/filler probabilities, and two search-related switches
 * (-bghist, -lextreedump).
 *
 * Each entry is { option name, ARG_* type, default, help text }; defaults
 * are written as strings, or NULL when there is no default.
 *
 * Help strings must not end in a newline: the -lmctl text previously did,
 * unlike every other entry, which put a stray line break in help output.
 */
00223 #define POCKETSPHINX_NGRAM_OPTIONS \
00224 { "-lm",                                                                                \
00225       ARG_STRING,                                                                       \
00226       NULL,                                                                             \
00227       "Word trigram language model input file" },                                       \
00228 { "-lmctl",                                                                             \
00229       ARG_STRING,                                                                       \
00230       NULL,                                                                             \
00231       "Specify a set of language model"},                                               \
00232 { "-lmname",                                                                            \
00233       ARG_STRING,                                                                       \
00234       "default",                                                                        \
00235       "Which language model in -lmctl to use by default"},                              \
00236 { "-lw",                                                                                \
00237       ARG_FLOAT32,                                                                      \
00238       "6.5",                                                                            \
00239       "Language model probability weight" },                                            \
00240 { "-fwdflatlw",                                                                         \
00241       ARG_FLOAT32,                                                                      \
00242       "8.5",                                                                            \
00243       "Language model probability weight for flat lexicon (2nd pass) decoding" },       \
00244 { "-bestpathlw",                                                                        \
00245       ARG_FLOAT32,                                                                      \
00246       "9.5",                                                                            \
00247       "Language model probability weight for bestpath search" },                        \
00248 { "-ascale",                                                                            \
00249       ARG_FLOAT32,                                                                      \
00250       "20.0",                                                                           \
00251       "Inverse of acoustic model scale for confidence score calculation" },             \
00252 { "-wip",                                                                               \
00253       ARG_FLOAT32,                                                                      \
00254       "0.65",                                                                           \
00255       "Word insertion penalty" },                                                       \
00256 { "-nwpen",                                                                             \
00257       ARG_FLOAT32,                                                                      \
00258       "1.0",                                                                            \
00259       "New word transition penalty" },                                                  \
00260 { "-pip",                                                                               \
00261       ARG_FLOAT32,                                                                      \
00262       "1.0",                                                                            \
00263       "Phone insertion penalty" },                                                      \
00264 { "-uw",                                                                                \
00265       ARG_FLOAT32,                                                                      \
00266       "1.0",                                                                            \
00267       "Unigram weight" },                                                               \
00268 { "-silprob",                                                                           \
00269       ARG_FLOAT32,                                                                      \
00270       "0.005",                                                                          \
00271       "Silence word transition probability" },                                          \
00272 { "-fillprob",                                                                          \
00273       ARG_FLOAT32,                                                                      \
00274       "1e-8",                                                                           \
00275         "Filler word transition probability" }, \
00276 { "-bghist",   \
00277       ARG_BOOLEAN, \
00278       "no", \
00279       "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \
00280 { "-lextreedump", \
00281       ARG_INT32, \
00282       "0", \
00283       "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" }
00284 
/**
 * Options for pronunciation dictionaries.
 *
 * Each entry is { option name, ARG_* type, default, help text }.  -dict is
 * declared REQARG_STRING, unlike the other string options in this header,
 * which use ARG_STRING.
 * NOTE(review): REQARG_* presumably makes the option mandatory when the
 * command line is parsed -- confirm against cmd_ln.h.
 */
00286 #define POCKETSPHINX_DICT_OPTIONS \
00287     { "-dict",                                                  \
00288       REQARG_STRING,                                            \
00289       NULL,                                                     \
00290       "Main pronunciation dictionary (lexicon) input file" },   \
00291     { "-fdict",                                                 \
00292       ARG_STRING,                                               \
00293       NULL,                                                     \
00294       "Noise word pronunciation dictionary input file" },       \
00295     { "-dictcase",                                              \
00296       ARG_BOOLEAN,                                              \
00297       "no",                                                     \
00298       "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" },     \
00299     { "-maxnewoov",                                             \
00300       ARG_INT32,                                                \
00301       "20",                                                     \
00302       "Maximum new OOVs that can be added at run time" },       \
00303     { "-usewdphones",                                           \
00304       ARG_BOOLEAN,                                              \
00305       "no",                                                     \
00306       "Use within-word phones only" }
00307 
/**
 * Options for acoustic modeling: model file locations, probability floors,
 * Gaussian selection (top-N and kd-trees), and the log base.
 *
 * Each entry is { option name, ARG_* type, default, help text }; defaults
 * are written as strings, or NULL when there is no default.  All file and
 * directory options default to NULL.  -topn_beam is declared ARG_STRING
 * rather than numeric; its help text says it may also be a per-feature list.
 */
00309 #define POCKETSPHINX_ACMOD_OPTIONS \
00310 { "-hmm",                                                                       \
00311       ARG_STRING,                                                               \
00312       NULL,                                                                     \
00313       "Directory containing acoustic model files."},                            \
00314 { "-featparams",                                                                \
00315       ARG_STRING,                                                               \
00316       NULL,                                                                     \
00317       "File containing feature extraction parameters."},                        \
00318 { "-mdef",                                                                      \
00319       ARG_STRING,                                                               \
00320       NULL,                                                                     \
00321       "Model definition input file" },                                          \
00322 { "-tmat",                                                                      \
00323       ARG_STRING,                                                               \
00324       NULL,                                                                     \
00325       "HMM state transition matrix input file" },                               \
00326 { "-tmatfloor",                                                                 \
00327       ARG_FLOAT32,                                                              \
00328       "0.0001",                                                                 \
00329       "HMM state transition probability floor (applied to -tmat file)" },       \
00330 { "-mean",                                                                      \
00331       ARG_STRING,                                                               \
00332       NULL,                                                                     \
00333       "Mixture gaussian means input file" },                                    \
00334 { "-var",                                                                       \
00335       ARG_STRING,                                                               \
00336       NULL,                                                                     \
00337       "Mixture gaussian variances input file" },                                \
00338 { "-varfloor",                                                                  \
00339       ARG_FLOAT32,                                                              \
00340       "0.0001",                                                                 \
00341       "Mixture gaussian variance floor (applied to data from -var file)" },     \
00342 { "-mixw",                                                                      \
00343       ARG_STRING,                                                               \
00344       NULL,                                                                     \
00345       "Senone mixture weights input file (uncompressed)" },                     \
00346 { "-mixwfloor",                                                                 \
00347       ARG_FLOAT32,                                                              \
00348       "0.0000001",                                                              \
00349       "Senone mixture weights floor (applied to data from -mixw file)" },       \
00350 { "-sendump",                                                                   \
00351       ARG_STRING,                                                               \
00352       NULL,                                                                     \
00353       "Senone dump (compressed mixture weights) input file" },                  \
00354 { "-mllr",                                                                      \
00355       ARG_STRING,                                                               \
00356       NULL,                                                                     \
00357       "MLLR transformation to apply to means and variances" },                  \
00358 { "-mmap",                                                                      \
00359       ARG_BOOLEAN,                                                              \
00360       "yes",                                                                    \
00361       "Use memory-mapped I/O (if possible) for model files" },                  \
00362 { "-ds",                                                                        \
00363       ARG_INT32,                                                                \
00364       "1",                                                                      \
00365       "Frame GMM computation downsampling ratio" },                             \
00366 { "-topn",                                                                      \
00367       ARG_INT32,                                                                \
00368       "4",                                                                      \
00369       "Maximum number of top Gaussians to use in scoring." },                   \
00370 { "-topn_beam",                                                                 \
00371       ARG_STRING,                                                               \
00372       "0",                                                                     \
00373       "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\
00374 { "-kdtree",                                                                    \
00375       ARG_STRING,                                                               \
00376       NULL,                                                                     \
00377       "kd-Tree file for Gaussian selection" },                                  \
00378 { "-kdmaxdepth",                                                                \
00379       ARG_INT32,                                                                \
00380       "0",                                                                      \
00381       "Maximum depth of kd-Trees to use" },                                     \
00382 { "-kdmaxbbi",                                                                  \
00383       ARG_INT32,                                                                \
00384       "-1",                                                                     \
00385       "Maximum number of Gaussians per leaf node in kd-Trees" },                \
00386 { "-logbase",                                                                   \
00387       ARG_FLOAT32,                                                              \
00388       "1.0001",                                                                 \
00389       "Base in which all log-likelihoods calculated" }
00390 
/**
 * Option entry with all four fields NULL/zero.
 * NOTE(review): presumably the end-of-table sentinel to place after
 * POCKETSPHINX_OPTIONS in an option array -- confirm against cmd_ln.h.
 */
00391 #define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00392 
00393 #endif /* __PS_CMDLN_MACRO_H__ */

Generated on Mon Jan 24 21:50:15 2011 for PocketSphinx by  doxygen 1.4.7