src/libpocketsphinx/pocketsphinx.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers. */
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <err.h>
00044 #include <strfuncs.h>
00045 #include <filename.h>
00046 #include <pio.h>
00047 
00048 /* Local headers. */
00049 #include "cmdln_macro.h"
00050 #include "pocketsphinx_internal.h"
00051 #include "ps_lattice_internal.h"
00052 #include "phone_loop_search.h"
00053 #include "fsg_search_internal.h"
00054 #include "tst_search.h"
00055 #include "ngram_search.h"
00056 #include "ngram_search_fwdtree.h"
00057 #include "ngram_search_fwdflat.h"
00058 
00059 static const arg_t ps_args_def[] = {
00060     POCKETSPHINX_OPTIONS,
00061     CMDLN_EMPTY_OPTION
00062 };
00063 
/* Report whether `path` can be opened for reading.  Probing with fopen()
 * is used because no portable existence check was known to the author.
 * Returns 1 when the open succeeds, 0 otherwise. */
static int
file_exists(const char *path)
{
    FILE *probe = fopen(path, "rb");

    if (probe == NULL)
        return 0;
    fclose(probe);
    return 1;
}
00074 
00075 static void
00076 ps_add_file(ps_decoder_t *ps, const char *arg,
00077             const char *hmmdir, const char *file)
00078 {
00079     char *tmp = string_join(hmmdir, "/", file, NULL);
00080 
00081     if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
00082         cmd_ln_set_str_r(ps->config, arg, tmp);
00083     ckd_free(tmp);
00084 }
00085 
00086 static void
00087 ps_init_defaults(ps_decoder_t *ps)
00088 {
00089     char const *hmmdir;
00090 
00091     /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
00092 #ifdef __ADSPBLACKFIN__
00093     E_INFO("Will not use mmap() on uClinux/Blackfin.");
00094     cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
00095 #endif
00096     /* Get acoustic model filenames and add them to the command-line */
00097     if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
00098         ps_add_file(ps, "-mdef", hmmdir, "mdef");
00099         ps_add_file(ps, "-mean", hmmdir, "means");
00100         ps_add_file(ps, "-var", hmmdir, "variances");
00101         ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
00102         ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
00103         ps_add_file(ps, "-sendump", hmmdir, "sendump");
00104         ps_add_file(ps, "-kdtree", hmmdir, "kdtrees");
00105         ps_add_file(ps, "-fdict", hmmdir, "noisedict");
00106         ps_add_file(ps, "-lda", hmmdir, "feature_transform");
00107         ps_add_file(ps, "-featparams", hmmdir, "feat.params");
00108     }
00109 }
00110 
00111 static void
00112 ps_free_searches(ps_decoder_t *ps)
00113 {
00114     gnode_t *gn;
00115 
00116     if (ps->searches == NULL)
00117         return;
00118 
00119     for (gn = ps->searches; gn; gn = gnode_next(gn))
00120         ps_search_free(gnode_ptr(gn));
00121     glist_free(ps->searches);
00122     ps->searches = NULL;
00123     ps->search = NULL;
00124 }
00125 
00126 static ps_search_t *
00127 ps_find_search(ps_decoder_t *ps, char const *name)
00128 {
00129     gnode_t *gn;
00130 
00131     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00132         if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
00133             return (ps_search_t *)gnode_ptr(gn);
00134     }
00135     return NULL;
00136 }
00137 
00138 int
00139 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
00140 {
00141     char const *lmfile, *lmctl = NULL;
00142 
00143     if (config && config != ps->config) {
00144         cmd_ln_free_r(ps->config);
00145         ps->config = config;
00146     }
00147 #ifndef _WIN32_WCE
00148     /* Set up logging. */
00149     if (cmd_ln_str_r(ps->config, "-logfn"))
00150         err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
00151 #endif
00152     err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
00153     ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
00154     ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
00155 
00156     /* Fill in some default arguments. */
00157     ps_init_defaults(ps);
00158 
00159     /* Free old searches (do this before other reinit) */
00160     ps_free_searches(ps);
00161 
00162     /* Free old acmod. */
00163     acmod_free(ps->acmod);
00164     ps->acmod = NULL;
00165 
00166     /* Free old dictionary (must be done after the two things above) */
00167     s3dict_free(ps->dict);
00168     ps->dict = NULL;
00169 
00170 
00171     /* Logmath computation (used in acmod and search) */
00172     if (ps->lmath == NULL
00173         || (logmath_get_base(ps->lmath) != 
00174             (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
00175         if (ps->lmath)
00176             logmath_free(ps->lmath);
00177         ps->lmath = logmath_init
00178             ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
00179              cmd_ln_boolean_r(ps->config, "-bestpath"));
00180     }
00181 
00182     /* Acoustic model (this is basically everything that
00183      * uttproc.c, senscr.c, and others used to do) */
00184     if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
00185         return -1;
00186     /* Make the acmod's feature buffer growable if we are doing two-pass search. */
00187     if (cmd_ln_boolean_r(ps->config, "-fwdflat")
00188         && cmd_ln_boolean_r(ps->config, "-fwdtree"))
00189         acmod_set_grow(ps->acmod, TRUE);
00190 
00191     if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
00192         /* Initialize an auxiliary phone loop search, which will run in
00193          * "parallel" with FSG or N-Gram search. */
00194         if ((ps->phone_loop = phone_loop_search_init(ps->config,
00195                                                      ps->acmod, ps->dict)) == NULL)
00196             return -1;
00197         ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
00198     }
00199 
00200     /* Dictionary and triphone mappings (depends on acmod). */
00201     /* FIXME: pass config, change arguments, implement LTS, etc. */
00202     if ((ps->dict = s3dict_init(ps->acmod->mdef,
00203                                 cmd_ln_str_r(ps->config, "-dict"),
00204                                 cmd_ln_str_r(ps->config, "-fdict"),
00205                                 FALSE, TRUE)) == NULL)
00206         return -1;
00207 
00208     /* Determine whether we are starting out in FSG or N-Gram search mode. */
00209     if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
00210         ps_search_t *fsgs;
00211 
00212         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict, FALSE, ps->lmath)) == NULL)
00213             return -1;
00214         if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00215             return -1;
00216         fsgs->pls = ps->phone_loop;
00217         ps->searches = glist_add_ptr(ps->searches, fsgs);
00218         ps->search = fsgs;
00219     }
00220     else if (cmd_ln_str_r(ps->config, "-tst")) {
00221         ps_search_t *tstg;
00222 
00223         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict, TRUE, ps->lmath)) == NULL)
00224             return -1;
00225         if ((tstg = tst_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00226             return -1;
00227         /* FIXME: add phoneme lookahead */
00228         ps->searches = glist_add_ptr(ps->searches, tstg);
00229         ps->search = tstg;
00230     }
00231     else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
00232              || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
00233         ps_search_t *ngs;
00234 
00235         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict, FALSE, ps->lmath)) == NULL)
00236             return -1;
00237         if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00238             return -1;
00239         ngs->pls = ps->phone_loop;
00240         ps->searches = glist_add_ptr(ps->searches, ngs);
00241         ps->search = ngs;
00242     }
00243     /* Otherwise, we will initialize the search whenever the user
00244      * decides to load an FSG or a language model. */
00245     else {
00246         /* Major hack, we just assume that composite triphones (which
00247          * actually are not that useful) and hence TST won't be used. */
00248         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict, FALSE, ps->lmath)) == NULL)
00249             return -1;
00250     }
00251 
00252     /* Initialize performance timer. */
00253     ps->perf.name = "decode";
00254     ptmr_init(&ps->perf);
00255 
00256     return 0;
00257 }
00258 
00259 ps_decoder_t *
00260 ps_init(cmd_ln_t *config)
00261 {
00262     ps_decoder_t *ps;
00263 
00264     ps = ckd_calloc(1, sizeof(*ps));
00265     ps->refcount = 1;
00266     if (ps_reinit(ps, config) < 0) {
00267         ps_free(ps);
00268         return NULL;
00269     }
00270     return ps;
00271 }
00272 
00273 arg_t const *
00274 ps_args(void)
00275 {
00276     return ps_args_def;
00277 }
00278 
00279 ps_decoder_t *
00280 ps_retain(ps_decoder_t *ps)
00281 {
00282     ++ps->refcount;
00283     return ps;
00284 }
00285 
00286 int
00287 ps_free(ps_decoder_t *ps)
00288 {
00289     gnode_t *gn;
00290 
00291     if (ps == NULL)
00292         return 0;
00293     if (--ps->refcount > 0)
00294         return ps->refcount;
00295     for (gn = ps->searches; gn; gn = gnode_next(gn))
00296         ps_search_free(gnode_ptr(gn));
00297     glist_free(ps->searches);
00298     s3dict_free(ps->dict);
00299     dict2pid_free(ps->d2p);
00300     acmod_free(ps->acmod);
00301     logmath_free(ps->lmath);
00302     cmd_ln_free_r(ps->config);
00303     ckd_free(ps->uttid);
00304     ckd_free(ps);
00305     return 0;
00306 }
00307 
00308 char const *
00309 ps_get_uttid(ps_decoder_t *ps)
00310 {
00311     return ps->uttid;
00312 }
00313 
00314 cmd_ln_t *
00315 ps_get_config(ps_decoder_t *ps)
00316 {
00317     return ps->config;
00318 }
00319 
00320 logmath_t *
00321 ps_get_logmath(ps_decoder_t *ps)
00322 {
00323     return ps->lmath;
00324 }
00325 
00326 fe_t *
00327 ps_get_fe(ps_decoder_t *ps)
00328 {
00329     return ps->acmod->fe;
00330 }
00331 
00332 feat_t *
00333 ps_get_feat(ps_decoder_t *ps)
00334 {
00335     return ps->acmod->fcb;
00336 }
00337 
00338 ps_mllr_t *
00339 ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
00340 {
00341     return acmod_update_mllr(ps->acmod, mllr);
00342 }
00343 
00344 ngram_model_t *
00345 ps_get_lmset(ps_decoder_t *ps)
00346 {
00347     if (ps->search == NULL
00348         || 0 != strcmp(ps_search_name(ps->search), "ngram"))
00349         return NULL;
00350     return ((ngram_search_t *)ps->search)->lmset;
00351 }
00352 
00353 ngram_model_t *
00354 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
00355 {
00356     ngram_search_t *ngs;
00357     ps_search_t *search;
00358 
00359     /* Look for N-Gram search. */
00360     search = ps_find_search(ps, "ngram");
00361     if (search == NULL) {
00362         /* Initialize N-Gram search. */
00363         search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
00364         if (search == NULL)
00365             return NULL;
00366         search->pls = ps->phone_loop;
00367         ps->searches = glist_add_ptr(ps->searches, search);
00368         ngs = (ngram_search_t *)search;
00369     }
00370     else {
00371         ngs = (ngram_search_t *)search;
00372         /* Free any previous lmset if this is a new one. */
00373         if (ngs->lmset != NULL && ngs->lmset != lmset)
00374             ngram_model_free(ngs->lmset);
00375         ngs->lmset = lmset;
00376         /* Tell N-Gram search to update its view of the world. */
00377         if (ps_search_reinit(search) < 0)
00378             return NULL;
00379     }
00380     ps->search = search;
00381     return ngs->lmset;
00382 }
00383 
00384 fsg_set_t *
00385 ps_get_fsgset(ps_decoder_t *ps)
00386 {
00387     if (ps->search == NULL
00388         || 0 != strcmp(ps_search_name(ps->search), "fsg"))
00389         return NULL;
00390     return (fsg_set_t *)ps->search;
00391 }
00392 
00393 fsg_set_t *
00394 ps_update_fsgset(ps_decoder_t *ps)
00395 {
00396     ps_search_t *search;
00397 
00398     /* Look for FSG search. */
00399     search = ps_find_search(ps, "fsg");
00400     if (search == NULL) {
00401         /* Initialize FSG search. */
00402         search = fsg_search_init(ps->config,
00403                                  ps->acmod, ps->dict, ps->d2p);
00404         search->pls = ps->phone_loop;
00405         ps->searches = glist_add_ptr(ps->searches, search);
00406     }
00407     else {
00408         /* Tell FSG search to update its view of the world. */
00409         if (ps_search_reinit(search) < 0)
00410             return NULL;
00411     }
00412     ps->search = search;
00413     return (fsg_set_t *)search;
00414 }
00415 
00416 int
00417 ps_add_word(ps_decoder_t *ps,
00418             char const *word,
00419             char const *phones,
00420             int update)
00421 {
00422     int32 wid, lmwid;
00423     ngram_model_t *lmset;
00424     char *pron;
00425     int rv;
00426 
00427     pron = ckd_salloc(phones);
00428     /* Add it to the dictionary. */
00429     if ((wid = s3dict_add_word(ps->dict, word, pron, strlen(pron))) == -1) {
00430         ckd_free(pron);
00431         return -1;
00432     }
00433     /* Now we also have to add it to dict2pid, oh fun. */
00434 
00435     /* No longer needed. */
00436     ckd_free(pron);
00437 
00438     if ((lmset = ps_get_lmset(ps)) != NULL) {
00439         /* Add it to the LM set (meaning, the current LM).  In a perfect
00440          * world, this would result in the same WID, but because of the
00441          * weird way that word IDs are handled, it doesn't. */
00442         if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
00443             == NGRAM_INVALID_WID)
00444             return -1;
00445     }
00446  
00447     /* Rebuild the widmap and search tree if requested. */
00448     if (update) {
00449         if ((rv = ps_search_reinit(ps->search) < 0))
00450             return rv;
00451     }
00452     return wid;
00453 }
00454 
00455 int
00456 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
00457               char const *uttid, long maxsamps)
00458 {
00459     long total, pos;
00460 
00461     ps_start_utt(ps, uttid);
00462     /* If this file is seekable or maxsamps is specified, then decode
00463      * the whole thing at once. */
00464     if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
00465         int16 *data;
00466 
00467         if (maxsamps == -1) {
00468             long endpos;
00469             fseek(rawfh, 0, SEEK_END);
00470             endpos = ftell(rawfh);
00471             fseek(rawfh, pos, SEEK_SET);
00472             maxsamps = endpos - pos;
00473         }
00474         data = ckd_calloc(maxsamps, sizeof(*data));
00475         total = fread(data, sizeof(*data), maxsamps, rawfh);
00476         ps_process_raw(ps, data, total, FALSE, TRUE);
00477         ckd_free(data);
00478     }
00479     else {
00480         /* Otherwise decode it in a stream. */
00481         total = 0;
00482         while (!feof(rawfh)) {
00483             int16 data[256];
00484             size_t nread;
00485 
00486             nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
00487             ps_process_raw(ps, data, nread, FALSE, FALSE);
00488             total += nread;
00489         }
00490     }
00491     ps_end_utt(ps);
00492     return total;
00493 }
00494 
00495 int
00496 ps_start_utt(ps_decoder_t *ps, char const *uttid)
00497 {
00498     FILE *mfcfh = NULL;
00499     FILE *rawfh = NULL;
00500     int rv;
00501 
00502     if (ps->search == NULL) {
00503         E_ERROR("No search module is selected, did you forget to "
00504                 "specify a language model or grammar?\n");
00505         return -1;
00506     }
00507 
00508     ptmr_reset(&ps->perf);
00509     ptmr_start(&ps->perf);
00510 
00511     if (uttid) {
00512         ckd_free(ps->uttid);
00513         ps->uttid = ckd_salloc(uttid);
00514     }
00515     else {
00516         char nuttid[16];
00517         ckd_free(ps->uttid);
00518         sprintf(nuttid, "%09u", ps->uttno);
00519         ps->uttid = ckd_salloc(nuttid);
00520         ++ps->uttno;
00521     }
00522     /* Remove any residual word lattice and hypothesis. */
00523     ps_lattice_free(ps->search->dag);
00524     ps->search->dag = NULL;
00525     ps->search->last_link = NULL;
00526     ps->search->post = 0;
00527     ckd_free(ps->search->hyp_str);
00528     ps->search->hyp_str = NULL;
00529 
00530     if ((rv = acmod_start_utt(ps->acmod)) < 0)
00531         return rv;
00532 
00533     /* Start logging features and audio if requested. */
00534     if (ps->mfclogdir) {
00535         char *logfn = string_join(ps->mfclogdir, "/",
00536                                   ps->uttid, ".mfc", NULL);
00537         E_INFO("Writing MFCC log file: %s\n", logfn);
00538         if ((mfcfh = fopen(logfn, "wb")) == NULL) {
00539             E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
00540             ckd_free(logfn);
00541             return -1;
00542         }
00543         ckd_free(logfn);
00544         acmod_set_mfcfh(ps->acmod, mfcfh);
00545     }
00546     if (ps->rawlogdir) {
00547         char *logfn = string_join(ps->rawlogdir, "/",
00548                                   ps->uttid, ".raw", NULL);
00549         E_INFO("Writing raw audio log file: %s\n", logfn);
00550         if ((rawfh = fopen(logfn, "wb")) == NULL) {
00551             E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
00552             ckd_free(logfn);
00553             return -1;
00554         }
00555         ckd_free(logfn);
00556         acmod_set_rawfh(ps->acmod, rawfh);
00557     }
00558 
00559     /* Start auxiliary phone loop search. */
00560     if (ps->phone_loop)
00561         ps_search_start(ps->phone_loop);
00562 
00563     return ps_search_start(ps->search);
00564 }
00565 
00566 static int
00567 ps_search_forward(ps_decoder_t *ps)
00568 {
00569     int nfr;
00570 
00571     nfr = 0;
00572     while (ps->acmod->n_feat_frame > 0) {
00573         int k;
00574         if (ps->phone_loop)
00575             if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
00576                 return k;
00577         if (ps->acmod->output_frame >= ps->pl_window)
00578             if ((k = ps_search_step(ps->search,
00579                                     ps->acmod->output_frame - ps->pl_window)) < 0)
00580                 return k;
00581         acmod_advance(ps->acmod);
00582         ++ps->n_frame;
00583         ++nfr;
00584     }
00585     return nfr;
00586 }
00587 
00588 int
00589 ps_process_raw(ps_decoder_t *ps,
00590                int16 const *data,
00591                size_t n_samples,
00592                int no_search,
00593                int full_utt)
00594 {
00595     int n_searchfr = 0;
00596 
00597     if (no_search)
00598         acmod_set_grow(ps->acmod, TRUE);
00599 
00600     while (n_samples) {
00601         int nfr;
00602 
00603         /* Process some data into features. */
00604         if ((nfr = acmod_process_raw(ps->acmod, &data,
00605                                      &n_samples, full_utt)) < 0)
00606             return nfr;
00607 
00608         /* Score and search as much data as possible */
00609         if (no_search)
00610             continue;
00611         if ((nfr = ps_search_forward(ps)) < 0)
00612             return nfr;
00613         n_searchfr += nfr;
00614     }
00615 
00616     return n_searchfr;
00617 }
00618 
00619 int
00620 ps_process_cep(ps_decoder_t *ps,
00621                mfcc_t **data,
00622                int32 n_frames,
00623                int no_search,
00624                int full_utt)
00625 {
00626     int n_searchfr = 0;
00627 
00628     if (no_search)
00629         acmod_set_grow(ps->acmod, TRUE);
00630 
00631     while (n_frames) {
00632         int nfr;
00633 
00634         /* Process some data into features. */
00635         if ((nfr = acmod_process_cep(ps->acmod, &data,
00636                                      &n_frames, full_utt)) < 0)
00637             return nfr;
00638 
00639         /* Score and search as much data as possible */
00640         if (no_search)
00641             continue;
00642         if ((nfr = ps_search_forward(ps)) < 0)
00643             return nfr;
00644         n_searchfr += nfr;
00645     }
00646 
00647     return n_searchfr;
00648 }
00649 
00650 int
00651 ps_end_utt(ps_decoder_t *ps)
00652 {
00653     int rv, i;
00654 
00655     acmod_end_utt(ps->acmod);
00656 
00657     /* Search any remaining frames. */
00658     if ((rv = ps_search_forward(ps)) < 0) {
00659         ptmr_stop(&ps->perf);
00660         return rv;
00661     }
00662     /* Finish phone loop search. */
00663     if (ps->phone_loop) {
00664         if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
00665             ptmr_stop(&ps->perf);
00666             return rv;
00667         }
00668     }
00669     /* Search any frames remaining in the lookahead window. */
00670     for (i = ps->acmod->output_frame - ps->pl_window;
00671          i < ps->acmod->output_frame; ++i)
00672         ps_search_step(ps->search, i);
00673     /* Finish main search. */
00674     if ((rv = ps_search_finish(ps->search)) < 0) {
00675         ptmr_stop(&ps->perf);
00676         return rv;
00677     }
00678     ptmr_stop(&ps->perf);
00679 
00680     /* Log a backtrace if requested. */
00681     if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
00682         char const *uttid, *hyp;
00683         ps_seg_t *seg;
00684         int32 score;
00685 
00686         hyp = ps_get_hyp(ps, &score, &uttid);
00687         E_INFO("%s: %s (%d)\n", uttid, hyp, score);
00688         E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
00689                     "word", "start", "end", "pprob", "ascr", "lscr", "lback");
00690         for (seg = ps_seg_iter(ps, &score); seg;
00691              seg = ps_seg_next(seg)) {
00692             char const *word;
00693             int sf, ef;
00694             int32 post, lscr, ascr, lback;
00695 
00696             word = ps_seg_word(seg);
00697             ps_seg_frames(seg, &sf, &ef);
00698             post = ps_seg_prob(seg, &ascr, &lscr, &lback);
00699             E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
00700                         word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
00701         }
00702     }
00703     return rv;
00704 }
00705 
00706 char const *
00707 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
00708 {
00709     char const *hyp;
00710 
00711     ptmr_start(&ps->perf);
00712     hyp = ps_search_hyp(ps->search, out_best_score);
00713     if (out_uttid)
00714         *out_uttid = ps->uttid;
00715     ptmr_stop(&ps->perf);
00716     return hyp;
00717 }
00718 
00719 int32
00720 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
00721 {
00722     int32 prob;
00723 
00724     ptmr_start(&ps->perf);
00725     prob = ps_search_prob(ps->search);
00726     if (out_uttid)
00727         *out_uttid = ps->uttid;
00728     ptmr_stop(&ps->perf);
00729     return prob;
00730 }
00731 
00732 ps_seg_t *
00733 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
00734 {
00735     ps_seg_t *itor;
00736 
00737     ptmr_start(&ps->perf);
00738     itor = ps_search_seg_iter(ps->search, out_best_score);
00739     ptmr_stop(&ps->perf);
00740     return itor;
00741 }
00742 
00743 ps_seg_t *
00744 ps_seg_next(ps_seg_t *seg)
00745 {
00746     return ps_search_seg_next(seg);
00747 }
00748 
00749 char const *
00750 ps_seg_word(ps_seg_t *seg)
00751 {
00752     return seg->word;
00753 }
00754 
00755 void
00756 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
00757 {
00758     if (out_sf) *out_sf = seg->sf;
00759     if (out_ef) *out_ef = seg->ef;
00760 }
00761 
00762 int32
00763 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
00764 {
00765     if (out_ascr) *out_ascr = seg->ascr;
00766     if (out_lscr) *out_lscr = seg->lscr;
00767     if (out_lback) *out_lback = seg->lback;
00768     return seg->prob;
00769 }
00770 
00771 void
00772 ps_seg_free(ps_seg_t *seg)
00773 {
00774     ps_search_seg_free(seg);
00775 }
00776 
00777 ps_lattice_t *
00778 ps_get_lattice(ps_decoder_t *ps)
00779 {
00780     return ps_search_lattice(ps->search);
00781 }
00782 
00783 ps_nbest_t *
00784 ps_nbest(ps_decoder_t *ps, int sf, int ef,
00785          char const *ctx1, char const *ctx2)
00786 {
00787     ps_lattice_t *dag;
00788     ngram_model_t *lmset;
00789     ps_astar_t *nbest;
00790     float32 lwf;
00791     int32 w1, w2;
00792 
00793     if (ps->search == NULL)
00794         return NULL;
00795     if ((dag = ps_get_lattice(ps)) == NULL)
00796         return NULL;
00797 
00798     /* FIXME: This is all quite specific to N-Gram search.  Either we
00799      * should make N-best a method for each search module or it needs
00800      * to be abstracted to work for N-Gram and FSG. */
00801     if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
00802         lmset = NULL;
00803         lwf = 1.0f;
00804     }
00805     else {
00806         lmset = ((ngram_search_t *)ps->search)->lmset;
00807         lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
00808     }
00809 
00810     w1 = ctx1 ? s3dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
00811     w2 = ctx2 ? s3dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
00812     nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
00813 
00814     return (ps_nbest_t *)nbest;
00815 }
00816 
00817 void
00818 ps_nbest_free(ps_nbest_t *nbest)
00819 {
00820     ps_astar_finish(nbest);
00821 }
00822 
00823 ps_nbest_t *
00824 ps_nbest_next(ps_nbest_t *nbest)
00825 {
00826     ps_latpath_t *next;
00827 
00828     next = ps_astar_next(nbest);
00829     if (next == NULL) {
00830         ps_nbest_free(nbest);
00831         return NULL;
00832     }
00833     return nbest;
00834 }
00835 
00836 char const *
00837 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
00838 {
00839     if (nbest->top == NULL)
00840         return NULL;
00841     if (out_score) *out_score = nbest->top->score;
00842     return ps_astar_hyp(nbest, nbest->top);
00843 }
00844 
00845 ps_seg_t *
00846 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
00847 {
00848     if (nbest->top == NULL)
00849         return NULL;
00850     if (out_score) *out_score = nbest->top->score;
00851     return ps_astar_seg_iter(nbest, nbest->top, 1.0);
00852 }
00853 
00854 int
00855 ps_get_n_frames(ps_decoder_t *ps)
00856 {
00857     return ps->acmod->output_frame + 1;
00858 }
00859 
00860 void
00861 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
00862                 double *out_ncpu, double *out_nwall)
00863 {
00864     int32 frate;
00865 
00866     frate = cmd_ln_int32_r(ps->config, "-frate");
00867     *out_nspeech = (double)ps->acmod->output_frame / frate;
00868     *out_ncpu = ps->perf.t_cpu;
00869     *out_nwall = ps->perf.t_elapsed;
00870 }
00871 
00872 void
00873 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
00874                 double *out_ncpu, double *out_nwall)
00875 {
00876     int32 frate;
00877 
00878     frate = cmd_ln_int32_r(ps->config, "-frate");
00879     *out_nspeech = (double)ps->n_frame / frate;
00880     *out_ncpu = ps->perf.t_tot_cpu;
00881     *out_nwall = ps->perf.t_tot_elapsed;
00882 }
00883 
00884 void
00885 ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
00886                cmd_ln_t *config, acmod_t *acmod, s3dict_t *dict,
00887                dict2pid_t *d2p)
00888 {
00889     search->vt = vt;
00890     search->config = config;
00891     search->acmod = acmod;
00892     search->dict = dict;
00893     search->d2p = d2p;
00894     if (dict) {
00895         /* FIXME: redundant? */
00896         search->start_wid = s3dict_startwid(dict);
00897         search->finish_wid = s3dict_finishwid(dict);
00898         search->silence_wid = s3dict_silwid(dict);
00899     }
00900     else {
00901         search->start_wid = search->finish_wid = search->silence_wid = -1;
00902     }
00903 }
00904 
00905 void
00906 ps_search_deinit(ps_search_t *search)
00907 {
00908     /* FIXME: We will have refcounting on acmod, config, etc, at which
00909      * point we will free them here too. */
00910     ckd_free(search->hyp_str);
00911     ps_lattice_free(search->dag);
00912 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7