src/libpocketsphinx/ngram_search_fwdflat.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050 
00051 /* Local headers. */
00052 #include "ngram_search.h"
00053 #include "ps_lattice_internal.h"
00054 
00055 /* Debug switch: set to 1 so that chan_v_eval() expands to hmm_dump_vit_eval() (passing stderr) instead of plain hmm_vit_eval(). */
00056 #define __CHAN_DUMP__           0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062 
00063 static void
00064 ngram_fwdflat_expand_all(ngram_search_t *ngs)
00065 {
00066     int n_words, i;
00067 
00068     /* For all "real words" (not fillers or <s>/</s>) in the dictionary,
00069      *
00070      * 1) Add the ones which are in the LM to the fwdflat wordlist
00071      * 2) And to the expansion list (since we are expanding all)
00072      */
00073     ngs->n_expand_words = 0;
00074     n_words = ps_search_n_words(ngs);
00075     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00076     for (i = 0; i < n_words; ++i) {
00077         if (!s3dict_real_word(ps_search_dict(ngs), i))
00078             continue;
00079         if (!ngram_model_set_known_wid(ngs->lmset,
00080                                        s3dict_basewid(ps_search_dict(ngs),i)))
00081             continue;
00082         ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
00083         ngs->expand_word_list[ngs->n_expand_words] = i;
00084         bitvec_set(ngs->expand_word_flag, i);
00085         ngs->n_expand_words++;
00086     }
00087     E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);
00088     ngs->expand_word_list[ngs->n_expand_words] = -1;
00089     ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
00090 }
00091 
00092 void
00093 ngram_fwdflat_init(ngram_search_t *ngs)
00094 {
00095     int n_words, i;
00096 
00097     n_words = ps_search_n_words(ngs);
00098     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00099     ngs->expand_word_flag = bitvec_alloc(n_words);
00100     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00101     ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));
00102     ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid");
00103     ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin");
00104     E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
00105            ngs->min_ef_width, ngs->max_sf_win);
00106 
00107     /* No tree-search; pre-build the expansion list, including all LM words. */
00108     if (!ngs->fwdtree) {
00109         s3dict_t *dict = ps_search_dict(ngs);
00110         int w;
00111 
00112         /* Build full expansion list from LM words. */
00113         ngram_fwdflat_expand_all(ngs);
00114 
00115         /* Allocate single-phone words, since they won't have
00116          * been allocated for us by fwdtree initialization. */
00117         ngs->n_1ph_words = 0;
00118         for (w = 0; w < n_words; w++) {
00119             if (s3dict_pronlen(dict, w) == 1)
00120                 ++ngs->n_1ph_words;
00121         }
00122         ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00123         i = 0;
00124         for (w = 0; w < n_words; w++) {
00125             if (s3dict_pronlen(dict, w) != 1)
00126                 continue;
00127 
00128             /* DICT2PID location */
00129             ngs->rhmm_1ph[i].ciphone = s3dict_first_phone(dict, w);
00130             ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00131             hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00132                      /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
00133                                                   ngs->rhmm_1ph[i].ciphone),
00134                      /* tmatid */ ngs->rhmm_1ph[i].ciphone);
00135             ngs->rhmm_1ph[i].next = NULL;
00136             ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00137             i++;
00138         }
00139     }
00140 }
00141 
00142 void
00143 ngram_fwdflat_deinit(ngram_search_t *ngs)
00144 {
00145     /* Free single-phone words if we allocated them. */
00146     if (!ngs->fwdtree) {
00147         ckd_free(ngs->rhmm_1ph);
00148     }
00149     ckd_free(ngs->fwdflat_wordlist);
00150     bitvec_free(ngs->expand_word_flag);
00151     ckd_free(ngs->expand_word_list);
00152     ckd_free(ngs->frm_wordlist);
00153 }
00154 
00155 int
00156 ngram_fwdflat_reinit(ngram_search_t *ngs)
00157 {
00158     /* No tree-search; re-build the expansion list from all LM words. */
00159     if (!ngs->fwdtree) {
00160         /* Rebuild full expansion list from LM words. */
00161         ngram_fwdflat_expand_all(ngs);
00162     }
00163     /* Otherwise there is nothing to do since the wordlist is
00164      * generated anew every utterance. */
00165     return 0;
00166 }
00167 
00171 static void
00172 build_fwdflat_wordlist(ngram_search_t *ngs)
00173 {
00174     int32 i, f, sf, ef, wid, nwd;
00175     s3dict_t *dict;
00176     bptbl_t *bp;
00177     ps_latnode_t *node, *prevnode, *nextnode;
00178 
00179     /* No tree-search, use statically allocated wordlist. */
00180     if (!ngs->fwdtree)
00181         return;
00182 
00183     dict = ps_search_dict(ngs);
00184 
00185     memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));
00186 
00187     /* Scan the backpointer table for all active words and record
00188      * their exit frames. */
00189     for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
00190         sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
00191         ef = bp->frame;
00192         wid = bp->wid;
00193 
00194         /*
00195          * NOTE: fwdflat_wordlist excludes <s>, <sil> and noise words;
00196          * it includes </s>.  That is, it includes anything to which a
00197          * transition can be made in the LM.
00198          */
00199         /* Ignore silence and <s> */
00200         if (s3dict_filler_word(dict, wid) || (wid == s3dict_startwid(dict)))
00201             continue;
00202 
00203         /* Look for it in the wordlist. */
00204         for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
00205              node = node->next);
00206 
00207         /* Update last end frame. */
00208         if (node)
00209             node->lef = ef;
00210         else {
00211             /* New node; link to head of list */
00212             node = listelem_malloc(ngs->latnode_alloc);
00213             node->wid = wid;
00214             node->fef = node->lef = ef;
00215 
00216             node->next = ngs->frm_wordlist[sf];
00217             ngs->frm_wordlist[sf] = node;
00218         }
00219     }
00220 
00221     /* Eliminate "unlikely" words, for which there are too few end points */
00222     for (f = 0; f < ngs->n_frame; f++) {
00223         prevnode = NULL;
00224         for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
00225             nextnode = node->next;
00226             /* Word has too few endpoints */
00227             if ((node->lef - node->fef < ngs->min_ef_width) ||
00228                 /* Word is </s> and doesn't actually end in last frame */
00229                 ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
00230                 if (!prevnode)
00231                     ngs->frm_wordlist[f] = nextnode;
00232                 else
00233                     prevnode->next = nextnode;
00234                 listelem_free(ngs->latnode_alloc, node);
00235             }
00236             else
00237                 prevnode = node;
00238         }
00239     }
00240 
00241     /* Form overall wordlist for 2nd pass */
00242     nwd = 0;
00243     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00244     for (f = 0; f < ngs->n_frame; f++) {
00245         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00246             if (!bitvec_is_set(ngs->word_active, node->wid)) {
00247                 bitvec_set(ngs->word_active, node->wid);
00248                 ngs->fwdflat_wordlist[nwd++] = node->wid;
00249             }
00250         }
00251     }
00252     ngs->fwdflat_wordlist[nwd] = -1;
00253     E_INFO("Utterance vocabulary contains %d words\n", nwd);
00254 }
00255 
00259 static void
00260 build_fwdflat_chan(ngram_search_t *ngs)
00261 {
00262     int32 i, wid, p;
00263     root_chan_t *rhmm;
00264     chan_t *hmm, *prevhmm;
00265     s3dict_t *dict;
00266     dict2pid_t *d2p;
00267 
00268     dict = ps_search_dict(ngs);
00269     d2p = ps_search_dict2pid(ngs);
00270 
00271     /* Build word HMMs for each word in the lattice. */
00272     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00273         wid = ngs->fwdflat_wordlist[i];
00274 
00275         /* Omit single-phone words as they are permanently allocated */
00276         if (s3dict_pronlen(dict, wid) == 1)
00277             continue;
00278 
00279         assert(ngs->word_chan[wid] == NULL);
00280 
00281         /* Multiplex root HMM for first phone (one root per word, flat
00282          * lexicon).  diphone is irrelevant here, for the time being,
00283          * at least. */
00284         rhmm = listelem_malloc(ngs->root_chan_alloc);
00285         rhmm->ci2phone = s3dict_pron(dict, wid, 1);
00286         rhmm->ciphone = s3dict_first_phone(dict, wid);
00287         rhmm->next = NULL;
00288         hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
00289                  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
00290                  rhmm->ciphone);
00291 
00292         /* HMMs for word-internal phones */
00293         prevhmm = NULL;
00294         for (p = 1; p < s3dict_pronlen(dict, wid) - 1; p++) {
00295             hmm = listelem_malloc(ngs->chan_alloc);
00296             hmm->ciphone = s3dict_pron(dict, wid, p);
00297             hmm->info.rc_id = (p == s3dict_pronlen(dict, wid) - 1) ? 0 : -1;
00298             hmm->next = NULL;
00299             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
00300                      dict2pid_internal(d2p,wid,p), hmm->ciphone);
00301 
00302             if (prevhmm)
00303                 prevhmm->next = hmm;
00304             else
00305                 rhmm->next = hmm;
00306 
00307             prevhmm = hmm;
00308         }
00309 
00310         /* Right-context phones */
00311         ngram_search_alloc_all_rc(ngs, wid);
00312 
00313         /* Link in just allocated right-context phones */
00314         if (prevhmm)
00315             prevhmm->next = ngs->word_chan[wid];
00316         else
00317             rhmm->next = ngs->word_chan[wid];
00318         ngs->word_chan[wid] = (chan_t *) rhmm;
00319     }
00320 }
00321 
00322 void
00323 ngram_fwdflat_start(ngram_search_t *ngs)
00324 {
00325     root_chan_t *rhmm;
00326     int i;
00327 
00328     build_fwdflat_wordlist(ngs);
00329     build_fwdflat_chan(ngs);
00330 
00331     ngs->bpidx = 0;
00332     ngs->bss_head = 0;
00333 
00334     for (i = 0; i < ps_search_n_words(ngs); i++)
00335         ngs->word_lat_idx[i] = NO_BP;
00336 
00337     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00338     rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)];
00339     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00340     ngs->active_word_list[0][0] = ps_search_start_wid(ngs);
00341     ngs->n_active_word[0] = 1;
00342 
00343     ngs->best_score = 0;
00344     ngs->renormalized = FALSE;
00345 
00346     for (i = 0; i < ps_search_n_words(ngs); i++)
00347         ngs->last_ltrans[i].sf = -1;
00348 
00349     if (!ngs->fwdtree)
00350         ngs->n_frame = 0;
00351 
00352     ngs->st.n_fwdflat_chan = 0;
00353     ngs->st.n_fwdflat_words = 0;
00354     ngs->st.n_fwdflat_word_transition = 0;
00355     ngs->st.n_senone_active_utt = 0;
00356 }
00357 
00358 static void
00359 compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
00360 {
00361     int32 i, w;
00362     int32 *awl;
00363     root_chan_t *rhmm;
00364     chan_t *hmm;
00365 
00366     acmod_clear_active(ps_search_acmod(ngs));
00367 
00368     i = ngs->n_active_word[frame_idx & 0x1];
00369     awl = ngs->active_word_list[frame_idx & 0x1];
00370 
00371     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00372         rhmm = (root_chan_t *)ngs->word_chan[w];
00373         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00374             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00375         }
00376 
00377         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00378             if (hmm_frame(&hmm->hmm) == frame_idx) {
00379                 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00380             }
00381         }
00382     }
00383 }
00384 
00385 static void
00386 fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
00387 {
00388     int32 i, w, bestscore;
00389     int32 *awl;
00390     root_chan_t *rhmm;
00391     chan_t *hmm;
00392 
00393     i = ngs->n_active_word[frame_idx & 0x1];
00394     awl = ngs->active_word_list[frame_idx & 0x1];
00395     bestscore = WORST_SCORE;
00396 
00397     ngs->st.n_fwdflat_words += i;
00398 
00399     /* Scan all active words. */
00400     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00401         rhmm = (root_chan_t *) ngs->word_chan[w];
00402         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00403             int32 score = chan_v_eval(rhmm);
00404             if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs)))
00405                 bestscore = score;
00406             ngs->st.n_fwdflat_chan++;
00407         }
00408 
00409         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00410             if (hmm_frame(&hmm->hmm) == frame_idx) {
00411                 int32 score = chan_v_eval(hmm);
00412                 if (score BETTER_THAN bestscore)
00413                     bestscore = score;
00414                 ngs->st.n_fwdflat_chan++;
00415             }
00416         }
00417     }
00418 
00419     ngs->best_score = bestscore;
00420 }
00421 
00422 static void
00423 fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
00424 {
00425     int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
00426     int32 *awl;
00427     root_chan_t *rhmm;
00428     chan_t *hmm, *nexthmm;
00429 
00430     cf = frame_idx;
00431     nf = cf + 1;
00432     i = ngs->n_active_word[cf & 0x1];
00433     awl = ngs->active_word_list[cf & 0x1];
00434     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00435 
00436     thresh = ngs->best_score + ngs->fwdflatbeam;
00437     wordthresh = ngs->best_score + ngs->fwdflatwbeam;
00438     pip = ngs->pip;
00439     E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));
00440 
00441     /* Scan all active words. */
00442     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00443         rhmm = (root_chan_t *) ngs->word_chan[w];
00444         /* Propagate active root channels */
00445         if (hmm_frame(&rhmm->hmm) == cf
00446             && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00447             hmm_frame(&rhmm->hmm) = nf;
00448             bitvec_set(ngs->word_active, w);
00449 
00450             /* Transitions out of root channel */
00451             newscore = hmm_out_score(&rhmm->hmm);
00452             if (rhmm->next) {
00453                 assert(s3dict_pronlen(ps_search_dict(ngs), w) > 1);
00454 
00455                 newscore += pip;
00456                 if (newscore BETTER_THAN thresh) {
00457                     hmm = rhmm->next;
00458                     /* Enter all right context phones */
00459                     if (hmm->info.rc_id >= 0) {
00460                         for (; hmm; hmm = hmm->next) {
00461                             if ((hmm_frame(&hmm->hmm) < cf)
00462                                 || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00463                                 hmm_enter(&hmm->hmm, newscore,
00464                                           hmm_out_history(&rhmm->hmm), nf);
00465                             }
00466                         }
00467                     }
00468                     /* Just a normal word internal phone */
00469                     else {
00470                         if ((hmm_frame(&hmm->hmm) < cf)
00471                             || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00472                                 hmm_enter(&hmm->hmm, newscore,
00473                                           hmm_out_history(&rhmm->hmm), nf);
00474                         }
00475                     }
00476                 }
00477             }
00478             else {
00479                 assert(s3dict_pronlen(ps_search_dict(ngs), w) == 1);
00480 
00481                 /* Word exit for single-phone words (where did their
00482                  * whmms come from?) */
00483                 if (newscore BETTER_THAN wordthresh) {
00484                     ngram_search_save_bp(ngs, cf, w, newscore,
00485                                          hmm_out_history(&rhmm->hmm), 0);
00486                 }
00487             }
00488         }
00489 
00490         /* Transitions out of non-root channels. */
00491         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00492             if (hmm_frame(&hmm->hmm) >= cf) {
00493                 /* Propagate forward HMMs inside the beam. */
00494                 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00495                     hmm_frame(&hmm->hmm) = nf;
00496                     bitvec_set(ngs->word_active, w);
00497 
00498                     newscore = hmm_out_score(&hmm->hmm);
00499                     /* Word-internal phones */
00500                     if (hmm->info.rc_id < 0) {
00501                         newscore += pip;
00502                         if (newscore BETTER_THAN thresh) {
00503                             nexthmm = hmm->next;
00504                             /* Enter all right-context phones. */
00505                             if (nexthmm->info.rc_id >= 0) {
00506                                  for (; nexthmm; nexthmm = nexthmm->next) {
00507                                     if ((hmm_frame(&nexthmm->hmm) < cf)
00508                                         || (newscore BETTER_THAN
00509                                             hmm_in_score(&nexthmm->hmm))) {
00510                                         hmm_enter(&nexthmm->hmm,
00511                                                   newscore,
00512                                                   hmm_out_history(&hmm->hmm),
00513                                                   nf);
00514                                     }
00515                                 }
00516                             }
00517                             /* Enter single word-internal phone. */
00518                             else {
00519                                 if ((hmm_frame(&nexthmm->hmm) < cf)
00520                                     || (newscore BETTER_THAN
00521                                         hmm_in_score(&nexthmm->hmm))) {
00522                                     hmm_enter(&nexthmm->hmm, newscore,
00523                                               hmm_out_history(&hmm->hmm), nf);
00524                                 }
00525                             }
00526                         }
00527                     }
00528                     /* Right-context phones - apply word beam and exit. */
00529                     else {
00530                         if (newscore BETTER_THAN wordthresh) {
00531                             ngram_search_save_bp(ngs, cf, w, newscore,
00532                                                  hmm_out_history(&hmm->hmm),
00533                                                  hmm->info.rc_id);
00534                         }
00535                     }
00536                 }
00537                 /* Zero out inactive HMMs. */
00538                 else if (hmm_frame(&hmm->hmm) != nf) {
00539                     hmm_clear_scores(&hmm->hmm);
00540                 }
00541             }
00542         }
00543     }
00544 }
00545 
00546 static void
00547 get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
00548 {
00549     int32 f, sf, ef;
00550     ps_latnode_t *node;
00551 
00552     if (!ngs->fwdtree) {
00553         ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00554         return;
00555     }
00556 
00557     sf = frm - win;
00558     if (sf < 0)
00559         sf = 0;
00560     ef = frm + win;
00561     if (ef > ngs->n_frame)
00562         ef = ngs->n_frame;
00563 
00564     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00565     ngs->n_expand_words = 0;
00566 
00567     for (f = sf; f < ef; f++) {
00568         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00569             if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
00570                 ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
00571                 bitvec_set(ngs->expand_word_flag, node->wid);
00572             }
00573         }
00574     }
00575     ngs->expand_word_list[ngs->n_expand_words] = -1;
00576     ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00577 }
00578 
00579 static void
00580 fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
00581 {
00582     int32 cf, nf, b, thresh, pip, i, w, newscore;
00583     int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
00584     bptbl_t *bp;
00585     int32 *rcss;
00586     root_chan_t *rhmm;
00587     int32 *awl;
00588     float32 lwf;
00589     s3dict_t *dict = ps_search_dict(ngs);
00590     dict2pid_t *d2p = ps_search_dict2pid(ngs);
00591 
00592     cf = frame_idx;
00593     nf = cf + 1;
00594     thresh = ngs->best_score + ngs->fwdflatbeam;
00595     pip = ngs->pip;
00596     best_silrc_score = WORST_SCORE;
00597     lwf = ngs->fwdflat_fwdtree_lw_ratio;
00598 
00599     /* Search for all words starting within a window of this frame.
00600      * These are the successors for words exiting now. */
00601     get_expand_wordlist(ngs, cf, ngs->max_sf_win);
00602 
00603     /* Scan words exited in current frame */
00604     for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
00605         xwdssid_t *rssid;
00606         int32 silscore;
00607 
00608         bp = ngs->bp_table + b;
00609         ngs->word_lat_idx[bp->wid] = NO_BP;
00610 
00611         if (bp->wid == ps_search_finish_wid(ngs))
00612             continue;
00613 
00614         /* DICT2PID location */
00615         /* Get the mapping from right context phone ID to index in the
00616          * right context table and the bscore_stack. */
00617         rcss = ngs->bscore_stack + bp->s_idx;
00618         if (bp->last2_phone == -1)
00619             rssid = NULL;
00620         else
00621             rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);
00622 
00623         /* Transition to all successor words. */
00624         for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
00625             int32 n_used;
00626 
00627             w = ngs->expand_word_list[i];
00628 
00629             /* Get the exit score we recorded in save_bwd_ptr(), or
00630              * something approximating it. */
00631             if (rssid)
00632                 newscore = rcss[rssid->cimap[s3dict_first_phone(dict, w)]];
00633             else
00634                 newscore = rcss[0];
00635             if (newscore == WORST_SCORE)
00636                 continue;
00637             /* FIXME: Floating point... */
00638             newscore += lwf
00639                 * ngram_tg_score(ngs->lmset,
00640                                  s3dict_basewid(dict, w),
00641                                  bp->real_wid,
00642                                  bp->prev_real_wid, &n_used);
00643             newscore += pip;
00644 
00645             /* Enter the next word */
00646             if (newscore BETTER_THAN thresh) {
00647                 rhmm = (root_chan_t *) ngs->word_chan[w];
00648                 if ((hmm_frame(&rhmm->hmm) < cf)
00649                     || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00650                     hmm_enter(&rhmm->hmm, newscore, b, nf);
00651                     /* DICT2PID: This is where mpx ssids get introduced. */
00652                     /* Look up the ssid to use when entering this mpx triphone. */
00653                     hmm_mpx_ssid(&rhmm->hmm, 0) =
00654                         d2p->ldiph_lc[rhmm->ciphone][rhmm->ci2phone]
00655                         [s3dict_last_phone(dict, bp->wid)];
00656                     assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
00657                     E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
00658                                rhmm->ciphone, s3dict_last_phone(dict, bp->wid), rhmm->ci2phone,
00659                                hmm_mpx_ssid(&rhmm->hmm, 0)));
00660                     bitvec_set(ngs->word_active, w);
00661                 }
00662             }
00663         }
00664 
00665         /* Get the best exit into silence. */
00666         if (rssid)
00667             silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
00668         else
00669             silscore = rcss[0];
00670         if (silscore BETTER_THAN best_silrc_score) {
00671             best_silrc_score = silscore;
00672             best_silrc_bp = b;
00673         }
00674     }
00675 
00676     /* Transition to <sil> */
00677     newscore = best_silrc_score + ngs->silpen + pip;
00678     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00679         w = ps_search_silence_wid(ngs);
00680         rhmm = (root_chan_t *) ngs->word_chan[w];
00681         if ((hmm_frame(&rhmm->hmm) < cf)
00682             || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00683             hmm_enter(&rhmm->hmm, newscore,
00684                       best_silrc_bp, nf);
00685             bitvec_set(ngs->word_active, w);
00686         }
00687     }
00688     /* Transition to noise words */
00689     newscore = best_silrc_score + ngs->fillpen + pip;
00690     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00691         for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
00692             rhmm = (root_chan_t *) ngs->word_chan[w];
00693             /* Noise words that aren't a single phone will have NULL here. */
00694             if (rhmm == NULL)
00695                 continue;
00696             if ((hmm_frame(&rhmm->hmm) < cf)
00697                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00698                 hmm_enter(&rhmm->hmm, newscore,
00699                           best_silrc_bp, nf);
00700                 bitvec_set(ngs->word_active, w);
00701             }
00702         }
00703     }
00704 
00705     /* Reset initial channels of words that have become inactive even after word trans. */
00706     i = ngs->n_active_word[cf & 0x1];
00707     awl = ngs->active_word_list[cf & 0x1];
00708     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00709         rhmm = (root_chan_t *) ngs->word_chan[w];
00710         if (hmm_frame(&rhmm->hmm) == cf) {
00711             hmm_clear_scores(&rhmm->hmm);
00712         }
00713     }
00714 }
00715 
00716 static void
00717 fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00718 {
00719     root_chan_t *rhmm;
00720     chan_t *hmm;
00721     int32 i, cf, w, *awl;
00722 
00723     cf = frame_idx;
00724 
00725     /* Renormalize individual word channels */
00726     i = ngs->n_active_word[cf & 0x1];
00727     awl = ngs->active_word_list[cf & 0x1];
00728     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00729         rhmm = (root_chan_t *) ngs->word_chan[w];
00730         if (hmm_frame(&rhmm->hmm) == cf) {
00731             hmm_normalize(&rhmm->hmm, norm);
00732         }
00733         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00734             if (hmm_frame(&hmm->hmm) == cf) {
00735                 hmm_normalize(&hmm->hmm, norm);
00736             }
00737         }
00738     }
00739 
00740     ngs->renormalized = TRUE;
00741 }
00742 
00743 int
00744 ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
00745 {
00746     int16 const *senscr;
00747     int32 nf, i, j;
00748     int32 *nawl;
00749 
00750     /* Activate our HMMs for the current frame if need be. */
00751     if (!ps_search_acmod(ngs)->compallsen)
00752         compute_fwdflat_sen_active(ngs, frame_idx);
00753 
00754     /* Compute GMM scores for the current frame. */
00755     senscr = acmod_score(ps_search_acmod(ngs), &frame_idx);
00756     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
00757 
00758     /* Mark backpointer table for current frame. */
00759     ngram_search_mark_bptable(ngs, frame_idx);
00760 
00761     /* Renormalize if necessary (FIXME: Make sure to test this) */
00762     if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
00763         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00764                frame_idx, ngs->best_score);
00765         fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
00766     }
00767 
00768     ngs->best_score = WORST_SCORE;
00769     hmm_context_set_senscore(ngs->hmmctx, senscr);
00770 
00771     /* Evaluate HMMs */
00772     fwdflat_eval_chan(ngs, frame_idx);
00773     /* Prune HMMs and do phone transitions. */
00774     fwdflat_prune_chan(ngs, frame_idx);
00775     /* Do word transitions. */
00776     fwdflat_word_transition(ngs, frame_idx);
00777 
00778     /* Create next active word list */
00779     nf = frame_idx + 1;
00780     nawl = ngs->active_word_list[nf & 0x1];
00781     for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00782         if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) {
00783             *(nawl++) = ngs->fwdflat_wordlist[i];
00784             j++;
00785         }
00786     }
00787     for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) {
00788         if (bitvec_is_set(ngs->word_active, i)) {
00789             *(nawl++) = i;
00790             j++;
00791         }
00792     }
00793     if (!ngs->fwdtree)
00794         ++ngs->n_frame;
00795     ngs->n_active_word[nf & 0x1] = j;
00796 
00797     /* Return the number of frames processed. */
00798     return 1;
00799 }
00800 
00804 static void
00805 destroy_fwdflat_wordlist(ngram_search_t *ngs)
00806 {
00807     ps_latnode_t *node, *tnode;
00808     int32 f;
00809 
00810     if (!ngs->fwdtree)
00811         return;
00812 
00813     for (f = 0; f < ngs->n_frame; f++) {
00814         for (node = ngs->frm_wordlist[f]; node; node = tnode) {
00815             tnode = node->next;
00816             listelem_free(ngs->latnode_alloc, node);
00817         }
00818     }
00819 }
00820 
00824 static void
00825 destroy_fwdflat_chan(ngram_search_t *ngs)
00826 {
00827     int32 i, wid;
00828 
00829     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00830         root_chan_t *rhmm;
00831         chan_t *thmm;
00832         wid = ngs->fwdflat_wordlist[i];
00833         if (s3dict_pronlen(ps_search_dict(ngs),wid) == 1)
00834             continue;
00835         assert(ngs->word_chan[wid] != NULL);
00836 
00837         /* The first HMM in ngs->word_chan[wid] was allocated with
00838          * ngs->root_chan_alloc, but this will attempt to free it
00839          * using ngs->chan_alloc, which will not work.  Therefore we
00840          * free it manually and move the list forward before handing
00841          * it off. */
00842         rhmm = (root_chan_t *)ngs->word_chan[wid];
00843         thmm = rhmm->next;
00844         listelem_free(ngs->root_chan_alloc, rhmm);
00845         ngs->word_chan[wid] = thmm;
00846         ngram_search_free_all_rc(ngs, wid);
00847     }
00848 }
00849 
00850 void
00851 ngram_fwdflat_finish(ngram_search_t *ngs)
00852 {
00853     int32 cf;
00854 
00855     destroy_fwdflat_chan(ngs);
00856     destroy_fwdflat_wordlist(ngs);
00857     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00858 
00859     /* This is the number of frames processed. */
00860     cf = ps_search_acmod(ngs)->output_frame;
00861     /* Add a mark in the backpointer table for one past the final frame. */
00862     ngram_search_mark_bptable(ngs, cf);
00863 
00864     /* Print out some statistics. */
00865     if (cf > 0) {
00866         E_INFO("%8d words recognized (%d/fr)\n",
00867                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
00868         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
00869                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
00870         E_INFO("%8d channels searched (%d/fr)\n",
00871                ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1));
00872         E_INFO("%8d words searched (%d/fr)\n",
00873                ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1));
00874         E_INFO("%8d word transitions (%d/fr)\n",
00875                ngs->st.n_fwdflat_word_transition,
00876                ngs->st.n_fwdflat_word_transition / (cf + 1));
00877     }
00878 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7