src/libpocketsphinx/ngram_search_fwdtree.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050 
00051 /* Local headers. */
00052 #include "ngram_search_fwdtree.h"
00053 #include "phone_loop_search.h"
00054 
00055 /* Turn this on to dump channels for debugging */
00056 #define __CHAN_DUMP__           0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062 
00063 /*
00064  * Allocate that part of the search channel tree structure that is independent of the
00065  * LM in use.
00066  */
00067 static void
00068 init_search_tree(ngram_search_t *ngs)
00069 {
00070     int32 w, ndiph, i, n_words, n_ci;
00071     s3dict_t *dict = ps_search_dict(ngs);
00072     bitvec_t *dimap;
00073 
00074     n_words = ps_search_n_words(ngs);
00075     ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
00076 
00077     /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
00078     ndiph = 0;
00079     ngs->n_1ph_words = 0;
00080     n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
00081     /* Allocate a bitvector with flags for each possible diphone. */
00082     dimap = bitvec_alloc(n_ci * n_ci);
00083     for (w = 0; w < n_words; w++) {
00084         if (!s3dict_real_word(dict, w))
00085             continue;
00086         if (s3dict_pronlen(dict, w) == 1)
00087             ++ngs->n_1ph_words;
00088         else {
00089             int ph0, ph1;
00090             ph0 = s3dict_pron(dict, w, 0);
00091             ph1 = s3dict_pron(dict, w, 1);
00092             /* Increment ndiph the first time we see a diphone. */
00093             if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
00094                 bitvec_set(dimap, ph0 * n_ci + ph1);
00095                 ++ndiph;
00096             }
00097         }
00098     }
00099     E_INFO("%d unique initial diphones\n", ndiph);
00100     bitvec_free(dimap);
00101 
00102     /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
00103     ngs->n_1ph_words += s3dict_num_fillers(dict) + 2;
00104     ngs->n_root_chan_alloc = ndiph + 1;
00105     /* Verify that these are all *actually* single-phone words,
00106      * otherwise really bad things will happen to us. */
00107     for (w = 0; w < n_words; ++w) {
00108         if (s3dict_real_word(dict, w))
00109             continue;
00110         if (s3dict_pronlen(dict, w) != 1) {
00111             E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
00112                    w, s3dict_wordstr(dict, w));
00113             --ngs->n_1ph_words;
00114         }
00115     }
00116 
00117     /* Allocate and initialize root channels */
00118     ngs->root_chan =
00119         ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
00120     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00121         hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
00122         ngs->root_chan[i].penult_phn_wid = -1;
00123         ngs->root_chan[i].next = NULL;
00124     }
00125 
00126     /* Permanently allocate and initialize channels for single-phone
00127      * words (1/word). */
00128     ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00129     i = 0;
00130     for (w = 0; w < n_words; w++) {
00131         if (s3dict_pronlen(dict, w) != 1)
00132             continue;
00133         /* Use SIL as right context for these. */
00134         ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00135         ngs->rhmm_1ph[i].ciphone = s3dict_first_phone(dict, w);
00136         hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00137                  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
00138                  ngs->rhmm_1ph[i].ciphone);
00139         ngs->rhmm_1ph[i].next = NULL;
00140 
00141         ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00142         i++;
00143     }
00144 
00145     ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00146                                        sizeof(*ngs->single_phone_wid));
00147     E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00148            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00149 }
00150 
00151 /*
00152  * One-time initialization of internal channels in HMM tree.
00153  */
00154 static void
00155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci)
00156 {
00157     hmm->next = NULL;
00158     hmm->alt = NULL;
00159     hmm->info.penult_phn_wid = -1;
00160     hmm->ciphone = ci;
00161     hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, ci);
00162 }
00163 
00164 /*
00165  * Allocate and initialize search channel-tree structure.
00166  * At this point, all the root-channels have been allocated and partly initialized
00167  * (as per init_search_tree()), and channels for all the single-phone words have been
00168  * allocated and initialized.  None of the interior channels of search-trees have
00169  * been allocated.
00170  * This routine may be called on every utterance, after reinit_search_tree() clears
00171  * the search tree created for the previous utterance.  Meant for reconfiguring the
00172  * search tree to suit the currently active LM.
00173  */
00174 static void
00175 create_search_tree(ngram_search_t *ngs)
00176 {
00177     chan_t *hmm;
00178     root_chan_t *rhmm;
00179     int32 w, i, j, p, ph;
00180     int32 n_words;
00181     s3dict_t *dict = ps_search_dict(ngs);
00182     dict2pid_t *d2p = ps_search_dict2pid(ngs);
00183 
00184     n_words = ps_search_n_words(ngs);
00185 
00186     E_INFO("Creating search tree\n");
00187 
00188     for (w = 0; w < n_words; w++)
00189         ngs->homophone_set[w] = -1;
00190 
00191     E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
00192            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00193 
00194     ngs->n_1ph_LMwords = 0;
00195     ngs->n_root_chan = 0;
00196     ngs->n_nonroot_chan = 0;
00197 
00198     for (w = 0; w < n_words; w++) {
00199         int ciphone, ci2phone;
00200 
00201         /* Ignore dictionary words not in LM */
00202         if (!ngram_model_set_known_wid(ngs->lmset, s3dict_basewid(dict, w)))
00203             continue;
00204 
00205         /* Handle single-phone words individually; not in channel tree */
00206         if (s3dict_pronlen(dict, w) == 1) {
00207             E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00208                        ngs->n_1ph_LMwords, s3dict_wordstr(dict, w)));
00209             ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
00210             continue;
00211         }
00212 
00213         /* Find a root channel matching the initial diphone, or
00214          * allocate one if not found. */
00215         ciphone = s3dict_pron(dict, w, 0);
00216         ci2phone = s3dict_pron(dict, w, 1);
00217         for (i = 0; i < ngs->n_root_chan; ++i) {
00218             if (ngs->root_chan[i].ciphone == ciphone
00219                 && ngs->root_chan[i].ci2phone == ci2phone)
00220                 break;
00221         }
00222         if (i == ngs->n_root_chan) {
00223             rhmm = &(ngs->root_chan[ngs->n_root_chan]);
00224             rhmm->hmm.tmatid = ciphone;
00225             /* Begin with CI phone?  Not sure this makes a difference... */
00226             hmm_mpx_ssid(&rhmm->hmm, 0) =
00227                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
00228             rhmm->ciphone = ciphone;
00229             rhmm->ci2phone = ci2phone;
00230             ngs->n_root_chan++;
00231         }
00232         else
00233             rhmm = &(ngs->root_chan[i]);
00234 
00235         E_DEBUG(2, ("word %s rhmm %d\n", s3dict_wordstr(dict, w), rhmm - ngs->root_chan));
00236         /* Now, rhmm = root channel for w.  Go on to remaining phones */
00237         if (s3dict_pronlen(dict, w) == 2) {
00238             /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
00239             if ((j = rhmm->penult_phn_wid) < 0)
00240                 rhmm->penult_phn_wid = w;
00241             else {
00242                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00243                 ngs->homophone_set[j] = w;
00244             }
00245         }
00246         else {
00247             /* Add remaining phones, except the last, to tree */
00248             ph = dict2pid_internal(d2p, w, 1);
00249             hmm = rhmm->next;
00250             if (hmm == NULL) {
00251                 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
00252                 init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, 1));
00253                 ngs->n_nonroot_chan++;
00254             }
00255             else {
00256                 chan_t *prev_hmm = NULL;
00257 
00258                 for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
00259                     prev_hmm = hmm;
00260                 if (!hmm) {     /* thanks, rkm! */
00261                     prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00262                     init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, 1));
00263                     ngs->n_nonroot_chan++;
00264                 }
00265             }
00266             E_DEBUG(3,("phone %s = %d\n",
00267                        bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00268                                             s3dict_pron(dict, w, 1)), ph));
00269 
00270             for (p = 2; p < s3dict_pronlen(dict, w) - 1; p++) {
00271                 ph = dict2pid_internal(d2p, w, p);
00272                 if (!hmm->next) {
00273                     hmm->next = listelem_malloc(ngs->chan_alloc);
00274                     hmm = hmm->next;
00275                     init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, p));
00276                     ngs->n_nonroot_chan++;
00277                 }
00278                 else {
00279                     chan_t *prev_hmm = NULL;
00280 
00281                     for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
00282                          hmm = hmm->alt)
00283                         prev_hmm = hmm;
00284                     if (!hmm) { /* thanks, rkm! */
00285                         prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00286                         init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, p));
00287                         ngs->n_nonroot_chan++;
00288                     }
00289                 }
00290                 E_DEBUG(3,("phone %s = %d\n",
00291                            bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00292                                                 s3dict_pron(dict, w, p)), ph));
00293             }
00294 
00295             /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
00296             if ((j = hmm->info.penult_phn_wid) < 0)
00297                 hmm->info.penult_phn_wid = w;
00298             else {
00299                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00300                 ngs->homophone_set[j] = w;
00301             }
00302         }
00303     }
00304 
00305     ngs->n_1ph_words = ngs->n_1ph_LMwords;
00306 
00307     /* Add filler words to the array of 1ph words. */
00308     for (w = 0; w < n_words; ++w) {
00309         /* Skip anything that doesn't actually have a single phone. */
00310         if (s3dict_pronlen(dict, w) != 1)
00311             continue;
00312         /* Also skip "real words" and things that are in the LM. */
00313         if (s3dict_real_word(dict, w))
00314             continue;
00315         if (ngram_model_set_known_wid(ngs->lmset, s3dict_basewid(dict, w)))
00316             continue;
00317         E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00318                    ngs->n_1ph_words, s3dict_wordstr(dict, w)));
00319         ngs->single_phone_wid[ngs->n_1ph_words++] = w;
00320     }
00321 
00322     if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
00323         /* Give some room for channels for new words added dynamically at run time */
00324         ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
00325         E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
00326 
00327         /* Free old active channel list array if any and allocate new one */
00328         if (ngs->active_chan_list)
00329             ckd_free_2d(ngs->active_chan_list);
00330         ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
00331                                               sizeof(**ngs->active_chan_list));
00332     }
00333 
00334     E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
00335            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00336 }
00337 
00338 static void
00339 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
00340 {
00341     chan_t *child, *sibling;
00342 
00343     /* First free all children under hmm */
00344     for (child = hmm->next; child; child = sibling) {
00345         sibling = child->alt;
00346         reinit_search_subtree(ngs, child);
00347     }
00348 
00349     /* Now free hmm */
00350     hmm_deinit(&hmm->hmm);
00351     listelem_free(ngs->chan_alloc, hmm);
00352 }
00353 
00354 /*
00355  * Delete search tree by freeing all interior channels within search tree and
00356  * restoring root channel state to the init state (i.e., just after init_search_tree()).
00357  */
00358 static void
00359 reinit_search_tree(ngram_search_t *ngs)
00360 {
00361     int32 i;
00362     chan_t *hmm, *sibling;
00363 
00364     for (i = 0; i < ngs->n_root_chan; i++) {
00365         hmm = ngs->root_chan[i].next;
00366 
00367         while (hmm) {
00368             sibling = hmm->alt;
00369             reinit_search_subtree(ngs, hmm);
00370             hmm = sibling;
00371         }
00372 
00373         ngs->root_chan[i].penult_phn_wid = -1;
00374         ngs->root_chan[i].next = NULL;
00375     }
00376     ngs->n_nonroot_chan = 0;
00377 }
00378 
00379 void
00380 ngram_fwdtree_init(ngram_search_t *ngs)
00381 {
00382     /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
00383     ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
00384                                 sizeof(*ngs->bestbp_rc));
00385     ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00386                                    sizeof(*ngs->lastphn_cand));
00387     init_search_tree(ngs);
00388     create_search_tree(ngs);
00389 }
00390 
00391 void
00392 ngram_fwdtree_deinit(ngram_search_t *ngs)
00393 {
00394     int i, w, n_words;
00395 
00396     n_words = ps_search_n_words(ngs);
00397     /* Reset non-root channels. */
00398     reinit_search_tree(ngs);
00399 
00400     /* Now deallocate all the root channels too. */
00401     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00402         hmm_deinit(&ngs->root_chan[i].hmm);
00403     }
00404     if (ngs->rhmm_1ph) {
00405         for (i = w = 0; w < n_words; ++w) {
00406             if (s3dict_pronlen(ps_search_dict(ngs), w) != 1)
00407                 continue;
00408             hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00409             ++i;
00410         }
00411         ckd_free(ngs->rhmm_1ph);
00412     }
00413     ngs->n_nonroot_chan = 0;
00414     ckd_free(ngs->root_chan);
00415     ckd_free(ngs->homophone_set);
00416     ckd_free(ngs->single_phone_wid);
00417     ngs->max_nonroot_chan = 0;
00418     ckd_free_2d(ngs->active_chan_list);
00419     ckd_free(ngs->cand_sf);
00420     ckd_free(ngs->bestbp_rc);
00421     ckd_free(ngs->lastphn_cand);
00422 }
00423 
00424 int
00425 ngram_fwdtree_reinit(ngram_search_t *ngs)
00426 {
00427     reinit_search_tree(ngs);
00428     create_search_tree(ngs);
00429     return 0;
00430 }
00431 
00432 void
00433 ngram_fwdtree_start(ngram_search_t *ngs)
00434 {
00435     ps_search_t *base = (ps_search_t *)ngs;
00436     int32 i, w, n_words;
00437     root_chan_t *rhmm;
00438 
00439     n_words = ps_search_n_words(ngs);
00440 
00441     /* Reset utterance statistics. */
00442     memset(&ngs->st, 0, sizeof(ngs->st));
00443 
00444     /* Reset backpointer table. */
00445     ngs->bpidx = 0;
00446     ngs->bss_head = 0;
00447 
00448     /* Reset word lattice. */
00449     for (i = 0; i < n_words; ++i)
00450         ngs->word_lat_idx[i] = NO_BP;
00451 
00452     /* Reset active HMM and word lists. */
00453     ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
00454     ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
00455 
00456     /* Reset scores. */
00457     ngs->best_score = 0;
00458     ngs->renormalized = 0;
00459 
00460     /* Reset other stuff. */
00461     for (i = 0; i < n_words; i++)
00462         ngs->last_ltrans[i].sf = -1;
00463     ngs->n_frame = 0;
00464 
00465     /* Clear the hypothesis string. */
00466     ckd_free(base->hyp_str);
00467     base->hyp_str = NULL;
00468 
00469     /* Reset the permanently allocated single-phone words, since they
00470      * may have junk left over in them from FWDFLAT. */
00471     for (i = 0; i < ngs->n_1ph_words; i++) {
00472         w = ngs->single_phone_wid[i];
00473         rhmm = (root_chan_t *) ngs->word_chan[w];
00474         hmm_clear(&rhmm->hmm);
00475     }
00476 
00477     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00478     rhmm = (root_chan_t *) ngs->word_chan[s3dict_startwid(ps_search_dict(ngs))];
00479     hmm_clear(&rhmm->hmm);
00480     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00481 }
00482 
00483 /*
00484  * Mark the active senones for all senones belonging to channels that are active in the
00485  * current frame.
00486  */
00487 static void
00488 compute_sen_active(ngram_search_t *ngs, int frame_idx)
00489 {
00490     root_chan_t *rhmm;
00491     chan_t *hmm, **acl;
00492     int32 i, w, *awl;
00493 
00494     acmod_clear_active(ps_search_acmod(ngs));
00495 
00496     /* Flag active senones for root channels */
00497     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00498         if (hmm_frame(&rhmm->hmm) == frame_idx)
00499             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00500     }
00501 
00502     /* Flag active senones for nonroot channels in HMM tree */
00503     i = ngs->n_active_chan[frame_idx & 0x1];
00504     acl = ngs->active_chan_list[frame_idx & 0x1];
00505     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00506         acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00507     }
00508 
00509     /* Flag active senones for individual word channels */
00510     i = ngs->n_active_word[frame_idx & 0x1];
00511     awl = ngs->active_word_list[frame_idx & 0x1];
00512     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00513         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00514             acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00515         }
00516     }
00517     for (i = 0; i < ngs->n_1ph_words; i++) {
00518         w = ngs->single_phone_wid[i];
00519         rhmm = (root_chan_t *) ngs->word_chan[w];
00520 
00521         if (hmm_frame(&rhmm->hmm) == frame_idx)
00522             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00523     }
00524 }
00525 
00526 static void
00527 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00528 {
00529     root_chan_t *rhmm;
00530     chan_t *hmm, **acl;
00531     int32 i, w, *awl;
00532 
00533     /* Renormalize root channels */
00534     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00535         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00536             hmm_normalize(&rhmm->hmm, norm);
00537         }
00538     }
00539 
00540     /* Renormalize nonroot channels in HMM tree */
00541     i = ngs->n_active_chan[frame_idx & 0x1];
00542     acl = ngs->active_chan_list[frame_idx & 0x1];
00543     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00544         hmm_normalize(&hmm->hmm, norm);
00545     }
00546 
00547     /* Renormalize individual word channels */
00548     i = ngs->n_active_word[frame_idx & 0x1];
00549     awl = ngs->active_word_list[frame_idx & 0x1];
00550     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00551         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00552             hmm_normalize(&hmm->hmm, norm);
00553         }
00554     }
00555     for (i = 0; i < ngs->n_1ph_words; i++) {
00556         w = ngs->single_phone_wid[i];
00557         rhmm = (root_chan_t *) ngs->word_chan[w];
00558         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00559             hmm_normalize(&rhmm->hmm, norm);
00560         }
00561     }
00562 
00563     ngs->renormalized = TRUE;
00564 }
00565 
00566 static int32
00567 eval_root_chan(ngram_search_t *ngs, int frame_idx)
00568 {
00569     root_chan_t *rhmm;
00570     int32 i, bestscore;
00571 
00572     bestscore = WORST_SCORE;
00573     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00574         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00575             int32 score = chan_v_eval(rhmm);
00576             if (score BETTER_THAN bestscore)
00577                 bestscore = score;
00578             ++ngs->st.n_root_chan_eval;
00579         }
00580     }
00581     return (bestscore);
00582 }
00583 
00584 static int32
00585 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00586 {
00587     chan_t *hmm, **acl;
00588     int32 i, bestscore;
00589 
00590     i = ngs->n_active_chan[frame_idx & 0x1];
00591     acl = ngs->active_chan_list[frame_idx & 0x1];
00592     bestscore = WORST_SCORE;
00593     ngs->st.n_nonroot_chan_eval += i;
00594 
00595     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00596         int32 score = chan_v_eval(hmm);
00597         assert(hmm_frame(&hmm->hmm) == frame_idx);
00598         if (score BETTER_THAN bestscore)
00599             bestscore = score;
00600     }
00601 
00602     return bestscore;
00603 }
00604 
00605 static int32
00606 eval_word_chan(ngram_search_t *ngs, int frame_idx)
00607 {
00608     root_chan_t *rhmm;
00609     chan_t *hmm;
00610     int32 i, w, bestscore, *awl, j, k;
00611 
00612     k = 0;
00613     bestscore = WORST_SCORE;
00614     awl = ngs->active_word_list[frame_idx & 0x1];
00615 
00616     i = ngs->n_active_word[frame_idx & 0x1];
00617     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00618         assert(bitvec_is_set(ngs->word_active, w));
00619         bitvec_clear(ngs->word_active, w);
00620         assert(ngs->word_chan[w] != NULL);
00621 
00622         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00623             int32 score;
00624 
00625             assert(hmm_frame(&hmm->hmm) == frame_idx);
00626             score = chan_v_eval(hmm);
00627             /*printf("eval word chan %d score %d\n", w, score); */
00628 
00629             if (score BETTER_THAN bestscore)
00630                 bestscore = score;
00631 
00632             k++;
00633         }
00634     }
00635 
00636     /* Similarly for statically allocated single-phone words */
00637     j = 0;
00638     for (i = 0; i < ngs->n_1ph_words; i++) {
00639         int32 score;
00640 
00641         w = ngs->single_phone_wid[i];
00642         rhmm = (root_chan_t *) ngs->word_chan[w];
00643         if (hmm_frame(&rhmm->hmm) < frame_idx)
00644             continue;
00645 
00646         score = chan_v_eval(rhmm);
00647         /* printf("eval 1ph word chan %d score %d\n", w, score); */
00648         if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
00649             bestscore = score;
00650 
00651         j++;
00652     }
00653 
00654     ngs->st.n_last_chan_eval += k + j;
00655     ngs->st.n_nonroot_chan_eval += k + j;
00656     ngs->st.n_word_lastchan_eval +=
00657         ngs->n_active_word[frame_idx & 0x1] + j;
00658 
00659     return bestscore;
00660 }
00661 
00662 static int32
00663 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
00664 {
00665     int32 bs;
00666 
00667     hmm_context_set_senscore(ngs->hmmctx, senone_scores);
00668     ngs->best_score = eval_root_chan(ngs, frame_idx);
00669     if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00670         ngs->best_score = bs;
00671     if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00672         ngs->best_score = bs;
00673     ngs->last_phone_best_score = bs;
00674 
00675     return ngs->best_score;
00676 }
00677 
00678 /*
00679  * Prune currently active root channels for next frame.  Also, perform exit
00680  * transitions out of them and activate successors.
00681  * score[] of pruned root chan set to WORST_SCORE elsewhere.
00682  */
00683 static void
00684 prune_root_chan(ngram_search_t *ngs, int frame_idx)
00685 {
00686     root_chan_t *rhmm;
00687     chan_t *hmm;
00688     int32 i, nf, w;
00689     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00690     chan_t **nacl;              /* next active list */
00691     lastphn_cand_t *candp;
00692     phone_loop_search_t *pls;
00693 
00694     nf = frame_idx + 1;
00695     thresh = ngs->best_score + ngs->dynamic_beam;
00696     newphone_thresh = ngs->best_score + ngs->pbeam;
00697     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00698     nacl = ngs->active_chan_list[nf & 0x1];
00699     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00700 
00701     for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
00702         E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
00703                    i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
00704         /* First check if this channel was active in current frame */
00705         if (hmm_frame(&rhmm->hmm) < frame_idx)
00706             continue;
00707 
00708         if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00709             hmm_frame(&rhmm->hmm) = nf;  /* rhmm will be active in next frame */
00710             E_DEBUG(3, ("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
00711             /* transitions out of this root channel */
00712             /* transition to all next-level channels in the HMM tree */
00713             for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
00714                 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip
00715                     + phone_loop_search_score(pls, hmm->ciphone);
00716                 if (newphone_score BETTER_THAN newphone_thresh) {
00717                     if ((hmm_frame(&hmm->hmm) < frame_idx)
00718                         || (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00719                         hmm_enter(&hmm->hmm, newphone_score,
00720                                   hmm_out_history(&rhmm->hmm), nf);
00721                         *(nacl++) = hmm;
00722                     }
00723                 }
00724             }
00725 
00726             /*
00727              * Transition to last phone of all words for which this is the
00728              * penultimate phone (the last phones may need multiple right contexts).
00729              * Remember to remove the temporary newword_penalty.
00730              */
00731             for (w = rhmm->penult_phn_wid; w >= 0;
00732                  w = ngs->homophone_set[w]) {
00733                 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip
00734                     + phone_loop_search_score(pls, s3dict_last_phone(ps_search_dict(ngs),w));
00735                 E_DEBUG(3, ("wid %d newphone_score %d\n", w, newphone_score));
00736                 E_DEBUG(3, ("wid %d newphone_score %d\n", w, newphone_score));
00737                 if (newphone_score BETTER_THAN lastphn_thresh) {
00738                     candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00739                     ngs->n_lastphn_cand++;
00740                     candp->wid = w;
00741                     candp->score =
00742                         newphone_score - ngs->nwpen;
00743                     candp->bp = hmm_out_history(&rhmm->hmm);
00744                 }
00745             }
00746         }
00747     }
00748     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00749 }
00750 
00751 /*
00752  * Prune currently active nonroot channels in HMM tree for next frame.  Also, perform
00753  * exit transitions out of such channels and activate successors.
00754  */
00755 static void
00756 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00757 {
00758     chan_t *hmm, *nexthmm;
00759     int32 nf, w, i;
00760     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00761     chan_t **acl, **nacl;       /* active list, next active list */
00762     lastphn_cand_t *candp;
00763     phone_loop_search_t *pls;
00764 
00765     nf = frame_idx + 1;
00766 
00767     thresh = ngs->best_score + ngs->dynamic_beam;
00768     newphone_thresh = ngs->best_score + ngs->pbeam;
00769     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00770     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00771 
00772     acl = ngs->active_chan_list[frame_idx & 0x1];   /* currently active HMMs in tree */
00773     nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
00774 
00775     for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
00776          --i, hmm = *(acl++)) {
00777         assert(hmm_frame(&hmm->hmm) >= frame_idx);
00778 
00779         if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00780             /* retain this channel in next frame */
00781             if (hmm_frame(&hmm->hmm) != nf) {
00782                 hmm_frame(&hmm->hmm) = nf;
00783                 *(nacl++) = hmm;
00784             }
00785 
00786             /* transition to all next-level channel in the HMM tree */
00787             for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
00788                 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip
00789                     + phone_loop_search_score(pls, nexthmm->ciphone);
00790                 if ((newphone_score BETTER_THAN newphone_thresh)
00791                     && ((hmm_frame(&nexthmm->hmm) < frame_idx)
00792                         || (newphone_score
00793                             BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
00794                     if (hmm_frame(&nexthmm->hmm) != nf) {
00795                         /* Keep this HMM on the active list */
00796                         *(nacl++) = nexthmm;
00797                     }
00798                     hmm_enter(&nexthmm->hmm, newphone_score,
00799                               hmm_out_history(&hmm->hmm), nf);
00800                 }
00801             }
00802 
00803             /*
00804              * Transition to last phone of all words for which this is the
00805              * penultimate phone (the last phones may need multiple right contexts).
00806              * Remember to remove the temporary newword_penalty.
00807              */
00808             for (w = hmm->info.penult_phn_wid; w >= 0;
00809                  w = ngs->homophone_set[w]) {
00810                 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip
00811                     + phone_loop_search_score(pls, s3dict_last_phone(ps_search_dict(ngs),w));
00812                 if (newphone_score BETTER_THAN lastphn_thresh) {
00813                     candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00814                     ngs->n_lastphn_cand++;
00815                     candp->wid = w;
00816                     candp->score =
00817                         newphone_score - ngs->nwpen;
00818                     candp->bp = hmm_out_history(&hmm->hmm);
00819                 }
00820             }
00821         }
00822         else if (hmm_frame(&hmm->hmm) != nf) {
00823             hmm_clear_scores(&hmm->hmm);
00824         }
00825     }
00826     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00827 }
00828 
00829 /*
00830  * Execute the transition into the last phone for all candidates words emerging from
00831  * the HMM tree.  Attach LM scores to such transitions.
00832  * (Executed after pruning root and non-root, but before pruning word-chan.)
00833  */
00834 static void
00835 last_phone_transition(ngram_search_t *ngs, int frame_idx)
00836 {
00837     int32 i, j, k, nf, bp, bplast, w;
00838     lastphn_cand_t *candp;
00839     int32 *nawl;
00840     int32 thresh;
00841     int32 bestscore, dscr;
00842     chan_t *hmm;
00843     bptbl_t *bpe;
00844     int32 n_cand_sf = 0;
00845 
00846     nf = frame_idx + 1;
00847     nawl = ngs->active_word_list[nf & 0x1];
00848     ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
00849 
00850     /* For each candidate word (entering its last phone) */
00851     /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
00852     E_DEBUG(3, ("n_lastphn_cand %d\n", ngs->n_lastphn_cand));
00853     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00854         /* Backpointer entry for it. */
00855         bpe = &(ngs->bp_table[candp->bp]);
00856 
00857         /* Subtract starting score for candidate, leave it with only word score */
00858         candp->score -=
00859             ngram_search_exit_score
00860             (ngs, bpe, s3dict_first_phone(ps_search_dict(ngs), candp->wid));
00861         E_DEBUG(4, ("candp->score %d\n", candp->score));
00862         E_DEBUG(4, ("candp->score %d\n", candp->score));
00863 
00864         /*
00865          * If this candidate not occurred in an earlier frame, prepare for finding
00866          * best transition score into last phone; sort by start frame.
00867          */
00868         /* i.e. if we don't have an entry in last_ltrans for this
00869          * <word,sf>, then create one */
00870         if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
00871             /* Look for an entry in cand_sf matching the backpointer
00872              * for this candidate. */
00873             for (j = 0; j < n_cand_sf; j++) {
00874                 if (ngs->cand_sf[j].bp_ef == bpe->frame)
00875                     break;
00876             }
00877             /* Oh, we found one, so chain onto it. */
00878             if (j < n_cand_sf)
00879                 candp->next = ngs->cand_sf[j].cand;
00880             else {
00881                 /* Nope, let's make a new one, allocating cand_sf if necessary. */
00882                 if (n_cand_sf >= ngs->cand_sf_alloc) {
00883                     if (ngs->cand_sf_alloc == 0) {
00884                         ngs->cand_sf =
00885                             ckd_calloc(CAND_SF_ALLOCSIZE,
00886                                        sizeof(*ngs->cand_sf));
00887                         ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
00888                     }
00889                     else {
00890                         ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
00891                         ngs->cand_sf = ckd_realloc(ngs->cand_sf,
00892                                                    ngs->cand_sf_alloc
00893                                                    * sizeof(*ngs->cand_sf));
00894                         E_INFO("cand_sf[] increased to %d entries\n",
00895                                ngs->cand_sf_alloc);
00896                     }
00897                 }
00898 
00899                 /* Use the newly created cand_sf. */
00900                 j = n_cand_sf++;
00901                 candp->next = -1; /* End of the chain. */
00902                 ngs->cand_sf[j].bp_ef = bpe->frame;
00903             }
00904             /* Update it to point to this candidate. */
00905             ngs->cand_sf[j].cand = i;
00906 
00907             ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
00908             ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
00909         }
00910     }
00911 
00912     /* Compute best LM score and bp for new cands entered in the sorted lists above */
00913     for (i = 0; i < n_cand_sf; i++) {
00914         /* For the i-th unique end frame... */
00915         bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
00916         bplast = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1] - 1;
00917 
00918         for (bpe = &(ngs->bp_table[bp]); bp <= bplast; bp++, bpe++) {
00919             if (!bpe->valid)
00920                 continue;
00921             /* For each candidate at the start frame find bp->cand transition-score */
00922             for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
00923                 int32 n_used;
00924                 candp = &(ngs->lastphn_cand[j]);
00925                 dscr = 
00926                     ngram_search_exit_score
00927                     (ngs, bpe, s3dict_first_phone(ps_search_dict(ngs), candp->wid));
00928                 dscr += ngram_tg_score(ngs->lmset,
00929                                        s3dict_basewid(ps_search_dict(ngs), candp->wid),
00930                                        bpe->real_wid,
00931                                        bpe->prev_real_wid, &n_used);
00932 
00933                 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
00934                     ngs->last_ltrans[candp->wid].dscr = dscr;
00935                     ngs->last_ltrans[candp->wid].bp = bp;
00936                 }
00937             }
00938         }
00939     }
00940 
00941     /* Update best transitions for all candidates; also update best lastphone score */
00942     bestscore = ngs->last_phone_best_score;
00943     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00944         candp->score += ngs->last_ltrans[candp->wid].dscr;
00945         candp->bp = ngs->last_ltrans[candp->wid].bp;
00946 
00947         if (candp->score BETTER_THAN bestscore)
00948             bestscore = candp->score;
00949     }
00950     ngs->last_phone_best_score = bestscore;
00951 
00952     /* At this pt, we know the best entry score (with LM component) for all candidates */
00953     thresh = bestscore + ngs->lponlybeam;
00954     for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
00955         if (candp->score BETTER_THAN thresh) {
00956             w = candp->wid;
00957 
00958             ngram_search_alloc_all_rc(ngs, w);
00959 
00960             k = 0;
00961             for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00962                 if ((hmm_frame(&hmm->hmm) < frame_idx)
00963                     || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00964                     assert(hmm_frame(&hmm->hmm) != nf);
00965                     hmm_enter(&hmm->hmm,
00966                               candp->score, candp->bp, nf);
00967                     k++;
00968                 }
00969             }
00970             if (k > 0) {
00971                 assert(bitvec_is_clear(ngs->word_active, w));
00972                 assert(s3dict_pronlen(ps_search_dict(ngs), w) > 1);
00973                 *(nawl++) = w;
00974                 bitvec_set(ngs->word_active, w);
00975             }
00976         }
00977     }
00978     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
00979 }
00980 
00981 /*
00982  * Prune currently active word channels for next frame.  Also, perform exit
00983  * transitions out of such channels and active successors.
00984  */
00985 static void
00986 prune_word_chan(ngram_search_t *ngs, int frame_idx)
00987 {
00988     root_chan_t *rhmm;
00989     chan_t *hmm, *thmm;
00990     chan_t **phmmp;             /* previous HMM-pointer */
00991     int32 nf, w, i, k;
00992     int32 newword_thresh, lastphn_thresh;
00993     int32 *awl, *nawl;
00994 
00995     nf = frame_idx + 1;
00996     newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
00997     lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
00998 
00999     awl = ngs->active_word_list[frame_idx & 0x1];
01000     nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
01001 
01002     /* Dynamically allocated last channels of multi-phone words */
01003     for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
01004          --i, w = *(awl++)) {
01005         k = 0;
01006         phmmp = &(ngs->word_chan[w]);
01007         for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
01008             assert(hmm_frame(&hmm->hmm) >= frame_idx);
01009 
01010             thmm = hmm->next;
01011             if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
01012                 /* retain this channel in next frame */
01013                 hmm_frame(&hmm->hmm) = nf;
01014                 k++;
01015                 phmmp = &(hmm->next);
01016 
01017                 /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01018                 if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
01019                     /* can exit channel and recognize word */
01020                     ngram_search_save_bp(ngs, frame_idx, w,
01021                                  hmm_out_score(&hmm->hmm),
01022                                  hmm_out_history(&hmm->hmm),
01023                                  hmm->info.rc_id);
01024                 }
01025             }
01026             else if (hmm_frame(&hmm->hmm) == nf) {
01027                 phmmp = &(hmm->next);
01028             }
01029             else {
01030                 hmm_deinit(&hmm->hmm);
01031                 listelem_free(ngs->chan_alloc, hmm);
01032                 *phmmp = thmm;
01033             }
01034         }
01035         if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
01036             assert(s3dict_pronlen(ps_search_dict(ngs), w) > 1);
01037             *(nawl++) = w;
01038             bitvec_set(ngs->word_active, w);
01039         }
01040     }
01041     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01042 
01043     /*
01044      * Prune permanently allocated single-phone channels.
01045      * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
01046      */
01047     for (i = 0; i < ngs->n_1ph_words; i++) {
01048         w = ngs->single_phone_wid[i];
01049         rhmm = (root_chan_t *) ngs->word_chan[w];
01050         E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
01051                    s3dict_wordstr(ps_search_dict(ngs),w),
01052                    hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
01053                    lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
01054         if (hmm_frame(&rhmm->hmm) < frame_idx)
01055             continue;
01056         if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
01057             hmm_frame(&rhmm->hmm) = nf;
01058 
01059             /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01060             if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
01061                 E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
01062                            s3dict_wordstr(ps_search_dict(ngs),w),
01063                            hmm_out_score(&rhmm->hmm),
01064                            lastphn_thresh, newword_thresh));
01065                 ngram_search_save_bp(ngs, frame_idx, w,
01066                              hmm_out_score(&rhmm->hmm),
01067                              hmm_out_history(&rhmm->hmm), 0);
01068             }
01069         }
01070     }
01071 }
01072 
01073 static void
01074 prune_channels(ngram_search_t *ngs, int frame_idx)
01075 {
01076     /* Clear last phone candidate list. */
01077     ngs->n_lastphn_cand = 0;
01078     /* Set the dynamic beam based on maxhmmpf here. */
01079     ngs->dynamic_beam = ngs->beam;
01080     if (ngs->maxhmmpf != -1
01081         && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
01082         /* Build a histogram to approximately prune them. */
01083         int32 bins[256], bw, nhmms, i;
01084         root_chan_t *rhmm;
01085         chan_t **acl, *hmm;
01086 
01087         /* Bins go from zero (best score) to edge of beam. */
01088         bw = -ngs->beam / 256;
01089         memset(bins, 0, sizeof(bins));
01090         /* For each active root channel. */
01091         for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
01092             int32 b;
01093 
01094             /* Put it in a bin according to its bestscore. */
01095             b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
01096             if (b >= 256)
01097                 b = 255;
01098             ++bins[b];
01099         }
01100         /* For each active non-root channel. */
01101         acl = ngs->active_chan_list[frame_idx & 0x1];       /* currently active HMMs in tree */
01102         for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
01103              i > 0; --i, hmm = *(acl++)) {
01104             int32 b;
01105 
01106             /* Put it in a bin according to its bestscore. */
01107             b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
01108             if (b >= 256)
01109                 b = 255;
01110             ++bins[b];
01111         }
01112         /* Walk down the bins to find the new beam. */
01113         for (i = nhmms = 0; i < 256; ++i) {
01114             nhmms += bins[i];
01115             if (nhmms > ngs->maxhmmpf)
01116                 break;
01117         }
01118         ngs->dynamic_beam = -(i * bw);
01119     }
01120 
01121     prune_root_chan(ngs, frame_idx);
01122     prune_nonroot_chan(ngs, frame_idx);
01123     last_phone_transition(ngs, frame_idx);
01124     prune_word_chan(ngs, frame_idx);
01125 }
01126 
01127 /*
01128  * Limit the number of word exits in each frame to maxwpf.  And also limit the number of filler
01129  * words to 1.
01130  */
01131 static void
01132 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
01133 {
01134     int32 bp, n;
01135     int32 bestscr, worstscr;
01136     bptbl_t *bpe, *bestbpe, *worstbpe;
01137 
01138     /* Don't prune if no pruing. */
01139     if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
01140         return;
01141 
01142     /* Allow only one filler word exit (the best) per frame */
01143     bestscr = (int32) 0x80000000;
01144     bestbpe = NULL;
01145     n = 0;
01146     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01147         bpe = &(ngs->bp_table[bp]);
01148         if (s3dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
01149             if (bpe->score BETTER_THAN bestscr) {
01150                 bestscr = bpe->score;
01151                 bestbpe = bpe;
01152             }
01153             bpe->valid = FALSE; /* Flag to indicate invalidation */
01154             n++;                /* No. of filler words */
01155         }
01156     }
01157     /* Restore bestbpe to valid state */
01158     if (bestbpe != NULL) {
01159         bestbpe->valid = TRUE;
01160         --n;
01161     }
01162 
01163     /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
01164     n = (ngs->bpidx
01165          - ngs->bp_table_idx[frame_idx]) - n;  /* No. of entries after limiting fillers */
01166     for (; n > ngs->maxwpf; --n) {
01167         /* Find worst BPTable entry */
01168         worstscr = (int32) 0x7fffffff;
01169         worstbpe = NULL;
01170         for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
01171             bpe = &(ngs->bp_table[bp]);
01172             if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
01173                 worstscr = bpe->score;
01174                 worstbpe = bpe;
01175             }
01176         }
01177         /* FIXME: Don't panic! */
01178         if (worstbpe == NULL)
01179             E_FATAL("PANIC: No worst BPtable entry remaining\n");
01180         worstbpe->valid = 0;
01181     }
01182 }
01183 
01184 static void
01185 word_transition(ngram_search_t *ngs, int frame_idx)
01186 {
01187     int32 i, k, bp, w, nf;
01188     int32 rc;
01189     int32 *rcss;                /* right context score stack */
01190     int32 thresh, newscore;
01191     bptbl_t *bpe;
01192     root_chan_t *rhmm;
01193     struct bestbp_rc_s *bestbp_rc_ptr;
01194     phone_loop_search_t *pls;
01195     s3dict_t *dict = ps_search_dict(ngs);
01196     dict2pid_t *d2p = ps_search_dict2pid(ngs);
01197 
01198     /*
01199      * Transition to start of new word instances (HMM tree roots); but only if words
01200      * other than </s> finished here.
01201      * But, first, find the best starting score for each possible right context phone.
01202      */
01203     for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
01204         ngs->bestbp_rc[i].score = WORST_SCORE;
01205     k = 0;
01206     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
01207     /* Ugh, this is complicated.  Scan all word exits for this frame
01208      * (they have already been created by prune_word_chan()). */
01209     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01210         bpe = &(ngs->bp_table[bp]);
01211         ngs->word_lat_idx[bpe->wid] = NO_BP;
01212 
01213         if (bpe->wid == ps_search_finish_wid(ngs))
01214             continue;
01215         k++;
01216 
01217         /* DICT2PID */
01218         /* Array of HMM scores corresponding to all the possible right
01219          * context expansions of the final phone.  It's likely that a
01220          * lot of these are going to be missing, actually. */
01221         rcss = &(ngs->bscore_stack[bpe->s_idx]);
01222         if (bpe->last2_phone == -1) {
01223             /* No right context expansion. */
01224             for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01225                 if (rcss[0] BETTER_THAN ngs->bestbp_rc[rc].score) {
01226                     E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n", rcss[0], bpe->last_phone));
01227                     ngs->bestbp_rc[rc].score = rcss[0];
01228                     ngs->bestbp_rc[rc].path = bp;
01229                     ngs->bestbp_rc[rc].lc = bpe->last_phone;
01230                 }
01231             }
01232         }
01233         else {
01234             xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
01235             for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01236                 if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
01237                     E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
01238                                rc, rcss[rssid->cimap[rc]], bpe->last_phone));
01239                     ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
01240                     ngs->bestbp_rc[rc].path = bp;
01241                     ngs->bestbp_rc[rc].lc = bpe->last_phone;
01242                 }
01243             }
01244         }
01245     }
01246     if (k == 0)
01247         return;
01248 
01249     nf = frame_idx + 1;
01250     thresh = ngs->best_score + ngs->dynamic_beam;
01251     /*
01252      * Hypothesize successors to words finished in this frame.
01253      * Main dictionary, multi-phone words transition to HMM-trees roots.
01254      */
01255     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01256         bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
01257 
01258         newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
01259             + phone_loop_search_score(pls, rhmm->ciphone);
01260         if (newscore BETTER_THAN thresh) {
01261             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01262                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01263                 hmm_enter(&rhmm->hmm, newscore,
01264                           bestbp_rc_ptr->path, nf);
01265                 /* DICT2PID: Another place where mpx ssids are entered. */
01266                 /* Look up the ssid to use when entering this mpx triphone. */
01267                 hmm_mpx_ssid(&rhmm->hmm, 0) =
01268                     d2p->ldiph_lc[rhmm->ciphone][rhmm->ci2phone][bestbp_rc_ptr->lc];
01269                 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01270             }
01271         }
01272     }
01273 
01274     /*
01275      * Single phone words; no right context for these.  Cannot use bestbp_rc as
01276      * LM scores have to be included.  First find best transition to these words.
01277      */
01278     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01279         w = ngs->single_phone_wid[i];
01280         ngs->last_ltrans[w].dscr = (int32) 0x80000000;
01281     }
01282     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01283         bpe = &(ngs->bp_table[bp]);
01284         if (!bpe->valid)
01285             continue;
01286 
01287         for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01288             int32 n_used;
01289             w = ngs->single_phone_wid[i];
01290             newscore = ngram_search_exit_score
01291                 (ngs, bpe, s3dict_first_phone(dict, w));
01292             E_DEBUG(4, ("initial newscore for %s: %d\n",
01293                         s3dict_wordstr(dict, w), newscore));
01294             newscore += ngram_tg_score(ngs->lmset,
01295                                        s3dict_basewid(dict, w),
01296                                        bpe->real_wid,
01297                                        bpe->prev_real_wid, &n_used);
01298 
01299             if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
01300                 ngs->last_ltrans[w].dscr = newscore;
01301                 ngs->last_ltrans[w].bp = bp;
01302             }
01303         }
01304     }
01305 
01306     /* Now transition to in-LM single phone words */
01307     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01308         w = ngs->single_phone_wid[i];
01309         rhmm = (root_chan_t *) ngs->word_chan[w];
01310         newscore = ngs->last_ltrans[w].dscr + ngs->pip
01311             + phone_loop_search_score(pls, rhmm->ciphone);
01312         if (newscore BETTER_THAN thresh) {
01313             bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
01314             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01315                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01316                 hmm_enter(&rhmm->hmm,
01317                           newscore, ngs->last_ltrans[w].bp, nf);
01318                 /* DICT2PID: another place where mpx ssids are entered. */
01319                 /* Look up the ssid to use when entering this mpx triphone. */
01320                 hmm_mpx_ssid(&rhmm->hmm, 0) =
01321                     d2p->ldiph_lc[rhmm->ciphone][rhmm->ci2phone]
01322                     [s3dict_last_phone(dict, bpe->wid)];
01323                 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01324             }
01325         }
01326     }
01327 
01328     /* Remaining words: <sil>, noise words.  No mpx for these! */
01329     w = ps_search_silence_wid(ngs);
01330     rhmm = (root_chan_t *) ngs->word_chan[w];
01331     bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
01332     newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
01333         + phone_loop_search_score(pls, rhmm->ciphone);
01334     if (newscore BETTER_THAN thresh) {
01335         if ((hmm_frame(&rhmm->hmm) < frame_idx)
01336             || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01337             hmm_enter(&rhmm->hmm,
01338                       newscore, bestbp_rc_ptr->path, nf);
01339         }
01340     }
01341     for (w = s3dict_filler_start(dict); w <= s3dict_filler_end(dict); w++) {
01342         if (w == ps_search_silence_wid(ngs))
01343             continue;
01344         rhmm = (root_chan_t *) ngs->word_chan[w];
01345         /* If this was not actually a single-phone word, rhmm will be NULL. */
01346         if (rhmm == NULL)
01347             continue;
01348         newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
01349             + phone_loop_search_score(pls, rhmm->ciphone);
01350         if (newscore BETTER_THAN thresh) {
01351             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01352                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01353                 hmm_enter(&rhmm->hmm,
01354                           newscore, bestbp_rc_ptr->path, nf);
01355             }
01356         }
01357     }
01358 }
01359 
01360 static void
01361 deactivate_channels(ngram_search_t *ngs, int frame_idx)
01362 {
01363     root_chan_t *rhmm;
01364     int i;
01365 
01366     /* Clear score[] of pruned root channels */
01367     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01368         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01369             hmm_clear_scores(&rhmm->hmm);
01370         }
01371     }
01372     /* Clear score[] of pruned single-phone channels */
01373     for (i = 0; i < ngs->n_1ph_words; i++) {
01374         int32 w = ngs->single_phone_wid[i];
01375         rhmm = (root_chan_t *) ngs->word_chan[w];
01376         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01377             hmm_clear_scores(&rhmm->hmm);
01378         }
01379     }
01380 }
01381 
01382 int
01383 ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
01384 {
01385     int16 const *senscr;
01386 
01387     /* Activate our HMMs for the current frame if need be. */
01388     if (!ps_search_acmod(ngs)->compallsen)
01389         compute_sen_active(ngs, frame_idx);
01390 
01391     /* Compute GMM scores for the current frame. */
01392     if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
01393         return 0;
01394     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
01395 
01396     /* Mark backpointer table for current frame. */
01397     ngram_search_mark_bptable(ngs, frame_idx);
01398 
01399     /* Renormalize if necessary (FIXME: Make sure to test this) */
01400     if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
01401         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
01402                frame_idx, ngs->best_score);
01403         renormalize_scores(ngs, frame_idx, ngs->best_score);
01404     }
01405 
01406     /* Evaluate HMMs */
01407     evaluate_channels(ngs, senscr, frame_idx);
01408     /* Prune HMMs and do phone transitions. */
01409     prune_channels(ngs, frame_idx);
01410     /* Do absolute pruning on word exits. */
01411     bptable_maxwpf(ngs, frame_idx);
01412     /* Do word transitions. */
01413     word_transition(ngs, frame_idx);
01414     /* Deactivate pruned HMMs. */
01415     deactivate_channels(ngs, frame_idx);
01416 
01417     ++ngs->n_frame;
01418     /* Return the number of frames processed. */
01419     return 1;
01420 }
01421 
01422 void
01423 ngram_fwdtree_finish(ngram_search_t *ngs)
01424 {
01425     int32 i, w, cf, *awl;
01426     root_chan_t *rhmm;
01427     chan_t *hmm, **acl;
01428 
01429     /* This is the number of frames processed. */
01430     cf = ps_search_acmod(ngs)->output_frame;
01431     /* Add a mark in the backpointer table for one past the final frame. */
01432     ngram_search_mark_bptable(ngs, cf);
01433 
01434     /* Deactivate channels lined up for the next frame */
01435     /* First, root channels of HMM tree */
01436     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01437         hmm_clear(&rhmm->hmm);
01438     }
01439 
01440     /* nonroot channels of HMM tree */
01441     i = ngs->n_active_chan[cf & 0x1];
01442     acl = ngs->active_chan_list[cf & 0x1];
01443     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
01444         hmm_clear(&hmm->hmm);
01445     }
01446 
01447     /* word channels */
01448     i = ngs->n_active_word[cf & 0x1];
01449     awl = ngs->active_word_list[cf & 0x1];
01450     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
01451         /* Don't accidentally free single-phone words! */
01452         if (s3dict_pronlen(ps_search_dict(ngs), w) == 1)
01453             continue;
01454         bitvec_clear(ngs->word_active, w);
01455         if (ngs->word_chan[w] == NULL)
01456             continue;
01457         ngram_search_free_all_rc(ngs, w);
01458     }
01459 
01460     /*
01461      * The previous search code did a postprocessing of the
01462      * backpointer table here, but we will postpone this until it is
01463      * absolutely necessary, i.e. when generating a word graph.
01464      * Likewise we don't actually have to decide what the exit word is
01465      * until somebody requests a backtrace.
01466      */
01467 
01468     /* Print out some statistics. */
01469     if (cf > 0) {
01470         E_INFO("%8d words recognized (%d/fr)\n",
01471                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
01472         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
01473                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
01474         E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
01475                ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
01476                (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
01477                ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
01478         E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
01479                ngs->st.n_word_lastchan_eval,
01480                ngs->st.n_word_lastchan_eval / (cf + 1));
01481         E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
01482                ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
01483     }
01484 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7