src/libpocketsphinx/vithist.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * vithist.c -- Viterbi history
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1999 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * 
00049  * $Log: vithist.c,v $
00050  * Revision 1.9  2006/02/23 16:56:12  arthchan2003
00051  * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH
00052  * 1, Split latticehist_t from flat_fwd.c to  here.
00053  * 2, Introduced vithist_entry_cp.  This is much better than the direct
00054  * copy we have been using. (Which could cause memory problem easily)
00055  *
00056  * Revision 1.8.4.12  2006/01/16 18:11:39  arthchan2003
00057  * 1, Important Bug fixes, a local pointer is used when realloc is needed.  This causes invalid writing of the memory, 2, Acoustic scores of the last segment in IBM lattice generation couldn't be found in the past.  Now, this could be handled by the optional acoustic scores in the node of lattice.  Application code is not yet checked-in
00058  *
00059  * Revision 1.8.4.11  2005/11/17 06:46:02  arthchan2003
00060  * 3 changes. 1, Code was added for full triphone implementation, not yet working. 2, Senone scale is removed from vithist table. This was a bug introduced during some fixes in CALO.
00061  *
00062  * Revision 1.8.4.10  2005/10/17 04:58:30  arthchan2003
00063  * vithist.c is the true source of memory leaks in the past for full cwtp expansion.  There are two changes made to avoid this happen, 1, instead of using ve->rc_info as the indicator whether something should be done, used a flag bFullExpand to control it. 2, avoid doing direct C-struct copy (like *ve = *tve), it becomes the reason of why memory are leaked and why the code goes wrong.
00064  *
00065  * Revision 1.8.4.9  2005/10/07 20:05:05  arthchan2003
00066  * When rescoring in full triphone expansion, the code should use the score for the word end with corret right context.
00067  *
00068  * Revision 1.8.4.8  2005/09/26 07:23:06  arthchan2003
00069  * Also fixed a bug such SINGLE_RC_HISTORY=0 compiled.
00070  *
00071  * Revision 1.8.4.7  2005/09/26 02:28:00  arthchan2003
00072  * Remove a E_INFO in vithist.c
00073  *
00074  * Revision 1.8.4.6  2005/09/25 19:33:40  arthchan2003
00075  * (Change for comments) Added support for Viterbi history.
00076  *
00077  * Revision 1.8.4.5  2005/09/25 19:23:55  arthchan2003
00078  * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
00079  *
00080  * Revision 1.8.4.4  2005/09/11 03:00:15  arthchan2003
00081  * All lattice-related functions are not incorporated into vithist. So-called "lattice" is essentially the predecessor of vithist_t and fsg_history_t.  Later when vithist_t support by right context score and history.  It should replace both of them.
00082  *
00083  * Revision 1.8.4.3  2005/07/26 02:20:39  arthchan2003
00084  * merged hyp_t with srch_hyp_t.
00085  *
00086  * Revision 1.8.4.2  2005/07/17 05:55:45  arthchan2003
00087  * Removed vithist_dag_write_header
00088  *
00089  * Revision 1.8.4.1  2005/07/04 07:25:22  arthchan2003
00090  * Added vithist_entry_display and vh_lmstate_display in vithist.
00091  *
00092  * Revision 1.8  2005/06/22 02:47:35  arthchan2003
00093  * 1, Added reporting flag for vithist_init. 2, Added a flag to allow using words other than silence to be the last word for backtracing. 3, Fixed doxygen documentation. 4, Add  keyword.
00094  *
00095  * Revision 1.9  2005/06/16 04:59:10  archan
00096  * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale.
00097  *
00098  * Revision 1.8  2005/05/26 00:46:59  archan
00099  * Added functionalities that such that <sil> will not be inserted at the end of the utterance.
00100  *
00101  * Revision 1.7  2005/04/25 23:53:35  archan
00102  * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend.  This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation.  That is, to implement an algorithm which dynamically decide which tree copies should be entered.  However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical.
00103  *
00104  * Revision 1.6  2005/04/21 23:50:26  archan
00105  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
00106  *
00107  * Revision 1.5  2005/04/20 03:46:30  archan
00108  * factor dag header writer into vithist.[ch], do the corresponding change for lm_t
00109  *
00110  * Revision 1.4  2005/03/30 01:08:38  archan
00111  * codebase-wide update. Performed an age-old trick: Adding $Log into all .c and .h files. This will make sure the commit message be preprended into a file.
00112  *
00113  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
00114  *              Added vithist_free() to free allocated memory
00115  * 
00116  * 30-Dec-2000  Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
00117  *              Added vithist_partialutt_end() to allow backtracking in
00118  *              the middle of an utterance
00119  * 
00120  * 13-Aug-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00121  *              Added maxwpf handling.
00122  * 
00123  * 24-May-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00124  *              Started.
00125  */
00126 
00127 #include <listelem_alloc.h>
00128 #include <pio.h>
00129 #include <heap.h>
00130 
00131 #include "vithist.h"
00132 
00133 void
00134 vh_lmstate_display(vh_lmstate_t * vhl, s3dict_t * dict)
00135 {
00136     /* TODO: Also translate wid to string if dict is not NULL */
00137     E_INFO("lwid[0] %d\n", vhl->lm3g.lwid[0]);
00138     E_INFO("lwid[1] %d\n", vhl->lm3g.lwid[1]);
00139     E_INFO("lwid[2] %d\n", vhl->lm3g.lwid[2]);
00140 }
00141 
00142 void
00143 vithist_entry_display(vithist_entry_t * ve, s3dict_t * dict)
00144 {
00145     E_INFO("Word ID %d \n", ve->wid);
00146     E_INFO("Sf %d Ef %d \n", ve->sf, ve->ef);
00147     E_INFO("Ascr %d Lscr %d \n", ve->ascr, ve->lscr);
00148     E_INFO("Score %d \n", ve->path.score);
00149     E_INFO("Type %d\n", ve->type);
00150     E_INFO("Valid for LM rescoring? %d\n", ve->valid);
00151     vh_lmstate_display(&(ve->lmstate), dict);
00152 }
00153 
00154 
00155 vithist_t *
00156 vithist_init(int32 max, int32 n_ci, int32 wbeam, int32 bghist, int32 report)
00157 {
00158     vithist_t *vh;
00159 
00160     if (report)
00161         E_INFO("Initializing Viterbi-history module\n");
00162 
00163     vh = (vithist_t *) ckd_calloc(1, sizeof(vithist_t));
00164 
00165     vh->entry =
00166         (vithist_entry_t **) ckd_calloc(VITHIST_MAXBLKS,
00167                                         sizeof(vithist_entry_t *));
00168 
00169     vh->n_entry = 0;
00170 
00171     vh->frame_start =
00172         (int32 *) ckd_calloc(S3_MAX_FRAMES + 1, sizeof(int32));
00173 
00174     vh->bestscore = (int32 *) ckd_calloc(S3_MAX_FRAMES + 1, sizeof(int32));
00175     vh->bestvh = (int32 *) ckd_calloc(S3_MAX_FRAMES + 1, sizeof(int32));
00176 
00177     vh->wbeam = wbeam;
00178     vh->bghist = bghist;
00179 
00180     E_INFO("Allocation for Viterbi history, lmset-final size: %d\n", max);
00181     vh->lms2vh_root =
00182         (vh_lms2vh_t **) ckd_calloc(max, sizeof(vh_lms2vh_t *));
00183     vh->n_ci = n_ci;
00184 
00185     vh->lwidlist = NULL;
00186 
00187     vithist_report(vh);
00188     return vh;
00189 }
00190 
00194 static void
00195 clean_up_rc_info(backpointer_t * rc_info, int32 n_rc_info)
00196 {
00197     int32 i;
00198     for (i = 0; i < n_rc_info; i++) {
00199         rc_info[i].score = S3_LOGPROB_ZERO;
00200         rc_info[i].pred = -1;
00201     }
00202 }
00203 
00209 static void
00210 vithist_entry_dirty_cp(vithist_entry_t * va, vithist_entry_t * vb,
00211                        int32 n_rc_info)
00212 {
00213     backpointer_t *tmpshp;
00214     assert(vb->rc == NULL);
00215 
00216     tmpshp = va->rc;
00217     /* Do a direct copy */
00218     *va = *vb;
00219     va->rc = tmpshp;
00220     va->n_rc = n_rc_info;
00221 }
00222 
00227 static void
00228 vithist_entry_cp(vithist_entry_t * va, vithist_entry_t * vb)
00229 {
00230     int i;
00231     /* Do a direct copy */
00232     va->wid = vb->wid;
00233     va->sf = vb->sf;
00234     va->ef = vb->ef;
00235     va->ascr = vb->ascr;
00236     va->lscr = vb->lscr;
00237     va->path.score = vb->path.score;
00238     va->path.pred = vb->path.pred;
00239     va->type = vb->type;
00240     va->valid = vb->valid;
00241     va->lmstate.lm3g.lwid[0] = vb->lmstate.lm3g.lwid[0];
00242     va->lmstate.lm3g.lwid[1] = vb->lmstate.lm3g.lwid[1];
00243     va->n_rc = vb->n_rc;
00244 
00245     if (va->rc) {
00246         for (i = 0; i < vb->n_rc; i++)
00247             va->rc[i] = vb->rc[i];
00248     }
00249 }
00250 
00251 
00252 /*
00253  * Allocate a new entry at vh->n_entry if one doesn't exist and return ptr to it.
00254  */
00255 static vithist_entry_t *
00256 vithist_entry_alloc(vithist_t * vh)
00257 {
00258     int32 b, l;
00259     vithist_entry_t *ve;
00260 
00261     b = VITHIST_ID2BLK(vh->n_entry);
00262     l = VITHIST_ID2BLKOFFSET(vh->n_entry);
00263 
00264     if (l == 0) {               /* Crossed block boundary; allocate a new block of vithist space */
00265         if (b >= VITHIST_MAXBLKS)
00266             E_FATAL
00267                 ("Viterbi history array exhausted; increase VITHIST_MAXBLKS\n");
00268 
00269         assert(vh->entry[b] == NULL);
00270 
00271         ve = (vithist_entry_t *) ckd_calloc(VITHIST_BLKSIZE,
00272                                             sizeof(vithist_entry_t));
00273         vh->entry[b] = ve;
00274     }
00275     else
00276         ve = vh->entry[b] + l;
00277 
00278     vh->n_entry++;
00279     return ve;
00280 }
00281 
00282 
00283 int32
00284 vithist_utt_begin(vithist_t * vh, int32 startwid, int32 lwid)
00285 {
00286     vithist_entry_t *ve;
00287 
00288     assert(vh->n_entry == 0);
00289     assert(vh->entry[0] == NULL);
00290     assert(vh->lwidlist == NULL);
00291 
00292     /* Create an initial dummy <s> entry.  This is the root for the utterance */
00293     ve = vithist_entry_alloc(vh);
00294 
00295     ve->wid = startwid;
00296     ve->sf = -1;
00297     ve->ef = -1;
00298     ve->ascr = 0;
00299     ve->lscr = 0;
00300     ve->path.score = 0;
00301     ve->path.pred = -1;
00302     ve->type = 0;
00303     ve->valid = 1;
00304     ve->lmstate.lm3g.lwid[0] = lwid;
00305     ve->lmstate.lm3g.lwid[1] = NGRAM_INVALID_WID;
00306     vh->n_frm = 0;
00307     vh->frame_start[0] = 1;
00308     vh->bestscore[0] = MAX_NEG_INT32;
00309     vh->bestvh[0] = -1;
00310 
00311     return 0;
00312 }
00313 
00314 
00315 static int32
00316 vh_lmstate_find(vithist_t * vh, vh_lmstate_t * lms)
00317 {
00318     vh_lms2vh_t *lms2vh;
00319     s3lmwid32_t lwid;
00320     gnode_t *gn;
00321 
00322     lwid = lms->lm3g.lwid[0];
00323     if ((lms2vh = vh->lms2vh_root[lwid]) == NULL)
00324         return -1;
00325 
00326     assert(lms2vh->state == lwid);
00327 
00328     lwid = lms->lm3g.lwid[1];
00329     for (gn = lms2vh->children; gn; gn = gnode_next(gn)) {
00330         lms2vh = (vh_lms2vh_t *) gnode_ptr(gn);
00331         if (lms2vh->state == lwid)
00332             return lms2vh->vhid;
00333     }
00334 
00335     return -1;
00336 }
00337 
00338 
00339 /*
00340  * Enter a new LMstate into the current frame LMstates trees; called ONLY IF not already
00341  * present.
00342  */
00343 static void
00344 vithist_lmstate_enter(vithist_t * vh, int32 vhid, vithist_entry_t * ve)
00345 {
00346     vh_lms2vh_t *lms2vh, *child;
00347     s3lmwid32_t lwid;
00348 
00349     lwid = ve->lmstate.lm3g.lwid[0];
00350     if ((lms2vh = vh->lms2vh_root[lwid]) == NULL) {
00351         lms2vh = (vh_lms2vh_t *) ckd_calloc(1, sizeof(vh_lms2vh_t));
00352         vh->lms2vh_root[lwid] = lms2vh;
00353 
00354         lms2vh->state = lwid;
00355         lms2vh->children = NULL;
00356 
00357         vh->lwidlist = glist_add_int32(vh->lwidlist, (int32) lwid);
00358     }
00359     else {
00360         assert(lms2vh->state == lwid);
00361     }
00362 
00363     child = (vh_lms2vh_t *) ckd_calloc(1, sizeof(vh_lms2vh_t));
00364     child->state = ve->lmstate.lm3g.lwid[1];
00365     child->children = NULL;
00366     child->vhid = vhid;
00367     child->ve = ve;
00368 
00369     lms2vh->children = glist_add_ptr(lms2vh->children, (void *) child);
00370 }
00371 
00372 
00373 /* Rclist is separate from tve because C structure copying is used in *ve = *tve 
00374  */
00375 void
00376 vithist_enter(vithist_t * vh,              
00377               s3dict_t *dict,              
00378               dict2pid_t *dict2pid,        
00379               vithist_entry_t * tve,       
00380               int32 comp_rc                
00381     )
00382 {
00383     vithist_entry_t *ve;
00384     int32 vhid;
00385     int32 n_ci;
00386     int32 n_rc_info;
00387     int32 old_n_rc_info;
00388 
00389     n_ci = vh->n_ci;
00390     /* Check if an entry with this LM state already exists in current frame */
00391     vhid = vh_lmstate_find(vh, &(tve->lmstate));
00392     n_rc_info = 0;          /* Just fill in something if not using crossword triphon */
00393 
00394 
00395     assert(comp_rc < n_rc_info);
00396 
00397     if (vhid < 0) {             /* Not found; allocate new entry */
00398         vhid = vh->n_entry;
00399         ve = vithist_entry_alloc(vh);
00400 
00401         vithist_entry_dirty_cp(ve, tve, n_rc_info);
00402         vithist_lmstate_enter(vh, vhid, ve);    /* Enter new vithist info into LM state tree */
00403 
00404         /*      E_INFO("Start a new entry wid %d\n",ve->wid); */
00405         if (ve->rc != NULL)
00406             clean_up_rc_info(ve->rc, ve->n_rc);
00407 
00408         if (comp_rc != -1) {
00409             if (ve->rc == NULL) {
00410                 ve->n_rc =
00411                     get_rc_nssid(dict2pid, ve->wid, dict);
00412                 /* Always allocate n_ci for rc_info */
00413                 ve->rc = ckd_calloc(vh->n_ci, sizeof(*ve->rc));
00414                 clean_up_rc_info(ve->rc, ve->n_rc);
00415             }
00416 
00417             assert(comp_rc < ve->n_rc);
00418             if (ve->rc[comp_rc].score < tve->path.score) {
00419                 ve->rc[comp_rc].score = tve->path.score;
00420                 ve->rc[comp_rc].pred = tve->path.pred;
00421             }
00422         }
00423 
00424 
00425     }
00426     else {
00427         ve = vithist_id2entry(vh, vhid);
00428 
00429         /*      E_INFO("Replace the old entry\n"); */
00430         /*              E_INFO("Old entry wid %d, New entry wid %d\n",ve->wid, tve->wid); */
00431 
00432         if (comp_rc == -1) {
00433             if (ve->path.score < tve->path.score) {
00434                 vithist_entry_dirty_cp(ve, tve, n_rc_info);
00435                 assert(comp_rc < n_rc_info);
00436                 if (ve->rc != NULL)
00437                     clean_up_rc_info(ve->rc, ve->n_rc);
00438             }
00439 
00440         }
00441         else {
00442 
00443             /* This is wrong, the score 
00444                Alright, how vhid was searched in the first place? 
00445              */
00446             if (ve->path.score < tve->path.score) {
00447                 old_n_rc_info = ve->n_rc;
00448                 vithist_entry_dirty_cp(ve, tve, n_rc_info);
00449                 assert(comp_rc < n_rc_info);
00450 
00451                 assert(ve->rc);
00452                 clean_up_rc_info(ve->rc, ve->n_rc);
00453                 ve->rc[comp_rc].score = tve->path.score;
00454                 ve->rc[comp_rc].pred = tve->path.pred;
00455             }
00456 
00457         }
00458 
00459     }
00460 
00461     /* Update best exit score in this frame */
00462     if (vh->bestscore[vh->n_frm] < tve->path.score) {
00463         vh->bestscore[vh->n_frm] = tve->path.score;
00464         vh->bestvh[vh->n_frm] = vhid;
00465     }
00466 }
00467 
00468 
00469 void
00470 vithist_rescore(vithist_t * vh, ngram_model_t *lm,
00471                 s3dict_t *dict, dict2pid_t *dict2pid,
00472                 fillpen_t *fp,
00473                 s3wid_t wid, int32 ef, int32 score,
00474                 int32 pred, int32 type, int32 rc)
00475 {
00476     vithist_entry_t *pve, tve;
00477     int32 lwid;
00478     int32 se, fe;
00479     int32 i;
00480 
00481     assert(vh->n_frm == ef);
00482     if (pred == -1) {
00483         /* Always do E_FATAL assuming upper level function take care of error checking. */
00484         E_FATAL
00485             ("Hmm->out.history equals to -1 with score %d, some active phone was not computed?\n",
00486              score);
00487     }
00488 
00489     /* pve is the previous entry before word with wid or, se an fe is the
00490        first to the last entry before pve. So pve is w_{n-1} */
00491 
00492     pve = vithist_id2entry(vh, pred);
00493 
00494     /* Create a temporary entry with all the info currently available */
00495     tve.wid = wid;
00496     tve.sf = pve->ef + 1;
00497     tve.ef = ef;
00498     tve.type = type;
00499     tve.valid = 1;
00500     tve.ascr = score - pve->path.score;
00501     tve.lscr = 0;
00502     tve.rc = NULL;
00503     tve.n_rc = 0;
00504 
00505     /* Filler words only have unigram language model scores, so not
00506      * much special needs to be done for them.  vithist_prune() is
00507      * going to prune out most of these later on, anyway. */
00508     if (s3dict_filler_word(dict, wid)) {
00509         tve.path.score = score;
00510         tve.lscr = fillpen(fp, wid);
00511         tve.path.score += tve.lscr;
00512         if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) {
00513             tve.path.pred = pred;
00514             /* Note that they just propagate the same LM state since
00515              * they are not in the LM. */
00516             tve.lmstate.lm3g = pve->lmstate.lm3g;
00517             vithist_enter(vh, dict, dict2pid, &tve, rc);
00518         }
00519     }
00520     else {
00521         if (pred == 0) {            /* Special case for the initial <s> entry */
00522             se = 0;
00523             fe = 1;
00524         }
00525         else {
00526             se = vh->frame_start[pve->ef];
00527             fe = vh->frame_start[pve->ef + 1];
00528         }
00529 
00530         /* Now if it is a word, backtrack again to get all possible previous word
00531            So  pve becomes the w_{n-2}. 
00532          */
00533 
00534         lwid = ngram_wid(lm, s3dict_wordstr(dict, s3dict_basewid(dict, wid)));
00535 
00536         tve.lmstate.lm3g.lwid[0] = lwid;
00537 
00538         /* FIXME: This loop is completely awful.  For each entry in
00539          * this frame, we scan every entry in the previous frame,
00540          * potentially creating a new history entry.  This means that
00541          * without pruning, the size of the vithist table (and thus
00542          * the time taken here) is exponential in the number of
00543          * frames! */
00544         for (i = se; i < fe; i++) {
00545             pve = vithist_id2entry(vh, i);
00546 
00547             if (pve->valid) {
00548                 int n_used;
00549                 tve.path.score = pve->path.score + tve.ascr;
00550                 /* Try at all costs to avoid calling ngram_tg_score()
00551                  * because it is the main time consuming part here
00552                  * (but as noted above... ugh...) See below as well. */
00553                 if ((tve.path.score - vh->wbeam) < vh->bestscore[vh->n_frm])
00554                     continue;
00555                 /* The trigram cache is supposed to make this fast,
00556                  * but due to the crazy number of times this could be
00557                  * called, it's still slow compared to a hash
00558                  * table. */
00559                 tve.lscr = ngram_tg_score(lm, lwid, pve->lmstate.lm3g.lwid[0],
00560                                           pve->lmstate.lm3g.lwid[1], &n_used);
00561                 tve.path.score += tve.lscr;
00562                 /* A different word exit threshold - we would have to
00563                  * be inside the general word beam in order to get
00564                  * here, now we apply a second beam to the *vithist
00565                  * entries* in this frame.  There can be an ungodly
00566                  * number of them for reasons that aren't entirely
00567                  * clear to me, so this is kind of a pre-pruning.
00568                  * NOTE: the "backwards" math here is because
00569                  * vh->bestscore is frequently MAX_NEG_INT32.  ALSO
00570                  * NOTE: We can't precompute the threshold since the
00571                  * best score will be updated by vithist_enter(). */
00572                 if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) {
00573                     tve.path.pred = i;
00574                     tve.lmstate.lm3g.lwid[1] = pve->lmstate.lm3g.lwid[0];
00575 
00576                     vithist_enter(vh, dict, dict2pid, &tve, rc);
00577                 }
00578             }
00579         }
00580     }
00581 }
00582 
00583 
00584 /*
00585  * Garbage collect invalid entries in current frame, right after marking invalid entries.
00586  */
00587 static void
00588 vithist_frame_gc(vithist_t * vh, int32 frm)
00589 {
00590     vithist_entry_t *ve, *tve;
00591     int32 se, fe, te, bs, bv;
00592     int32 i, j;
00593     int32 l;
00594     int32 n_rc_info;
00595 
00596     n_rc_info = 0;
00597     se = vh->frame_start[frm];
00598     fe = vh->n_entry - 1;
00599     te = se;
00600 
00601     bs = MAX_NEG_INT32;
00602     bv = -1;
00603     E_DEBUG(2,("GC in frame %d, scanning vithist entries from %d to %d\n",
00604                frm, se, fe));
00605     for (i = se; i <= fe; i++) {
00606         ve = vithist_id2entry(vh, i);
00607         if (ve->valid) {
00608             E_DEBUG(2,("Valid entry %d score %d\n", i, ve->path.score));
00609             if (i != te) {      /* Move i to te */
00610                 tve = vithist_id2entry(vh, te);
00612                 vithist_entry_cp(tve, ve);
00613             }
00614 
00615             if (ve->path.score > bs) {
00616                 bs = ve->path.score;
00617                 bv = te;
00618             }
00619 
00620             te++;
00621         }
00622     }
00623     E_DEBUG(2,("GC bs %d vh->bestscore[frm] %d\n", bs, vh->bestscore[frm]));
00624 
00625     /* Can't assert this any more because history pruning could actually make the 
00626        best token go away 
00627 
00628      */
00629     assert(bs == vh->bestscore[frm]);
00630 
00631 
00632     vh->bestvh[frm] = bv;
00633 
00634     /* Free up entries [te..n_entry-1] */
00635     i = VITHIST_ID2BLK(vh->n_entry - 1);
00636     j = VITHIST_ID2BLK(te - 1);
00637     for (; i > j; --i) {
00638 
00639         for (l = 0; l < VITHIST_BLKSIZE; l++) {
00640             ve = vh->entry[i] + l;
00641             if (ve->rc != NULL) {
00642                 ckd_free(ve->rc);
00643                 ve->rc = NULL;
00644             }
00645         }
00646 
00647         ckd_free((void *) vh->entry[i]);
00648         vh->entry[i] = NULL;
00649     }
00650     vh->n_entry = te;
00651 }
00652 
00653 
00654 
00655 void
00656 vithist_prune(vithist_t * vh, s3dict_t * dict, int32 frm,
00657               int32 maxwpf, int32 maxhist, int32 beam)
00658 {
00659     int32 se, fe, filler_done, th;
00660     vithist_entry_t *ve;
00661     heap_t h;
00662     s3wid_t *wid = NULL;
00663     int32 i, nwf, nhf;
00664 
00665     if (maxwpf == -1 && maxhist == -1)
00666         return;
00667     nwf = nhf = 0;
00668 
00669     assert(frm >= 0);
00670 
00671     se = vh->frame_start[frm];
00672     fe = vh->n_entry - 1;
00673 
00674     th = vh->bestscore[frm] + beam;
00675 
00676     h = heap_new();
00677     if (maxwpf > 0) {
00678         wid = (s3wid_t *) ckd_calloc(maxwpf + 1, sizeof(s3wid_t));
00679         wid[0] = BAD_S3WID;
00680     }
00681 
00682     E_DEBUG(1, ("vithist_prune frame %d has %d entries\n", frm, fe-se+1));
00683     for (i = se; i <= fe; i++) {
00684         ve = vithist_id2entry(vh, i);
00685         heap_insert(h, (void *) ve, -(ve->path.score));
00686         ve->valid = 0;
00687     }
00688 
00689     /* Mark invalid entries: beyond maxwpf words and below threshold */
00690     filler_done = 0;
00691     while (heap_pop(h, (void **) (&ve), &i)
00692            && ve->path.score >= th /* the score (or the cw scores) is above threshold */
00693            && (nhf < maxhist || maxhist == -1)) {
00694         if (s3dict_filler_word(dict, ve->wid)) {
00695             /* Major HACK!!  Keep only one best filler word entry per frame */
00696             if (filler_done)
00697                 continue;
00698             filler_done = 1;
00699         }
00700 
00701         /* Check if this word already valid (e.g., under a different history) */
00702         if (wid)
00703             for (i = 0; IS_S3WID(wid[i]) && (wid[i] != ve->wid); i++);
00704         if (wid && NOT_S3WID(wid[i])) {
00705             /* New word; keep only if <maxwpf words already entered, even if >= thresh */
00706             if (nwf < maxwpf || maxwpf == -1) {
00707                 if (wid) {
00708                     wid[i] = ve->wid;
00709                     wid[i + 1] = BAD_S3WID;
00710                 }
00711 
00712                 ++nwf;
00713                 ++nhf;
00714                 ve->valid = 1;
00715             }
00716         }
00717         else if (!vh->bghist) {
00718             ++nhf;
00719             ve->valid = 1;
00720         }
00721     }
00722 
00723     ckd_free((void *) wid);
00724     heap_destroy(h);
00725 
00726     E_DEBUG(1, ("vithist_prune frame %d retained %d entries\n", frm, nhf));
00727     /* Garbage collect invalid entries */
00728     vithist_frame_gc(vh, frm);
00729 }
00730 
00731 
00732 static void
00733 vithist_lmstate_reset(vithist_t * vh)
00734 {
00735     gnode_t *lgn, *gn;
00736     int32 i;
00737     vh_lms2vh_t *lms2vh, *child;
00738 
00739     for (lgn = vh->lwidlist; lgn; lgn = gnode_next(lgn)) {
00740         i = (int32) gnode_int32(lgn);
00741         lms2vh = vh->lms2vh_root[i];
00742 
00743         for (gn = lms2vh->children; gn; gn = gnode_next(gn)) {
00744             child = (vh_lms2vh_t *) gnode_ptr(gn);
00745             ckd_free((void *) child);
00746         }
00747         glist_free(lms2vh->children);
00748 
00749         ckd_free((void *) lms2vh);
00750 
00751         vh->lms2vh_root[i] = NULL;
00752     }
00753 
00754     glist_free(vh->lwidlist);
00755     vh->lwidlist = NULL;
00756 }
00757 
00758 
00759 void
00760 vithist_frame_windup(vithist_t * vh, int32 frm, FILE * fp,
00761                      ngram_model_t *lm, s3dict_t *dict)
00762 {
00763     assert(vh->n_frm == frm);
00764 
00765     vh->n_frm++;
00766     vh->frame_start[vh->n_frm] = vh->n_entry;
00767 
00768     if (fp)
00769         vithist_dump(vh, frm, lm, dict, fp);
00770 
00771     vithist_lmstate_reset(vh);
00772 
00773     vh->bestscore[vh->n_frm] = MAX_NEG_INT32;
00774     vh->bestvh[vh->n_frm] = -1;
00775 }
00776 
00777 
00778 int32
00779 vithist_utt_end(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict,
00780                 dict2pid_t *dict2pid, fillpen_t *fp)
00781 {
00782     int32 f, i;
00783     int32 sv, nsv, scr, bestscore, bestvh, vhid;
00784     vithist_entry_t *ve, *bestve = 0;
00785     int32 endwid = NGRAM_INVALID_WID;
00786 
00787     bestscore = MAX_NEG_INT32;
00788     bestvh = -1;
00789 
00790     /* Find last frame with entries in vithist table */
00791     /* by ARCHAN 20050525, it is possible that the last frame will not be reached in decoding */
00792 
00793     for (f = vh->n_frm - 1; f >= 0; --f) {
00794         sv = vh->frame_start[f];        /* First vithist entry in frame f */
00795         nsv = vh->frame_start[f + 1];   /* First vithist entry in next frame (f+1) */
00796 
00797         if (sv < nsv)
00798             break;
00799     }
00800     if (f < 0)
00801         return -1;
00802 
00803     if (f != vh->n_frm - 1)
00804         E_WARN("No word exit in frame %d, using exits from frame %d\n",
00805                vh->n_frm - 1, f);
00806 
00807     /* Terminate in a final </s> node (make this optional?) */
00808     endwid = ngram_wid(lm, S3_FINISH_WORD);
00809 
00810     for (i = sv; i < nsv; i++) {
00811         int n_used;
00812         ve = vithist_id2entry(vh, i);
00813         scr = ve->path.score;
00814         scr += ngram_tg_score(lm, endwid, ve->lmstate.lm3g.lwid[0],
00815                               ve->lmstate.lm3g.lwid[1], &n_used);
00816         if (bestscore < scr) {
00817             bestscore = scr;
00818             bestvh = i;
00819             bestve = ve;
00820         }
00821     }
00822     assert(bestvh >= 0);
00823 
00824 
00825     if (f != vh->n_frm - 1) {
00826         E_ERROR("No word exit in frame %d, using exits from frame %d\n",
00827                 vh->n_frm - 1, f);
00828 
00829         /* Add a dummy silwid covering the remainder of the utterance */
00830         assert(vh->frame_start[vh->n_frm - 1] ==
00831                vh->frame_start[vh->n_frm]);
00832         vh->n_frm -= 1;
00833         vithist_rescore(vh, lm, dict, dict2pid, fp,
00834                         s3dict_silwid(dict), vh->n_frm,
00835                         bestve->path.score, bestvh, -1, -1);
00836         vh->n_frm += 1;
00837         vh->frame_start[vh->n_frm] = vh->n_entry;
00838 
00839         return vithist_utt_end(vh, lm, dict, dict2pid, fp);
00840     }
00841 
00842     /*    vithist_dump(vh,-1,kbc,stdout); */
00843     /* Create an </s> entry */
00844     ve = vithist_entry_alloc(vh);
00845 
00846     ve->wid = s3dict_finishwid(dict);
00847     ve->sf = (bestve->ef == BAD_S3FRMID) ? 0 : bestve->ef + 1;
00848     ve->ef = vh->n_frm;
00849     ve->ascr = 0;
00850     ve->lscr = bestscore - bestve->path.score;
00851     ve->path.score = bestscore;
00852     ve->path.pred = bestvh;
00853     ve->type = 0;
00854     ve->valid = 1;
00855     ve->lmstate.lm3g.lwid[0] = endwid;
00856     ve->lmstate.lm3g.lwid[1] = ve->lmstate.lm3g.lwid[0];
00857 
00858     vhid = vh->n_entry - 1;
00859 
00860 
00861     /*    vithist_dump(vh,-1,kbc,stdout); */
00862 
00863     return vhid;
00864 
00865 }
00866 
00867 
00868 int32
00869 vithist_partialutt_end(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict)
00870 {
00871     int32 f, i;
00872     int32 sv, nsv, scr, bestscore, bestvh;
00873     vithist_entry_t *ve, *bestve;
00874     int32 endwid;
00875 
00876     /* Find last frame with entries in vithist table */
00877     for (f = vh->n_frm - 1; f >= 0; --f) {
00878         sv = vh->frame_start[f];        /* First vithist entry in frame f */
00879         nsv = vh->frame_start[f + 1];   /* First vithist entry in next frame (f+1) */
00880 
00881         if (sv < nsv)
00882             break;
00883     }
00884     if (f < 0)
00885         return -1;
00886 
00887     if (f != vh->n_frm - 1) {
00888         E_ERROR("No word exits from in block with last frame= %d\n",
00889                 vh->n_frm - 1);
00890         return -1;
00891     }
00892 
00893     /* Terminate in a final </s> node (make this optional?) */
00894     endwid = ngram_wid(lm, S3_FINISH_WORD);
00895 
00896     bestscore = MAX_NEG_INT32;
00897     bestvh = -1;
00898 
00899     for (i = sv; i < nsv; i++) {
00900         int n_used;
00901         ve = vithist_id2entry(vh, i);
00902 
00903         scr = ve->path.score;
00904         scr += ngram_tg_score(lm, endwid, ve->lmstate.lm3g.lwid[0],
00905                               ve->lmstate.lm3g.lwid[1], &n_used);
00906 
00907         if (bestscore < scr) {
00908             bestscore = scr;
00909             bestvh = i;
00910             bestve = ve;
00911         }
00912     }
00913 
00914     return bestvh;
00915 }
00916 
00917 
00918 void
00919 vithist_utt_reset(vithist_t * vh)
00920 {
00921     int32 b;
00922     int32 ent;
00923 
00924     vithist_lmstate_reset(vh);
00925 
00926     for (b = VITHIST_ID2BLK(vh->n_entry - 1); b >= 0; --b) {
00927 
00928         /* If rc_info is used, then free them */
00929         if (b != 0)
00930             ent = VITHIST_BLKSIZE - 1;
00931         else
00932             ent = vh->n_entry - 1;
00933         ckd_free((void *) vh->entry[b]);
00934         vh->entry[b] = NULL;
00935     }
00936     vh->n_entry = 0;
00937 
00938     vh->bestscore[0] = MAX_NEG_INT32;
00939     vh->bestvh[0] = -1;
00940 }
00941 
00942 void
00943 vithist_dump(vithist_t * vh, int32 frm, ngram_model_t *lm, s3dict_t *dict, FILE * fp)
00944 {
00945     int32 i, j;
00946     vithist_entry_t *ve;
00947     int32 sf, ef;
00948 
00949     if (frm >= 0) {
00950         sf = frm;
00951         ef = frm;
00952 
00953         fprintf(fp, "VITHIST  frame %d  #entries %d\n",
00954                 frm, vh->frame_start[sf + 1] - vh->frame_start[sf]);
00955     }
00956     else {
00957         sf = 0;
00958         ef = vh->n_frm - 1;
00959 
00960         fprintf(fp, "VITHIST  #frames %d  #entries %d\n", vh->n_frm,
00961                 vh->n_entry);
00962     }
00963     fprintf(fp, "\t%7s %5s %5s %11s %9s %8s %7s %4s Word (LM-state)\n",
00964             "Seq/Val", "SFrm", "EFrm", "PathScr", "SegAScr", "SegLScr",
00965             "Pred", "Type");
00966 
00967     for (i = sf; i <= ef; i++) {
00968         fprintf(fp, "%5d BS: %11d BV: %8d\n", i, vh->bestscore[i],
00969                 vh->bestvh[i]);
00970 
00971         for (j = vh->frame_start[i]; j < vh->frame_start[i + 1]; j++) {
00972             int32 lwid;
00973             ve = vithist_id2entry(vh, j);
00974 
00975             fprintf(fp, "\t%c%6d %5d %5d %11d %9d %8d %7d %4d %s",
00976                     (ve->valid ? ' ' : '*'), j,
00977                     ve->sf, ve->ef, ve->path.score, ve->ascr, ve->lscr,
00978                     ve->path.pred, ve->type, s3dict_wordstr(dict, ve->wid));
00979 
00980             fprintf(fp, " (%s", ngram_word(lm, ve->lmstate.lm3g.lwid[0]));
00981             lwid = ve->lmstate.lm3g.lwid[1];
00982             fprintf(fp, ", %s", ngram_word(lm, lwid));
00983             fprintf(fp, ")\n");
00984         }
00985 
00986         if (j == vh->frame_start[i])
00987             fprintf(fp, "\n");
00988     }
00989 
00990     fprintf(fp, "END_VITHIST\n");
00991     fflush(fp);
00992 }
00993 
00994 /* 
00995  * RAH, free memory allocated in vithist_free 
00996  */
00997 void
00998 vithist_free(vithist_t * v)
00999 {
01000 
01001     if (v) {
01002         vithist_utt_reset(v);
01003 
01004         if (v->entry) {
01005             ckd_free((void *) v->entry);
01006         }
01007 
01008         if (v->frame_start)
01009             ckd_free((void *) v->frame_start);
01010 
01011         if (v->bestscore)
01012             ckd_free((void *) v->bestscore);
01013 
01014         if (v->bestvh)
01015             ckd_free((void *) v->bestvh);
01016 
01017         if (v->lms2vh_root)
01018             ckd_free((void *) v->lms2vh_root);
01019 
01020         ckd_free((void *) v);
01021     }
01022 
01023 }
01024 
01025 void
01026 vithist_report(vithist_t * vh)
01027 {
01028     E_INFO_NOFN("Initialization of vithist_t, report:\n");
01029     if (vh) {
01030         E_INFO_NOFN("Word beam = %d\n", vh->wbeam);
01031         E_INFO_NOFN("Bigram Mode =%d\n", vh->bghist);
01032         E_INFO_NOFN("\n");
01033     }
01034     else {
01035         E_INFO_NOFN("Viterbi history is (null)\n");
01036     }
01037 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7