src/libpocketsphinx/vithist.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * vithist.h -- Viterbi history
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1999 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00050  * A Great Reorganization of header files and executables
00051  * 
00052  * Revision 1.12  2006/02/23 16:56:13  arthchan2003
00053  * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH
00054  * 1, Split latticehist_t from flat_fwd.c to  here.
00055  * 2, Introduced vithist_entry_cp.  This is much better than the direct
00056  * copy we have been using. (Which could cause memory problem easily)
00057  *
00058  * Revision 1.11.4.9  2006/01/16 18:11:39  arthchan2003
00059  * 1, Important Bug fixes, a local pointer is used when realloc is needed.  This causes invalid writing of the memory, 2, Acoustic scores of the last segment in IBM lattice generation couldn't be found in the past.  Now, this could be handled by the optional acoustic scores in the node of lattice.  Application code is not yet checked-in
00060  *
00061  * Revision 1.11.4.8  2005/11/17 06:46:02  arthchan2003
00062  * 3 changes. 1, Code was added for full triphone implementation, not yet working. 2, Senone scale is removed from vithist table. This was a bug introduced during some fixes in CALO.
00063  *
00064  * Revision 1.11.4.7  2005/10/17 04:58:30  arthchan2003
00065  * vithist.c is the true source of memory leaks in the past for full cwtp expansion.  There are two changes made to avoid this happen, 1, instead of using ve->rc_info as the indicator whether something should be done, used a flag bFullExpand to control it. 2, avoid doing direct C-struct copy (like *ve = *tve), it becomes the reason of why memory are leaked and why the code goes wrong.
00066  *
00067  * Revision 1.11.4.6  2005/10/07 20:05:05  arthchan2003
00068  * When rescoring in full triphone expansion, the code should use the score for the word end with the correct right context.
00069  *
00070  * Revision 1.11.4.5  2005/09/26 06:37:33  arthchan2003
00071  * Before anyone get hurt, quickly change back to using SINGLE_RC_HISTORY.
00072  *
00073  * Revision 1.11.4.4  2005/09/25 19:23:55  arthchan2003
00074  * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
00075  *
00076  * Revision 1.11.4.3  2005/09/11 03:00:15  arthchan2003
00077  * All lattice-related functions are not incorporated into vithist. So-called "lattice" is essentially the predecessor of vithist_t and fsg_history_t.  Later when vithist_t support by right context score and history.  It should replace both of them.
00078  *
00079  * Revision 1.11.4.2  2005/07/26 02:20:39  arthchan2003
00080  * merged hyp_t with srch_hyp_t.
00081  *
00082  * Revision 1.11.4.1  2005/07/04 07:25:22  arthchan2003
00083  * Added vithist_entry_display and vh_lmstate_display in vithist.
00084  *
00085  * Revision 1.11  2005/06/22 02:47:35  arthchan2003
00086  * 1, Added reporting flag for vithist_init. 2, Added a flag to allow using words other than silence to be the last word for backtracing. 3, Fixed doxygen documentation. 4, Add  keyword.
00087  *
00088  * Revision 1.10  2005/06/16 04:59:10  archan
00089  * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale.
00090  *
00091  * Revision 1.9  2005/06/13 04:02:59  archan
00092  * Fixed most doxygen-style documentation under libs3decoder.
00093  *
00094  * Revision 1.8  2005/05/26 00:46:59  archan
00095  * Added functionality such that <sil> will not be inserted at the end of the utterance.
00096  *
00097  * Revision 1.7  2005/04/25 23:53:35  archan
00098  * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend.  This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation.  That is, to implement an algorithm which dynamically decide which tree copies should be entered.  However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical.
00099  *
00100  * Revision 1.6  2005/04/21 23:50:26  archan
00101  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
00102  *
00103  * Revision 1.5  2005/04/20 03:46:30  archan
00104  * factor dag header writer into vithist.[ch], do the corresponding change for lm_t
00105  *
00106  * Revision 1.4  2005/03/30 01:22:47  archan
00107  * Fixed mistakes in last updates. Add
00108  *
00109  * 
00110  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
00111  *              Added vithist_free() to free allocated memory
00112  * 
00113  * 30-Sep-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00114  *              Added vithist_entry_t.ascr.
00115  * 
00116  * 13-Aug-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00117  *              Added maxwpf handling.
00118  * 
00119  * 24-May-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00120  *              Started.
00121  */
00122 
00123 
00124 #ifndef _S3_VITHIST_H_
00125 #define _S3_VITHIST_H_
00126 
00127 #include <stdio.h>
00128 
00129 #include <ngram_model.h>
00130 #include <cmd_ln.h>
00131 #include <logmath.h>
00132 #include <glist.h>
00133 
00134 #include "s3types.h"
00135 #include "fillpen.h"
00136 #include "s3dict.h"
00137 #include "dict2pid.h"
00138 
00147 #ifdef __cplusplus
00148 extern "C" {
00149 #endif
00150 #if 0
00151 } /* Fool Emacs into not indenting things. */
00152 #endif
00153 
/*
 * LM state carried by a Viterbi history entry (it is the type of
 * vithist_entry_t.lmstate).  The union has a single variant, lm3g, which
 * holds two int32 word IDs.
 * NOTE(review): presumably the two IDs are the LM word history used for
 * trigram scoring -- confirm ordering and meaning against vithist.c.
 */
00159 typedef union vh_lmstate_u {
00160     struct {
00165         int32 lwid[2];          /* two word IDs; presumably LM (not dictionary) IDs -- TODO confirm */
00166     } lm3g;
00167 } vh_lmstate_t;
00168 
00169 
/*
 * A (score, predecessor) pair.  Used as vithist_entry_t.path and as the
 * element type of the vithist_entry_t.rc array.
 */
00170 typedef struct backpointer_s {
00171     int32 score;                /* path score; presumably a log-domain score -- TODO confirm */
00172     int32 pred;                 /* presumably the ID of the predecessor history entry -- TODO confirm */
00173 } backpointer_t;
00174 
/*
 * One Viterbi history entry.  Field meanings below that are marked
 * "presumably" are inferred from names only and should be confirmed
 * against vithist.c.
 */
00178 typedef struct {
00179     backpointer_t path;         /* score and predecessor for this entry (see vithist_entry_score/pred) */
00180     vh_lmstate_t lmstate;       /* LM state associated with this entry */
00181     s3wid_t wid;                /* word ID of this entry */
00182     s3frmid_t sf, ef;           /* presumably start frame and end frame of the word */
00183     int32 ascr;                 /* presumably the acoustic score of the word segment */
00184     int32 lscr;                 /* presumably the language-model score of the word segment */
00185     int16 type;                 /* entry type; values are not defined in this header */
00186     int16 valid;                /* validity flag; presumably cleared when the entry is pruned */
00187     backpointer_t *rc;          /* array of backpointers; presumably one per right context */
00188     int32 n_rc;                 /* presumably the number of elements in rc */
00189 } vithist_entry_t;
00190 
/*
 * Field accessors for a vithist_entry_t pointer.  Each expands to a plain
 * member access on (ve), so the result is an lvalue and (ve) is evaluated
 * exactly once.
 */
00192 #define vithist_entry_wid(ve)   ((ve)->wid)             /* word ID */
00193 
00195 #define vithist_entry_sf(ve)    ((ve)->sf)              /* sf field */
00196 
00198 #define vithist_entry_ef(ve)    ((ve)->ef)              /* ef field */
00199 
00201 #define vithist_entry_ascr(ve)  ((ve)->ascr)            /* ascr field */
00202 
00204 #define vithist_entry_lscr(ve)  ((ve)->lscr)            /* lscr field */
00205 
00207 #define vithist_entry_score(ve) ((ve)->path.score)      /* score stored in the entry's path backpointer */
00208 #define vithist_entry_pred(ve)  ((ve)->path.pred)       /* predecessor stored in the entry's path backpointer */
00209 #define vithist_entry_valid(ve) ((ve)->valid)           /* valid flag */
00210 
00211 
/*
 * Node of a tree that links an integer state to a Viterbi history entry;
 * child nodes hang off the glist_t `children`.  vithist_t.lms2vh_root holds
 * an array of pointers to such nodes.
 * NOTE(review): presumably this maps LM states to the best history entry
 * for that state within the current frame -- confirm against vithist.c.
 */
00223 typedef struct {                
00224     int32 state;                /* state value for this node; presumably an LM word ID -- TODO confirm */
00225     int32 vhid;                 /* presumably the history-entry ID associated with this node */
00226     vithist_entry_t *ve;        /* pointer to a history entry; presumably the one identified by vhid */
00227     glist_t children;           /* list of child nodes; element type not visible here */
00229 } vh_lms2vh_t;
00230 
00231 
/*
 * The Viterbi history table.  Entries are addressed by an integer ID and
 * stored in blocks: vithist_id2entry() selects block entry[id >> 14] and
 * adds the low 14 bits of the ID as the offset inside the block.
 */
00239 typedef struct {
00240     vithist_entry_t **entry;    /* array of entry blocks, indexed by VITHIST_ID2BLK(id) */
00241     int32 *frame_start;         /* frame_start[f] is the ID of the first entry of frame f
                                   (see vithist_first_entry / vithist_last_entry) */
00243     int32 n_entry;              /* presumably the number of entries currently in the table */
00244     int32 n_frm;                /* presumably the number of frames recorded so far */
00245     int32 n_ci;                 /* same name as the n_ci argument of vithist_init; presumably CI phone count */
00246     int32 bghist;               /* same name as the bghist argument of vithist_init; presumably a
                                   bigram-history-only switch -- TODO confirm */
00249     int32 wbeam;                /* same name as the wbeam argument of vithist_init; presumably a pruning beam */
00251     int32 *bestscore;           /* array of scores; presumably best exit score per frame */
00252     int32 *bestvh;              /* array of IDs; presumably the entry achieving bestscore per frame */
00254     vh_lms2vh_t **lms2vh_root;  /* array of tree roots, indexed by w in vithist_lms2vh_root(vh,w) */
00255     glist_t lwidlist;           /* list; presumably the word IDs with a non-empty lms2vh_root slot */
00256 } vithist_t;
00257 
00258 
/*
 * Block addressing for history entries.  An entry ID is split into a block
 * number (bits 14 and up) and an offset within the block (low 14 bits).
 * The shift count 14 and the mask 0x3fff are written out literally and must
 * be kept in step with VITHIST_BLKSIZE (1 << 14) by hand.
 */
00259 #define VITHIST_BLKSIZE         16384   /* (1 << 14) */
00260 #define VITHIST_MAXBLKS         256     /* presumably the maximum number of blocks in vithist_t.entry */
00261 #define VITHIST_ID2BLK(i)       ((i) >> 14)             /* block number of entry ID i */
00262 #define VITHIST_ID2BLKOFFSET(i) ((i) & 0x00003fff)      /* 14 LSB */
00263 
/*
 * Pointer to the entry with the given ID.  Note that `id` appears twice in
 * the expansion, so an argument with side effects (e.g. id++) is evaluated
 * twice.
 */
00267 #define vithist_id2entry(vh,id) ((vh)->entry[VITHIST_ID2BLK(id)] + VITHIST_ID2BLKOFFSET(id))
00268 
/*
 * Field accessors for a vithist_t pointer.  Each expands to a plain member
 * access (or a single array subscript), so the result is an lvalue.
 */
00270 #define vithist_n_entry(vh)             ((vh)->n_entry)         /* n_entry field */
00271 
00273 #define vithist_bestscore(vh)           ((vh)->bestscore)       /* bestscore array pointer */
00274 
00276 #define vithist_bestvh(vh)              ((vh)->bestvh)          /* bestvh array pointer */
00277 
00279 #define vithist_lms2vh_root(vh,w)       ((vh)->lms2vh_root[w])  /* tree root stored at index w */
00280 
00282 #define vithist_lwidlist(vh)            ((vh)->lwidlist)        /* lwidlist field */
00283 
00285 #define vithist_first_entry(vh,f)       ((vh)->frame_start[f])  /* ID of the first entry of frame f */
00286 
/*
 * ID of the last entry of frame f: one less than the first entry ID of the
 * following frame.  The frame argument is parenthesized before 1 is added so
 * that an argument containing an operator of lower precedence than '+'
 * (for example `c ? a : b` or `x & 1`) still selects frame (f)+1.
 */
00288 #define vithist_last_entry(vh,f)        ((vh)->frame_start[(f)+1] - 1)
00289 
00290 
/*
 * Create a Viterbi history structure.
 * Returns a pointer to the new vithist_t; the failure behaviour is not
 * visible from this header.  The matching release routine declared in this
 * header is vithist_free().
 */
00297 vithist_t *vithist_init(int32 lm_nword, /* presumably the number of words in the LM -- TODO confirm */
00298                         int32 n_ci,     /* presumably the number of CI phones -- TODO confirm */
00299                         int32 wbeam,    /* presumably a word-level pruning beam -- TODO confirm */
00300                         int32 bghist,   /* presumably selects bigram-only history -- TODO confirm */
00301                         int32 report    /* reporting flag (per the file history, revision 1.11) */
00302     );
00303 
00304 
/*
 * Called at the start of an utterance.
 * NOTE(review): presumably this inserts the initial history entry for the
 * given start word and returns its ID -- confirm against vithist.c.
 */
00309 int32 vithist_utt_begin(vithist_t *vh,  /* history table to operate on */
00310                         int32 wid,   /* word ID; presumably the dictionary ID of the start word */
00311                         int32 lwid   /* word ID; presumably the LM ID of the same word */
00312     );
00313 
00314 
/*
 * Called at the end of an utterance.
 * NOTE(review): presumably this selects or creates the final history entry
 * and returns its ID (negative on failure) -- confirm against vithist.c.
 */
00320 int32 vithist_utt_end(vithist_t *vh, /* history table to operate on */
00321                       ngram_model_t *lm,      /* language model */
00322                       s3dict_t *dict,         /* dictionary */
00323                       dict2pid_t *dict2pid,   /* dictionary-to-phone-ID mapping */
00324                       fillpen_t *fp           /* filler penalty structure (note: not a FILE pointer) */
00325     );
00326 
00327 
/*
 * Variant of vithist_utt_end() that takes only the history, LM and
 * dictionary.
 * NOTE(review): presumably used to obtain a partial result while the
 * utterance is still in progress -- confirm against vithist.c.
 */
00332 int32 vithist_partialutt_end(vithist_t *vh, /* history table to operate on */
00333                              ngram_model_t *lm,     /* language model */
00334                              s3dict_t *dict         /* dictionary */
00335     );
00336 
00337 /* Invoked at the end of each utterance to clear up and deallocate space */
00338 void vithist_utt_reset(vithist_t *vh  /* history table to reset */
00339     );
00340 
00341 
/*
 * Backtrace through the history starting from entry `id`.
 * Returns a glist_t; the element type and ownership of the list are not
 * visible from this header.
 * NOTE(review): presumably the list holds one hypothesis segment per word --
 * confirm against vithist.c.
 */
00346 glist_t vithist_backtrace(vithist_t *vh,        /* history table to read */
00347                           int32 id,             /* ID of the entry to start from; handling of
                                                   special values is not visible here */
00348                           s3dict_t *dict        /* dictionary */
00349     );
00350 
00351 
/*
 * Enter the candidate entry `tve` into the history table.
 * NOTE(review): the file history (revisions 1.11.4.7 and 1.12) says entries
 * are copied with vithist_entry_cp rather than by struct assignment; whether
 * `tve` may be freed by the caller afterwards should be confirmed in
 * vithist.c.
 */
00356 void vithist_enter(vithist_t * vh,              /* history table to update */
00357                    s3dict_t *dict,              /* dictionary */
00358                    dict2pid_t *dict2pid,        /* dictionary-to-phone-ID mapping */
00359                    vithist_entry_t * tve,       /* candidate entry to be entered */
00360                    int32 comp_rc                /* presumably a right-context index -- TODO confirm */
00362     );
00363 
/*
 * Rescore a word exit and record it in the history.  Per the file history
 * (revision 1.7) LM rescoring inside this routine is optional.
 * NOTE(review): parameter meanings marked "presumably" are inferred from
 * names only -- confirm against vithist.c.
 */
00371 void vithist_rescore(vithist_t *vh,    /* history table to update */
00372                      ngram_model_t *lm,  /* language model */
00373                      s3dict_t *dict,     /* dictionary */
00374                      dict2pid_t *dict2pid, /* dictionary-to-phone-ID mapping */
00375                      fillpen_t *fp,    /* filler penalty structure (not a FILE pointer) */
00376                      s3wid_t wid,      /* word ID of the word being exited */
00377                      int32 ef,          /* presumably the end frame of the word */
00378                      int32 score,       /* presumably the path score at the word exit */
00379                      int32 pred,        /* presumably the ID of the predecessor history entry */
00380                      int32 type,       /* entry type; values are not defined in this header */
00381                      int32 rc          /* presumably a right-context index -- TODO confirm */
00382     );
00383 
00384 
/*
 * Per-frame wrap-up of the history for frame `frm`.
 * NOTE(review): presumably this finalizes the entries added during the
 * frame and, when `fp` is non-NULL, dumps them to that stream -- confirm
 * against vithist.c.
 */
00386 void vithist_frame_windup(vithist_t *vh,        /* history table to update */
00387                           int32 frm,            /* frame number being wound up */
00388                           FILE *fp,             /* output stream; here fp is a stdio FILE pointer */
00390                           ngram_model_t *lm,  /* language model */
00391                           s3dict_t *dict      /* dictionary */
00392     );
00393 
/*
 * Prune the history entries of frame `frm`.
 * NOTE(review): the limits below are inferred from their names (the file
 * history only records that "maxwpf handling" was added) -- confirm against
 * vithist.c.
 */
00398 void vithist_prune(vithist_t *vh,      /* history table to prune */
00399                    s3dict_t *dict,      /* dictionary */
00400                    int32 frm,           /* frame whose entries are pruned */
00401                    int32 maxwpf,        /* presumably the maximum number of distinct words kept per frame */
00402                    int32 maxhist,       /* presumably the maximum number of history entries kept per frame */
00403                    int32 beam   /* presumably a score beam relative to the best entry of the frame */
00404     );
00405 
/*
 * Write a dump of the history to the stream `fp`.
 * NOTE(review): presumably `frm` selects a single frame and a negative
 * value selects all frames -- confirm against vithist.c.
 */
00409 void vithist_dump(vithist_t *vh,      /* history table to dump */
00410                   int32 frm,          /* frame selector; exact convention not visible here */
00412                   ngram_model_t *lm,  /* language model */
00413                   s3dict_t *dict,     /* dictionary */
00414                   FILE *fp            /* output stream */
00415     );
00416 
/*
 * Disabled declaration: the #if 0 guard removes this prototype from every
 * build.
 * NOTE(review): presumably kept for reference until DAG construction from
 * the history is ported -- confirm before deleting.
 */
00417 #if 0
00418 
00421 dag_t *vithist_dag_build(vithist_t * vh, glist_t hyp, s3dict_t * dict, int32 endid,
00422                          cmd_ln_t *config, logmath_t *logmath);
00423 #endif
00424 
/*
 * Free the memory allocated for a Viterbi history (per the file history,
 * added on 20.Apr.2001 for exactly that purpose).
 * NOTE(review): whether a NULL argument is accepted is not visible here.
 */
00429 void vithist_free(vithist_t *vh         /* history table to free */
00430     );
00431 
00432 
/*
 * Reporting routine for a Viterbi history (the file history, revision 1.7,
 * notes that vithist has its own reporting routine).  The output
 * destination and format are not visible from this header.
 */
00437 void vithist_report(vithist_t *vh       /* history table to report on */
00438     );
00439 
/*
 * Display an LM state.  The output destination and format are not visible
 * from this header.
 */
00444 void vh_lmstate_display(vh_lmstate_t *vhl, /* LM state to display */
00445                         s3dict_t *dict /* dictionary; presumably used to print word strings */
00446     );
00447 
/*
 * Display a single history entry.  The output destination and format are
 * not visible from this header.
 */
00451 void vithist_entry_display(vithist_entry_t *ve, /* entry to display */
00452                            s3dict_t* dict  /* dictionary; presumably used to print word strings */
00453     );
00454 
00455 #if 0
00456 { /* Stop indent from complaining */
00457 #endif
00458 #ifdef __cplusplus
00459 }
00460 #endif
00461 
00462 #endif

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7