00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * vithist.h -- Viterbi history 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.12 2006/02/23 16:56:13 arthchan2003 00053 * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH 00054 * 1, Split latticehist_t from flat_fwd.c to here. 00055 * 2, Introduced vithist_entry_cp. This is much better than the direct 00056 * copy we have been using. (Which could cause memory problem easily) 00057 * 00058 * Revision 1.11.4.9 2006/01/16 18:11:39 arthchan2003 00059 * 1, Important Bug fixes, a local pointer is used when realloc is needed. This causes invalid writing of the memory, 2, Acoustic scores of the last segment in IBM lattice generation couldn't be found in the past. Now, this could be handled by the optional acoustic scores in the node of lattice. Application code is not yet checked-in 00060 * 00061 * Revision 1.11.4.8 2005/11/17 06:46:02 arthchan2003 00062 * 3 changes. 1, Code was added for full triphone implementation, not yet working. 2, Senone scale is removed from vithist table. This was a bug introduced during some fixes in CALO. 00063 * 00064 * Revision 1.11.4.7 2005/10/17 04:58:30 arthchan2003 00065 * vithist.c is the true source of memory leaks in the past for full cwtp expansion. There are two changes made to avoid this happen, 1, instead of using ve->rc_info as the indicator whether something should be done, used a flag bFullExpand to control it. 2, avoid doing direct C-struct copy (like *ve = *tve), it becomes the reason of why memory are leaked and why the code goes wrong. 00066 * 00067 * Revision 1.11.4.6 2005/10/07 20:05:05 arthchan2003 00068 * When rescoring in full triphone expansion, the code should use the score for the word end with corret right context. 00069 * 00070 * Revision 1.11.4.5 2005/09/26 06:37:33 arthchan2003 00071 * Before anyone get hurt, quickly change back to using SINGLE_RC_HISTORY. 00072 * 00073 * Revision 1.11.4.4 2005/09/25 19:23:55 arthchan2003 00074 * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code. 00075 * 00076 * Revision 1.11.4.3 2005/09/11 03:00:15 arthchan2003 00077 * All lattice-related functions are not incorporated into vithist. So-called "lattice" is essentially the predecessor of vithist_t and fsg_history_t. Later when vithist_t support by right context score and history. It should replace both of them. 00078 * 00079 * Revision 1.11.4.2 2005/07/26 02:20:39 arthchan2003 00080 * merged hyp_t with srch_hyp_t. 00081 * 00082 * Revision 1.11.4.1 2005/07/04 07:25:22 arthchan2003 00083 * Added vithist_entry_display and vh_lmstate_display in vithist. 00084 * 00085 * Revision 1.11 2005/06/22 02:47:35 arthchan2003 00086 * 1, Added reporting flag for vithist_init. 2, Added a flag to allow using words other than silence to be the last word for backtracing. 3, Fixed doxygen documentation. 4, Add keyword. 00087 * 00088 * Revision 1.10 2005/06/16 04:59:10 archan 00089 * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale. 00090 * 00091 * Revision 1.9 2005/06/13 04:02:59 archan 00092 * Fixed most doxygen-style documentation under libs3decoder. 00093 * 00094 * Revision 1.8 2005/05/26 00:46:59 archan 00095 * Added functionalities that such that <sil> will not be inserted at the end of the utterance. 00096 * 00097 * Revision 1.7 2005/04/25 23:53:35 archan 00098 * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend. This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation. That is, to implement an algorithm which dynamically decide which tree copies should be entered. However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical. 00099 * 00100 * Revision 1.6 2005/04/21 23:50:26 archan 00101 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00102 * 00103 * Revision 1.5 2005/04/20 03:46:30 archan 00104 * factor dag header writer into vithist.[ch], do the corresponding change for lm_t 00105 * 00106 * Revision 1.4 2005/03/30 01:22:47 archan 00107 * Fixed mistakes in last updates. Add 00108 * 00109 * 00110 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) 00111 * Added vithist_free() to free allocated memory 00112 * 00113 * 30-Sep-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00114 * Added vithist_entry_t.ascr. 00115 * 00116 * 13-Aug-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00117 * Added maxwpf handling. 00118 * 00119 * 24-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00120 * Started. 00121 */ 00122 00123 00124 #ifndef _S3_VITHIST_H_ 00125 #define _S3_VITHIST_H_ 00126 00127 #include <stdio.h> 00128 00129 #include <ngram_model.h> 00130 #include <cmd_ln.h> 00131 #include <logmath.h> 00132 #include <glist.h> 00133 00134 #include "s3types.h" 00135 #include "fillpen.h" 00136 #include "s3dict.h" 00137 #include "dict2pid.h" 00138 00147 #ifdef __cplusplus 00148 extern "C" { 00149 #endif 00150 #if 0 00151 } /* Fool Emacs into not indenting things. */ 00152 #endif 00153 00159 typedef union vh_lmstate_u { 00160 struct { 00165 int32 lwid[2]; 00166 } lm3g; 00167 } vh_lmstate_t; 00168 00169 00170 typedef struct backpointer_s { 00171 int32 score; 00172 int32 pred; 00173 } backpointer_t; 00174 00178 typedef struct { 00179 backpointer_t path; 00180 vh_lmstate_t lmstate; 00181 s3wid_t wid; 00182 s3frmid_t sf, ef; 00183 int32 ascr; 00184 int32 lscr; 00185 int16 type; 00186 int16 valid; 00187 backpointer_t *rc; 00188 int32 n_rc; 00189 } vithist_entry_t; 00190 00192 #define vithist_entry_wid(ve) ((ve)->wid) 00193 00195 #define vithist_entry_sf(ve) ((ve)->sf) 00196 00198 #define vithist_entry_ef(ve) ((ve)->ef) 00199 00201 #define vithist_entry_ascr(ve) ((ve)->ascr) 00202 00204 #define vithist_entry_lscr(ve) ((ve)->lscr) 00205 00207 #define vithist_entry_score(ve) ((ve)->path.score) 00208 #define vithist_entry_pred(ve) ((ve)->path.pred) 00209 #define vithist_entry_valid(ve) ((ve)->valid) 00210 00211 00223 typedef struct { 00224 int32 state; 00225 int32 vhid; 00226 vithist_entry_t *ve; 00227 glist_t children; 00229 } vh_lms2vh_t; 00230 00231 00239 typedef struct { 00240 vithist_entry_t **entry; 00241 int32 *frame_start; 00243 int32 n_entry; 00244 int32 n_frm; 00245 int32 n_ci; 00246 int32 bghist; 00249 int32 wbeam; 00251 int32 *bestscore; 00252 int32 *bestvh; 00254 vh_lms2vh_t **lms2vh_root; 00255 glist_t lwidlist; 00256 } vithist_t; 00257 00258 00259 #define VITHIST_BLKSIZE 16384 /* (1 << 14) */ 00260 #define VITHIST_MAXBLKS 256 00261 #define VITHIST_ID2BLK(i) ((i) >> 14) 00262 #define VITHIST_ID2BLKOFFSET(i) ((i) & 0x00003fff) /* 14 LSB */ 00263 00267 #define vithist_id2entry(vh,id) ((vh)->entry[VITHIST_ID2BLK(id)] + VITHIST_ID2BLKOFFSET(id)) 00268 00270 #define vithist_n_entry(vh) ((vh)->n_entry) 00271 00273 #define vithist_bestscore(vh) ((vh)->bestscore) 00274 00276 #define vithist_bestvh(vh) ((vh)->bestvh) 00277 00279 #define vithist_lms2vh_root(vh,w) ((vh)->lms2vh_root[w]) 00280 00282 #define vithist_lwidlist(vh) ((vh)->lwidlist) 00283 00285 #define vithist_first_entry(vh,f) ((vh)->frame_start[f]) 00286 00288 #define vithist_last_entry(vh,f) ((vh)->frame_start[f+1] - 1) 00289 00290 00297 vithist_t *vithist_init(int32 lm_nword, 00298 int32 n_ci, 00299 int32 wbeam, 00300 int32 bghist, 00301 int32 report 00302 ); 00303 00304 00309 int32 vithist_utt_begin(vithist_t *vh, 00310 int32 wid, 00311 int32 lwid 00312 ); 00313 00314 00320 int32 vithist_utt_end(vithist_t *vh, 00321 ngram_model_t *lm, 00322 s3dict_t *dict, 00323 dict2pid_t *dict2pid, 00324 fillpen_t *fp 00325 ); 00326 00327 00332 int32 vithist_partialutt_end(vithist_t *vh, 00333 ngram_model_t *lm, 00334 s3dict_t *dict 00335 ); 00336 00337 /* Invoked at the end of each utterance to clear up and deallocate space */ 00338 void vithist_utt_reset(vithist_t *vh 00339 ); 00340 00341 00346 glist_t vithist_backtrace(vithist_t *vh, 00347 int32 id, 00348 s3dict_t *dict 00349 ); 00350 00351 00356 void vithist_enter(vithist_t * vh, 00357 s3dict_t *dict, 00358 dict2pid_t *dict2pid, 00359 vithist_entry_t * tve, 00360 int32 comp_rc 00362 ); 00363 00371 void vithist_rescore(vithist_t *vh, 00372 ngram_model_t *lm, 00373 s3dict_t *dict, 00374 dict2pid_t *dict2pid, 00375 fillpen_t *fp, 00376 s3wid_t wid, 00377 int32 ef, 00378 int32 score, 00379 int32 pred, 00380 int32 type, 00381 int32 rc 00382 ); 00383 00384 00386 void vithist_frame_windup(vithist_t *vh, 00387 int32 frm, 00388 FILE *fp, 00390 ngram_model_t *lm, 00391 s3dict_t *dict 00392 ); 00393 00398 void vithist_prune(vithist_t *vh, 00399 s3dict_t *dict, 00400 int32 frm, 00401 int32 maxwpf, 00402 int32 maxhist, 00403 int32 beam 00404 ); 00405 00409 void vithist_dump(vithist_t *vh, 00410 int32 frm, 00412 ngram_model_t *lm, 00413 s3dict_t *dict, 00414 FILE *fp 00415 ); 00416 00417 #if 0 00418 00421 dag_t *vithist_dag_build(vithist_t * vh, glist_t hyp, s3dict_t * dict, int32 endid, 00422 cmd_ln_t *config, logmath_t *logmath); 00423 #endif 00424 00429 void vithist_free(vithist_t *vh 00430 ); 00431 00432 00437 void vithist_report(vithist_t *vh 00438 ); 00439 00444 void vh_lmstate_display(vh_lmstate_t *vhl, 00445 s3dict_t *dict 00446 ); 00447 00451 void vithist_entry_display(vithist_entry_t *ve, 00452 s3dict_t* dict 00453 ); 00454 00455 #if 0 00456 { /* Stop indent from complaining */ 00457 #endif 00458 #ifdef __cplusplus 00459 } 00460 #endif 00461 00462 #endif
1.6.1