00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * lextree.h -- 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.11 2006/02/23 15:08:24 arthchan2003 00053 * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: 00054 * 00055 * 1, Fixed memory leaks. 00056 * 2, Add logic for full triphone expansion. At this point, the 00057 * propagation of scores in word end is still incorrect. So composite 00058 * triphone should still be used by default. 00059 * 3, Removed lextree_copies_hmm_propagate. 00060 * 00061 * Revision 1.10.4.6 2005/11/17 06:28:50 arthchan2003 00062 * Changed the code to used compressed triphones. Not yet correct at this point 00063 * 00064 * Revision 1.10.4.5 2005/10/17 04:53:44 arthchan2003 00065 * Shrub the trees so that the run-time memory could be controlled. 00066 * 00067 * Revision 1.10.4.4 2005/10/07 19:34:31 arthchan2003 00068 * In full cross-word triphones expansion, the previous implementation has several flaws, e.g, 1, it didn't consider the phone beam on cross word triphones. 2, Also, when the cross word triphone phone is used, children of the last phones will be regarded as cross word triphone. So, the last phone should not be evaluated at all. Last implementation has not safe-guaded that. 3, The rescoring for language model is not done correctly. What we still need to do: a, test the algorithm in more databases. b, implement some speed up schemes. 00069 * 00070 * Revision 1.10.4.2 2005/09/25 19:23:55 arthchan2003 00071 * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code. 00072 * 00073 * Revision 1.10.4.1 2005/06/27 05:37:05 arthchan2003 00074 * Incorporated several fixes to the search. 1, If a tree is empty, it will be removed and put back to the pool of tree, so number of trees will not be always increasing. 2, In the previous search, the answer is always "STOP P I T G S B U R G H </s>"and filler words never occurred in the search. The reason is very simple, fillers was not properly propagated in the search at all <**exculamation**> This version fixed this problem. The current search will give <sil> P I T T S B U R G H </sil> </s> to me. This I think it looks much better now. 00075 * 00076 * Revision 1.10 2005/06/21 23:32:58 arthchan2003 00077 * Log. Introduced lextree_init and filler_init to wrap up lextree_build 00078 * process. Split the hmm propagation to propagation for leaves and 00079 * non-leaves node. This allows an easier time for turning off the 00080 * rescoring stage. However, the implementation is not clever enough. One 00081 * should split the array to leave array and non-leave array. 00082 * 00083 * Revision 1.7 2005/06/16 04:59:10 archan 00084 * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale. 00085 * 00086 * Revision 1.6 2005/06/13 04:02:59 archan 00087 * Fixed most doxygen-style documentation under libs3decoder. 00088 * 00089 * Revision 1.5 2005/04/25 23:53:35 archan 00090 * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend. This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation. That is, to implement an algorithm which dynamically decide which tree copies should be entered. However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical. 00091 * 00092 * Revision 1.4 2005/04/25 19:22:47 archan 00093 * Refactor out the code of rescoring from lexical tree. Potentially we want to turn off the rescoring if we need. 00094 * 00095 * Revision 1.3 2005/03/30 01:22:47 archan 00096 * Fixed mistakes in last updates. Add 00097 * 00098 * 00099 * 29-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00100 * Modified some functions to be able to deal with HMMs with any number 00101 * of states. 00102 * 00103 * 07-Jul-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00104 * Added lextree_node_t.ci and lextree_ci_active(). 00105 * 00106 * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00107 * Started. 00108 */ 00109 00110 00111 #ifndef _S3_LEXTREE_H_ 00112 #define _S3_LEXTREE_H_ 00113 00114 #include <stdio.h> 00115 00116 #include <bitvec.h> 00117 #include <ngram_model.h> 00118 #include <glist.h> 00119 00120 #include "s3types.h" 00121 #include "hmm.h" 00122 #include "tmat.h" 00123 #include "vithist.h" 00124 #include "s3dict.h" 00125 #include "bin_mdef.h" 00126 #include "fillpen.h" 00127 00128 #ifdef __cplusplus 00129 extern "C" { 00130 #endif 00131 #if 0 00132 } /* Fool Emacs into not indenting things. */ 00133 #endif 00134 00135 #define LEXTREE_OPERATION_SUCCESS 1 00136 #define LEXTREE_OPERATION_FAILURE 0 00137 00138 #define LEXTREE_TYPE_FILLER -1 00139 #define LEXTREE_TYPE_UNIGRAM 0 00140 #define LEXTREE_TYPE_BIGRAM 1 00141 #define LEXTREE_TYPE_TRIGRAM 2 00142 00143 #if 0 /*Number reserved but not used */ 00144 #define LEXTREE_TYPE_QUADGRAM 3 00145 #define LEXTREE_TYPE_QUINGRAM 4 00146 #endif 00147 00148 00180 /* 00181 * \struct lextree_node_t 00182 * One node in a lextree. 00183 */ 00184 typedef struct { 00185 hmm_t hmm; 00187 glist_t children; 00201 int32 wid; 00202 int32 prob; 00203 int32 ssid; 00204 s3cipid_t rc; 00206 s3cipid_t ci; 00207 int8 composite; 00208 } lextree_node_t; 00209 00210 /* Access macros; not meant for arbitrary use */ 00211 #define lextree_node_wid(n) ((n)->wid) 00212 #define lextree_node_prob(n) ((n)->prob) 00213 #define lextree_node_ssid(n) ((n)->ssid) 00214 #define lextree_node_rc(n) ((n)->rc) 00215 #define lextree_node_composite(n) ((n)->composite) 00216 #define lextree_node_frame(n) ((n)->frame) 00217 00218 00219 /* 00220 * \struct lextree_lcroot_t 00221 * Root nodes of the lextree valid for a given left context CIphone. 00222 */ 00223 typedef struct { 00224 s3cipid_t lc; /* Left context CIphone */ 00225 glist_t root; /* Its data.ptr are the root nodes (lextree_node_t *) of interest; subset 00226 of the entire lextree roots */ 00227 } lextree_lcroot_t; 00228 00229 /* 00230 * \struct lextree_t 00231 * Entire lextree. 00232 */ 00233 typedef struct { 00234 int32 type; 00236 glist_t root; 00237 lextree_lcroot_t *lcroot; 00239 int32 n_lc; 00240 int32 n_node; 00241 int32 n_alloc_node; 00242 int32 n_alloc_blk_sz; 00244 bin_mdef_t *mdef; 00245 s3dict_t *dict; 00246 dict2pid_t *dict2pid; 00247 fillpen_t *fp; 00248 ngram_model_t *lm; 00249 tmat_t *tmat; 00251 hmm_context_t *ctx; 00252 hmm_context_t *comctx; 00254 lextree_node_t **active; 00255 lextree_node_t **next_active; 00257 int32 n_active; 00258 int32 n_next_active; 00260 int32 best; 00261 int32 wbest; 00263 char prev_word[100]; 00264 } lextree_t; 00265 00266 /* Access macros; not meant for arbitrary usage */ 00267 #define lextree_type(l) ((l)->type) 00268 #define lextree_root(l) ((l)->root) 00269 #define lextree_lcroot(l) ((l)->lcroot) 00270 #define lextree_n_lc(l) ((l)->n_lc) 00271 #define lextree_n_node(l) ((l)->n_node) 00272 #define lextree_n_alloc_node(l) ((l)->n_alloc_node) 00273 #define lextree_active(l) ((l)->active) 00274 #define lextree_next_active(l) ((l)->next_active) 00275 #define lextree_n_active(l) ((l)->n_active) 00276 #define lextree_n_next_active(l) ((l)->n_next_active) 00277 00278 00282 lextree_t* lextree_init( 00283 bin_mdef_t *mdef, 00284 tmat_t *tmat, 00285 s3dict_t *dict, 00286 dict2pid_t *dict2pid, 00287 fillpen_t *fp, 00288 ngram_model_t* lm, 00289 const char *lmname, 00290 int32 istreeUgProb, 00291 int32 bReport, 00292 int32 type 00293 ); 00294 00297 lextree_t* fillertree_init(bin_mdef_t *mdef, tmat_t *tmat, s3dict_t *dict, 00298 dict2pid_t *dict2pid, fillpen_t *fillpen); 00299 00300 00303 void lextree_report( 00304 lextree_t *ltree 00305 ); 00306 00310 typedef struct { 00311 s3wid_t wid; 00312 int32 prob; 00313 } wordprob_t; 00314 00315 /* Free a lextree that was created by lextree_init */ 00316 void lextree_free (lextree_t *lextree); 00317 00322 void lextree_utt_end (lextree_t *l); 00323 00324 00328 void lextree_enter (lextree_t *lextree, 00329 s3cipid_t lc, 00330 int32 frame, 00331 int32 inscore, 00332 int32 inhist, 00333 int32 thresh 00335 ); 00336 00342 void lextree_active_swap (lextree_t *lextree 00343 ); 00344 00345 00350 void lextree_ssid_active (lextree_t *lextree, 00351 bitvec_t *ssid, 00353 bitvec_t *comssid 00355 ); 00356 00360 void lextree_ci_active (lextree_t *lextree, 00361 bitvec_t *ci_active 00362 ); 00363 00368 int32 lextree_hmm_eval (lextree_t *lextree, 00370 int16 *senscr, 00371 int16 *comsen, 00372 int32 f, 00373 FILE *fp 00374 ); 00375 00376 /* 00377 * ARCHAN: Starting from Sphinx 3.6, lextree_hmm_propagate is separated to 00378 * lextree_hmm_propagate_non_leaves and lextree_hmm_propagate_leaves. This allows 00379 * the rescoring routine to be more easily switched off 00380 */ 00381 00406 int32 lextree_hmm_propagate_non_leaves (lextree_t *lextree, 00408 int32 cf, 00409 int32 th, 00410 int32 pth, 00411 int32 wth 00412 ); 00413 00414 00425 int32 lextree_hmm_propagate_leaves (lextree_t *lextree, 00427 vithist_t *vh, 00429 int32 cf, 00430 int32 wth 00431 ); 00432 00433 00440 void lextree_hmm_histbin (lextree_t *lextree, 00441 int32 bestscr, 00442 int32 *bin, 00444 int32 nbin, 00445 int32 bw 00446 ); 00447 00449 void lextree_dump (lextree_t *lextree, 00450 FILE *fp, 00451 int32 fmt 00452 ); 00453 00455 int32 num_lextree_links(lextree_t *ltree 00456 ); 00457 #if 0 00458 { /* Stop indent from complaining */ 00459 #endif 00460 #ifdef __cplusplus 00461 } 00462 #endif 00463 00464 #endif
1.6.1