00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00042
00043 #include <string.h>
00044 #include <assert.h>
00045
00046
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050
00051
00052 #include "ngram_search_fwdtree.h"
00053 #include "phone_loop_search.h"
00054
00055
00056 #define __CHAN_DUMP__ 0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062
00063
00064
00065
00066
00067 static void
00068 init_search_tree(ngram_search_t *ngs)
00069 {
00070 int32 w, ndiph, i, n_words, n_ci;
00071 s3dict_t *dict = ps_search_dict(ngs);
00072 bitvec_t *dimap;
00073
00074 n_words = ps_search_n_words(ngs);
00075 ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
00076
00077
00078 ndiph = 0;
00079 ngs->n_1ph_words = 0;
00080 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
00081
00082 dimap = bitvec_alloc(n_ci * n_ci);
00083 for (w = 0; w < n_words; w++) {
00084 if (!s3dict_real_word(dict, w))
00085 continue;
00086 if (s3dict_pronlen(dict, w) == 1)
00087 ++ngs->n_1ph_words;
00088 else {
00089 int ph0, ph1;
00090 ph0 = s3dict_pron(dict, w, 0);
00091 ph1 = s3dict_pron(dict, w, 1);
00092
00093 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
00094 bitvec_set(dimap, ph0 * n_ci + ph1);
00095 ++ndiph;
00096 }
00097 }
00098 }
00099 E_INFO("%d unique initial diphones\n", ndiph);
00100 bitvec_free(dimap);
00101
00102
00103 ngs->n_1ph_words += s3dict_num_fillers(dict) + 2;
00104 ngs->n_root_chan_alloc = ndiph + 1;
00105
00106
00107 for (w = 0; w < n_words; ++w) {
00108 if (s3dict_real_word(dict, w))
00109 continue;
00110 if (s3dict_pronlen(dict, w) != 1) {
00111 E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
00112 w, s3dict_wordstr(dict, w));
00113 --ngs->n_1ph_words;
00114 }
00115 }
00116
00117
00118 ngs->root_chan =
00119 ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
00120 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00121 hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
00122 ngs->root_chan[i].penult_phn_wid = -1;
00123 ngs->root_chan[i].next = NULL;
00124 }
00125
00126
00127
00128 ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00129 i = 0;
00130 for (w = 0; w < n_words; w++) {
00131 if (s3dict_pronlen(dict, w) != 1)
00132 continue;
00133
00134 ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00135 ngs->rhmm_1ph[i].ciphone = s3dict_first_phone(dict, w);
00136 hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00137 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
00138 ngs->rhmm_1ph[i].ciphone);
00139 ngs->rhmm_1ph[i].next = NULL;
00140
00141 ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00142 i++;
00143 }
00144
00145 ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00146 sizeof(*ngs->single_phone_wid));
00147 E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00148 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00149 }
00150
00151
00152
00153
00154 static void
00155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci)
00156 {
00157 hmm->next = NULL;
00158 hmm->alt = NULL;
00159 hmm->info.penult_phn_wid = -1;
00160 hmm->ciphone = ci;
00161 hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, ci);
00162 }
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 static void
00175 create_search_tree(ngram_search_t *ngs)
00176 {
00177 chan_t *hmm;
00178 root_chan_t *rhmm;
00179 int32 w, i, j, p, ph;
00180 int32 n_words;
00181 s3dict_t *dict = ps_search_dict(ngs);
00182 dict2pid_t *d2p = ps_search_dict2pid(ngs);
00183
00184 n_words = ps_search_n_words(ngs);
00185
00186 E_INFO("Creating search tree\n");
00187
00188 for (w = 0; w < n_words; w++)
00189 ngs->homophone_set[w] = -1;
00190
00191 E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
00192 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00193
00194 ngs->n_1ph_LMwords = 0;
00195 ngs->n_root_chan = 0;
00196 ngs->n_nonroot_chan = 0;
00197
00198 for (w = 0; w < n_words; w++) {
00199 int ciphone, ci2phone;
00200
00201
00202 if (!ngram_model_set_known_wid(ngs->lmset, s3dict_basewid(dict, w)))
00203 continue;
00204
00205
00206 if (s3dict_pronlen(dict, w) == 1) {
00207 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00208 ngs->n_1ph_LMwords, s3dict_wordstr(dict, w)));
00209 ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
00210 continue;
00211 }
00212
00213
00214
00215 ciphone = s3dict_pron(dict, w, 0);
00216 ci2phone = s3dict_pron(dict, w, 1);
00217 for (i = 0; i < ngs->n_root_chan; ++i) {
00218 if (ngs->root_chan[i].ciphone == ciphone
00219 && ngs->root_chan[i].ci2phone == ci2phone)
00220 break;
00221 }
00222 if (i == ngs->n_root_chan) {
00223 rhmm = &(ngs->root_chan[ngs->n_root_chan]);
00224 rhmm->hmm.tmatid = ciphone;
00225
00226 hmm_mpx_ssid(&rhmm->hmm, 0) =
00227 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
00228 rhmm->ciphone = ciphone;
00229 rhmm->ci2phone = ci2phone;
00230 ngs->n_root_chan++;
00231 }
00232 else
00233 rhmm = &(ngs->root_chan[i]);
00234
00235 E_DEBUG(2, ("word %s rhmm %d\n", s3dict_wordstr(dict, w), rhmm - ngs->root_chan));
00236
00237 if (s3dict_pronlen(dict, w) == 2) {
00238
00239 if ((j = rhmm->penult_phn_wid) < 0)
00240 rhmm->penult_phn_wid = w;
00241 else {
00242 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00243 ngs->homophone_set[j] = w;
00244 }
00245 }
00246 else {
00247
00248 ph = dict2pid_internal(d2p, w, 1);
00249 hmm = rhmm->next;
00250 if (hmm == NULL) {
00251 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
00252 init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, 1));
00253 ngs->n_nonroot_chan++;
00254 }
00255 else {
00256 chan_t *prev_hmm = NULL;
00257
00258 for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
00259 prev_hmm = hmm;
00260 if (!hmm) {
00261 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00262 init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, 1));
00263 ngs->n_nonroot_chan++;
00264 }
00265 }
00266 E_DEBUG(3,("phone %s = %d\n",
00267 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00268 s3dict_pron(dict, w, 1)), ph));
00269
00270 for (p = 2; p < s3dict_pronlen(dict, w) - 1; p++) {
00271 ph = dict2pid_internal(d2p, w, p);
00272 if (!hmm->next) {
00273 hmm->next = listelem_malloc(ngs->chan_alloc);
00274 hmm = hmm->next;
00275 init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, p));
00276 ngs->n_nonroot_chan++;
00277 }
00278 else {
00279 chan_t *prev_hmm = NULL;
00280
00281 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
00282 hmm = hmm->alt)
00283 prev_hmm = hmm;
00284 if (!hmm) {
00285 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00286 init_nonroot_chan(ngs, hmm, ph, s3dict_pron(dict, w, p));
00287 ngs->n_nonroot_chan++;
00288 }
00289 }
00290 E_DEBUG(3,("phone %s = %d\n",
00291 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00292 s3dict_pron(dict, w, p)), ph));
00293 }
00294
00295
00296 if ((j = hmm->info.penult_phn_wid) < 0)
00297 hmm->info.penult_phn_wid = w;
00298 else {
00299 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00300 ngs->homophone_set[j] = w;
00301 }
00302 }
00303 }
00304
00305 ngs->n_1ph_words = ngs->n_1ph_LMwords;
00306
00307
00308 for (w = 0; w < n_words; ++w) {
00309
00310 if (s3dict_pronlen(dict, w) != 1)
00311 continue;
00312
00313 if (s3dict_real_word(dict, w))
00314 continue;
00315 if (ngram_model_set_known_wid(ngs->lmset, s3dict_basewid(dict, w)))
00316 continue;
00317 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00318 ngs->n_1ph_words, s3dict_wordstr(dict, w)));
00319 ngs->single_phone_wid[ngs->n_1ph_words++] = w;
00320 }
00321
00322 if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
00323
00324 ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
00325 E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
00326
00327
00328 if (ngs->active_chan_list)
00329 ckd_free_2d(ngs->active_chan_list);
00330 ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
00331 sizeof(**ngs->active_chan_list));
00332 }
00333
00334 E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
00335 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00336 }
00337
00338 static void
00339 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
00340 {
00341 chan_t *child, *sibling;
00342
00343
00344 for (child = hmm->next; child; child = sibling) {
00345 sibling = child->alt;
00346 reinit_search_subtree(ngs, child);
00347 }
00348
00349
00350 hmm_deinit(&hmm->hmm);
00351 listelem_free(ngs->chan_alloc, hmm);
00352 }
00353
00354
00355
00356
00357
00358 static void
00359 reinit_search_tree(ngram_search_t *ngs)
00360 {
00361 int32 i;
00362 chan_t *hmm, *sibling;
00363
00364 for (i = 0; i < ngs->n_root_chan; i++) {
00365 hmm = ngs->root_chan[i].next;
00366
00367 while (hmm) {
00368 sibling = hmm->alt;
00369 reinit_search_subtree(ngs, hmm);
00370 hmm = sibling;
00371 }
00372
00373 ngs->root_chan[i].penult_phn_wid = -1;
00374 ngs->root_chan[i].next = NULL;
00375 }
00376 ngs->n_nonroot_chan = 0;
00377 }
00378
00379 void
00380 ngram_fwdtree_init(ngram_search_t *ngs)
00381 {
00382
00383 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
00384 sizeof(*ngs->bestbp_rc));
00385 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00386 sizeof(*ngs->lastphn_cand));
00387 init_search_tree(ngs);
00388 create_search_tree(ngs);
00389 }
00390
00391 void
00392 ngram_fwdtree_deinit(ngram_search_t *ngs)
00393 {
00394 int i, w, n_words;
00395
00396 n_words = ps_search_n_words(ngs);
00397
00398 reinit_search_tree(ngs);
00399
00400
00401 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00402 hmm_deinit(&ngs->root_chan[i].hmm);
00403 }
00404 if (ngs->rhmm_1ph) {
00405 for (i = w = 0; w < n_words; ++w) {
00406 if (s3dict_pronlen(ps_search_dict(ngs), w) != 1)
00407 continue;
00408 hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00409 ++i;
00410 }
00411 ckd_free(ngs->rhmm_1ph);
00412 }
00413 ngs->n_nonroot_chan = 0;
00414 ckd_free(ngs->root_chan);
00415 ckd_free(ngs->homophone_set);
00416 ckd_free(ngs->single_phone_wid);
00417 ngs->max_nonroot_chan = 0;
00418 ckd_free_2d(ngs->active_chan_list);
00419 ckd_free(ngs->cand_sf);
00420 ckd_free(ngs->bestbp_rc);
00421 ckd_free(ngs->lastphn_cand);
00422 }
00423
00424 int
00425 ngram_fwdtree_reinit(ngram_search_t *ngs)
00426 {
00427 reinit_search_tree(ngs);
00428 create_search_tree(ngs);
00429 return 0;
00430 }
00431
00432 void
00433 ngram_fwdtree_start(ngram_search_t *ngs)
00434 {
00435 ps_search_t *base = (ps_search_t *)ngs;
00436 int32 i, w, n_words;
00437 root_chan_t *rhmm;
00438
00439 n_words = ps_search_n_words(ngs);
00440
00441
00442 memset(&ngs->st, 0, sizeof(ngs->st));
00443
00444
00445 ngs->bpidx = 0;
00446 ngs->bss_head = 0;
00447
00448
00449 for (i = 0; i < n_words; ++i)
00450 ngs->word_lat_idx[i] = NO_BP;
00451
00452
00453 ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
00454 ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
00455
00456
00457 ngs->best_score = 0;
00458 ngs->renormalized = 0;
00459
00460
00461 for (i = 0; i < n_words; i++)
00462 ngs->last_ltrans[i].sf = -1;
00463 ngs->n_frame = 0;
00464
00465
00466 ckd_free(base->hyp_str);
00467 base->hyp_str = NULL;
00468
00469
00470
00471 for (i = 0; i < ngs->n_1ph_words; i++) {
00472 w = ngs->single_phone_wid[i];
00473 rhmm = (root_chan_t *) ngs->word_chan[w];
00474 hmm_clear(&rhmm->hmm);
00475 }
00476
00477
00478 rhmm = (root_chan_t *) ngs->word_chan[s3dict_startwid(ps_search_dict(ngs))];
00479 hmm_clear(&rhmm->hmm);
00480 hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00481 }
00482
00483
00484
00485
00486
00487 static void
00488 compute_sen_active(ngram_search_t *ngs, int frame_idx)
00489 {
00490 root_chan_t *rhmm;
00491 chan_t *hmm, **acl;
00492 int32 i, w, *awl;
00493
00494 acmod_clear_active(ps_search_acmod(ngs));
00495
00496
00497 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00498 if (hmm_frame(&rhmm->hmm) == frame_idx)
00499 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00500 }
00501
00502
00503 i = ngs->n_active_chan[frame_idx & 0x1];
00504 acl = ngs->active_chan_list[frame_idx & 0x1];
00505 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00506 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00507 }
00508
00509
00510 i = ngs->n_active_word[frame_idx & 0x1];
00511 awl = ngs->active_word_list[frame_idx & 0x1];
00512 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00513 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00514 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00515 }
00516 }
00517 for (i = 0; i < ngs->n_1ph_words; i++) {
00518 w = ngs->single_phone_wid[i];
00519 rhmm = (root_chan_t *) ngs->word_chan[w];
00520
00521 if (hmm_frame(&rhmm->hmm) == frame_idx)
00522 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00523 }
00524 }
00525
00526 static void
00527 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00528 {
00529 root_chan_t *rhmm;
00530 chan_t *hmm, **acl;
00531 int32 i, w, *awl;
00532
00533
00534 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00535 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00536 hmm_normalize(&rhmm->hmm, norm);
00537 }
00538 }
00539
00540
00541 i = ngs->n_active_chan[frame_idx & 0x1];
00542 acl = ngs->active_chan_list[frame_idx & 0x1];
00543 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00544 hmm_normalize(&hmm->hmm, norm);
00545 }
00546
00547
00548 i = ngs->n_active_word[frame_idx & 0x1];
00549 awl = ngs->active_word_list[frame_idx & 0x1];
00550 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00551 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00552 hmm_normalize(&hmm->hmm, norm);
00553 }
00554 }
00555 for (i = 0; i < ngs->n_1ph_words; i++) {
00556 w = ngs->single_phone_wid[i];
00557 rhmm = (root_chan_t *) ngs->word_chan[w];
00558 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00559 hmm_normalize(&rhmm->hmm, norm);
00560 }
00561 }
00562
00563 ngs->renormalized = TRUE;
00564 }
00565
00566 static int32
00567 eval_root_chan(ngram_search_t *ngs, int frame_idx)
00568 {
00569 root_chan_t *rhmm;
00570 int32 i, bestscore;
00571
00572 bestscore = WORST_SCORE;
00573 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00574 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00575 int32 score = chan_v_eval(rhmm);
00576 if (score BETTER_THAN bestscore)
00577 bestscore = score;
00578 ++ngs->st.n_root_chan_eval;
00579 }
00580 }
00581 return (bestscore);
00582 }
00583
00584 static int32
00585 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00586 {
00587 chan_t *hmm, **acl;
00588 int32 i, bestscore;
00589
00590 i = ngs->n_active_chan[frame_idx & 0x1];
00591 acl = ngs->active_chan_list[frame_idx & 0x1];
00592 bestscore = WORST_SCORE;
00593 ngs->st.n_nonroot_chan_eval += i;
00594
00595 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00596 int32 score = chan_v_eval(hmm);
00597 assert(hmm_frame(&hmm->hmm) == frame_idx);
00598 if (score BETTER_THAN bestscore)
00599 bestscore = score;
00600 }
00601
00602 return bestscore;
00603 }
00604
00605 static int32
00606 eval_word_chan(ngram_search_t *ngs, int frame_idx)
00607 {
00608 root_chan_t *rhmm;
00609 chan_t *hmm;
00610 int32 i, w, bestscore, *awl, j, k;
00611
00612 k = 0;
00613 bestscore = WORST_SCORE;
00614 awl = ngs->active_word_list[frame_idx & 0x1];
00615
00616 i = ngs->n_active_word[frame_idx & 0x1];
00617 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00618 assert(bitvec_is_set(ngs->word_active, w));
00619 bitvec_clear(ngs->word_active, w);
00620 assert(ngs->word_chan[w] != NULL);
00621
00622 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00623 int32 score;
00624
00625 assert(hmm_frame(&hmm->hmm) == frame_idx);
00626 score = chan_v_eval(hmm);
00627
00628
00629 if (score BETTER_THAN bestscore)
00630 bestscore = score;
00631
00632 k++;
00633 }
00634 }
00635
00636
00637 j = 0;
00638 for (i = 0; i < ngs->n_1ph_words; i++) {
00639 int32 score;
00640
00641 w = ngs->single_phone_wid[i];
00642 rhmm = (root_chan_t *) ngs->word_chan[w];
00643 if (hmm_frame(&rhmm->hmm) < frame_idx)
00644 continue;
00645
00646 score = chan_v_eval(rhmm);
00647
00648 if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
00649 bestscore = score;
00650
00651 j++;
00652 }
00653
00654 ngs->st.n_last_chan_eval += k + j;
00655 ngs->st.n_nonroot_chan_eval += k + j;
00656 ngs->st.n_word_lastchan_eval +=
00657 ngs->n_active_word[frame_idx & 0x1] + j;
00658
00659 return bestscore;
00660 }
00661
00662 static int32
00663 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
00664 {
00665 int32 bs;
00666
00667 hmm_context_set_senscore(ngs->hmmctx, senone_scores);
00668 ngs->best_score = eval_root_chan(ngs, frame_idx);
00669 if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00670 ngs->best_score = bs;
00671 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00672 ngs->best_score = bs;
00673 ngs->last_phone_best_score = bs;
00674
00675 return ngs->best_score;
00676 }
00677
00678
00679
00680
00681
00682
00683 static void
00684 prune_root_chan(ngram_search_t *ngs, int frame_idx)
00685 {
00686 root_chan_t *rhmm;
00687 chan_t *hmm;
00688 int32 i, nf, w;
00689 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00690 chan_t **nacl;
00691 lastphn_cand_t *candp;
00692 phone_loop_search_t *pls;
00693
00694 nf = frame_idx + 1;
00695 thresh = ngs->best_score + ngs->dynamic_beam;
00696 newphone_thresh = ngs->best_score + ngs->pbeam;
00697 lastphn_thresh = ngs->best_score + ngs->lpbeam;
00698 nacl = ngs->active_chan_list[nf & 0x1];
00699 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00700
00701 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
00702 E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
00703 i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
00704
00705 if (hmm_frame(&rhmm->hmm) < frame_idx)
00706 continue;
00707
00708 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00709 hmm_frame(&rhmm->hmm) = nf;
00710 E_DEBUG(3, ("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
00711
00712
00713 for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
00714 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip
00715 + phone_loop_search_score(pls, hmm->ciphone);
00716 if (newphone_score BETTER_THAN newphone_thresh) {
00717 if ((hmm_frame(&hmm->hmm) < frame_idx)
00718 || (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00719 hmm_enter(&hmm->hmm, newphone_score,
00720 hmm_out_history(&rhmm->hmm), nf);
00721 *(nacl++) = hmm;
00722 }
00723 }
00724 }
00725
00726
00727
00728
00729
00730
00731 for (w = rhmm->penult_phn_wid; w >= 0;
00732 w = ngs->homophone_set[w]) {
00733 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip
00734 + phone_loop_search_score(pls, s3dict_last_phone(ps_search_dict(ngs),w));
00735 E_DEBUG(3, ("wid %d newphone_score %d\n", w, newphone_score));
00736 E_DEBUG(3, ("wid %d newphone_score %d\n", w, newphone_score));
00737 if (newphone_score BETTER_THAN lastphn_thresh) {
00738 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00739 ngs->n_lastphn_cand++;
00740 candp->wid = w;
00741 candp->score =
00742 newphone_score - ngs->nwpen;
00743 candp->bp = hmm_out_history(&rhmm->hmm);
00744 }
00745 }
00746 }
00747 }
00748 ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00749 }
00750
00751
00752
00753
00754
00755 static void
00756 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00757 {
00758 chan_t *hmm, *nexthmm;
00759 int32 nf, w, i;
00760 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00761 chan_t **acl, **nacl;
00762 lastphn_cand_t *candp;
00763 phone_loop_search_t *pls;
00764
00765 nf = frame_idx + 1;
00766
00767 thresh = ngs->best_score + ngs->dynamic_beam;
00768 newphone_thresh = ngs->best_score + ngs->pbeam;
00769 lastphn_thresh = ngs->best_score + ngs->lpbeam;
00770 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00771
00772 acl = ngs->active_chan_list[frame_idx & 0x1];
00773 nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
00774
00775 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
00776 --i, hmm = *(acl++)) {
00777 assert(hmm_frame(&hmm->hmm) >= frame_idx);
00778
00779 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00780
00781 if (hmm_frame(&hmm->hmm) != nf) {
00782 hmm_frame(&hmm->hmm) = nf;
00783 *(nacl++) = hmm;
00784 }
00785
00786
00787 for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
00788 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip
00789 + phone_loop_search_score(pls, nexthmm->ciphone);
00790 if ((newphone_score BETTER_THAN newphone_thresh)
00791 && ((hmm_frame(&nexthmm->hmm) < frame_idx)
00792 || (newphone_score
00793 BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
00794 if (hmm_frame(&nexthmm->hmm) != nf) {
00795
00796 *(nacl++) = nexthmm;
00797 }
00798 hmm_enter(&nexthmm->hmm, newphone_score,
00799 hmm_out_history(&hmm->hmm), nf);
00800 }
00801 }
00802
00803
00804
00805
00806
00807
00808 for (w = hmm->info.penult_phn_wid; w >= 0;
00809 w = ngs->homophone_set[w]) {
00810 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip
00811 + phone_loop_search_score(pls, s3dict_last_phone(ps_search_dict(ngs),w));
00812 if (newphone_score BETTER_THAN lastphn_thresh) {
00813 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00814 ngs->n_lastphn_cand++;
00815 candp->wid = w;
00816 candp->score =
00817 newphone_score - ngs->nwpen;
00818 candp->bp = hmm_out_history(&hmm->hmm);
00819 }
00820 }
00821 }
00822 else if (hmm_frame(&hmm->hmm) != nf) {
00823 hmm_clear_scores(&hmm->hmm);
00824 }
00825 }
00826 ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00827 }
00828
00829
00830
00831
00832
00833
00834 static void
00835 last_phone_transition(ngram_search_t *ngs, int frame_idx)
00836 {
00837 int32 i, j, k, nf, bp, bplast, w;
00838 lastphn_cand_t *candp;
00839 int32 *nawl;
00840 int32 thresh;
00841 int32 bestscore, dscr;
00842 chan_t *hmm;
00843 bptbl_t *bpe;
00844 int32 n_cand_sf = 0;
00845
00846 nf = frame_idx + 1;
00847 nawl = ngs->active_word_list[nf & 0x1];
00848 ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
00849
00850
00851
00852 E_DEBUG(3, ("n_lastphn_cand %d\n", ngs->n_lastphn_cand));
00853 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00854
00855 bpe = &(ngs->bp_table[candp->bp]);
00856
00857
00858 candp->score -=
00859 ngram_search_exit_score
00860 (ngs, bpe, s3dict_first_phone(ps_search_dict(ngs), candp->wid));
00861 E_DEBUG(4, ("candp->score %d\n", candp->score));
00862 E_DEBUG(4, ("candp->score %d\n", candp->score));
00863
00864
00865
00866
00867
00868
00869
00870 if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
00871
00872
00873 for (j = 0; j < n_cand_sf; j++) {
00874 if (ngs->cand_sf[j].bp_ef == bpe->frame)
00875 break;
00876 }
00877
00878 if (j < n_cand_sf)
00879 candp->next = ngs->cand_sf[j].cand;
00880 else {
00881
00882 if (n_cand_sf >= ngs->cand_sf_alloc) {
00883 if (ngs->cand_sf_alloc == 0) {
00884 ngs->cand_sf =
00885 ckd_calloc(CAND_SF_ALLOCSIZE,
00886 sizeof(*ngs->cand_sf));
00887 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
00888 }
00889 else {
00890 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
00891 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
00892 ngs->cand_sf_alloc
00893 * sizeof(*ngs->cand_sf));
00894 E_INFO("cand_sf[] increased to %d entries\n",
00895 ngs->cand_sf_alloc);
00896 }
00897 }
00898
00899
00900 j = n_cand_sf++;
00901 candp->next = -1;
00902 ngs->cand_sf[j].bp_ef = bpe->frame;
00903 }
00904
00905 ngs->cand_sf[j].cand = i;
00906
00907 ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
00908 ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
00909 }
00910 }
00911
00912
00913 for (i = 0; i < n_cand_sf; i++) {
00914
00915 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
00916 bplast = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1] - 1;
00917
00918 for (bpe = &(ngs->bp_table[bp]); bp <= bplast; bp++, bpe++) {
00919 if (!bpe->valid)
00920 continue;
00921
00922 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
00923 int32 n_used;
00924 candp = &(ngs->lastphn_cand[j]);
00925 dscr =
00926 ngram_search_exit_score
00927 (ngs, bpe, s3dict_first_phone(ps_search_dict(ngs), candp->wid));
00928 dscr += ngram_tg_score(ngs->lmset,
00929 s3dict_basewid(ps_search_dict(ngs), candp->wid),
00930 bpe->real_wid,
00931 bpe->prev_real_wid, &n_used);
00932
00933 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
00934 ngs->last_ltrans[candp->wid].dscr = dscr;
00935 ngs->last_ltrans[candp->wid].bp = bp;
00936 }
00937 }
00938 }
00939 }
00940
00941
00942 bestscore = ngs->last_phone_best_score;
00943 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00944 candp->score += ngs->last_ltrans[candp->wid].dscr;
00945 candp->bp = ngs->last_ltrans[candp->wid].bp;
00946
00947 if (candp->score BETTER_THAN bestscore)
00948 bestscore = candp->score;
00949 }
00950 ngs->last_phone_best_score = bestscore;
00951
00952
00953 thresh = bestscore + ngs->lponlybeam;
00954 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
00955 if (candp->score BETTER_THAN thresh) {
00956 w = candp->wid;
00957
00958 ngram_search_alloc_all_rc(ngs, w);
00959
00960 k = 0;
00961 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00962 if ((hmm_frame(&hmm->hmm) < frame_idx)
00963 || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00964 assert(hmm_frame(&hmm->hmm) != nf);
00965 hmm_enter(&hmm->hmm,
00966 candp->score, candp->bp, nf);
00967 k++;
00968 }
00969 }
00970 if (k > 0) {
00971 assert(bitvec_is_clear(ngs->word_active, w));
00972 assert(s3dict_pronlen(ps_search_dict(ngs), w) > 1);
00973 *(nawl++) = w;
00974 bitvec_set(ngs->word_active, w);
00975 }
00976 }
00977 }
00978 ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
00979 }
00980
00981
00982
00983
00984
00985 static void
00986 prune_word_chan(ngram_search_t *ngs, int frame_idx)
00987 {
00988 root_chan_t *rhmm;
00989 chan_t *hmm, *thmm;
00990 chan_t **phmmp;
00991 int32 nf, w, i, k;
00992 int32 newword_thresh, lastphn_thresh;
00993 int32 *awl, *nawl;
00994
00995 nf = frame_idx + 1;
00996 newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
00997 lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
00998
00999 awl = ngs->active_word_list[frame_idx & 0x1];
01000 nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
01001
01002
01003 for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
01004 --i, w = *(awl++)) {
01005 k = 0;
01006 phmmp = &(ngs->word_chan[w]);
01007 for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
01008 assert(hmm_frame(&hmm->hmm) >= frame_idx);
01009
01010 thmm = hmm->next;
01011 if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
01012
01013 hmm_frame(&hmm->hmm) = nf;
01014 k++;
01015 phmmp = &(hmm->next);
01016
01017
01018 if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
01019
01020 ngram_search_save_bp(ngs, frame_idx, w,
01021 hmm_out_score(&hmm->hmm),
01022 hmm_out_history(&hmm->hmm),
01023 hmm->info.rc_id);
01024 }
01025 }
01026 else if (hmm_frame(&hmm->hmm) == nf) {
01027 phmmp = &(hmm->next);
01028 }
01029 else {
01030 hmm_deinit(&hmm->hmm);
01031 listelem_free(ngs->chan_alloc, hmm);
01032 *phmmp = thmm;
01033 }
01034 }
01035 if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
01036 assert(s3dict_pronlen(ps_search_dict(ngs), w) > 1);
01037 *(nawl++) = w;
01038 bitvec_set(ngs->word_active, w);
01039 }
01040 }
01041 ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01042
01043
01044
01045
01046
01047 for (i = 0; i < ngs->n_1ph_words; i++) {
01048 w = ngs->single_phone_wid[i];
01049 rhmm = (root_chan_t *) ngs->word_chan[w];
01050 E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
01051 s3dict_wordstr(ps_search_dict(ngs),w),
01052 hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
01053 lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
01054 if (hmm_frame(&rhmm->hmm) < frame_idx)
01055 continue;
01056 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
01057 hmm_frame(&rhmm->hmm) = nf;
01058
01059
01060 if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
01061 E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
01062 s3dict_wordstr(ps_search_dict(ngs),w),
01063 hmm_out_score(&rhmm->hmm),
01064 lastphn_thresh, newword_thresh));
01065 ngram_search_save_bp(ngs, frame_idx, w,
01066 hmm_out_score(&rhmm->hmm),
01067 hmm_out_history(&rhmm->hmm), 0);
01068 }
01069 }
01070 }
01071 }
01072
01073 static void
01074 prune_channels(ngram_search_t *ngs, int frame_idx)
01075 {
01076
01077 ngs->n_lastphn_cand = 0;
01078
01079 ngs->dynamic_beam = ngs->beam;
01080 if (ngs->maxhmmpf != -1
01081 && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
01082
01083 int32 bins[256], bw, nhmms, i;
01084 root_chan_t *rhmm;
01085 chan_t **acl, *hmm;
01086
01087
01088 bw = -ngs->beam / 256;
01089 memset(bins, 0, sizeof(bins));
01090
01091 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
01092 int32 b;
01093
01094
01095 b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
01096 if (b >= 256)
01097 b = 255;
01098 ++bins[b];
01099 }
01100
01101 acl = ngs->active_chan_list[frame_idx & 0x1];
01102 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
01103 i > 0; --i, hmm = *(acl++)) {
01104 int32 b;
01105
01106
01107 b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
01108 if (b >= 256)
01109 b = 255;
01110 ++bins[b];
01111 }
01112
01113 for (i = nhmms = 0; i < 256; ++i) {
01114 nhmms += bins[i];
01115 if (nhmms > ngs->maxhmmpf)
01116 break;
01117 }
01118 ngs->dynamic_beam = -(i * bw);
01119 }
01120
01121 prune_root_chan(ngs, frame_idx);
01122 prune_nonroot_chan(ngs, frame_idx);
01123 last_phone_transition(ngs, frame_idx);
01124 prune_word_chan(ngs, frame_idx);
01125 }
01126
01127
01128
01129
01130
01131 static void
01132 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
01133 {
01134 int32 bp, n;
01135 int32 bestscr, worstscr;
01136 bptbl_t *bpe, *bestbpe, *worstbpe;
01137
01138
01139 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
01140 return;
01141
01142
01143 bestscr = (int32) 0x80000000;
01144 bestbpe = NULL;
01145 n = 0;
01146 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01147 bpe = &(ngs->bp_table[bp]);
01148 if (s3dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
01149 if (bpe->score BETTER_THAN bestscr) {
01150 bestscr = bpe->score;
01151 bestbpe = bpe;
01152 }
01153 bpe->valid = FALSE;
01154 n++;
01155 }
01156 }
01157
01158 if (bestbpe != NULL) {
01159 bestbpe->valid = TRUE;
01160 --n;
01161 }
01162
01163
01164 n = (ngs->bpidx
01165 - ngs->bp_table_idx[frame_idx]) - n;
01166 for (; n > ngs->maxwpf; --n) {
01167
01168 worstscr = (int32) 0x7fffffff;
01169 worstbpe = NULL;
01170 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
01171 bpe = &(ngs->bp_table[bp]);
01172 if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
01173 worstscr = bpe->score;
01174 worstbpe = bpe;
01175 }
01176 }
01177
01178 if (worstbpe == NULL)
01179 E_FATAL("PANIC: No worst BPtable entry remaining\n");
01180 worstbpe->valid = 0;
01181 }
01182 }
01183
01184 static void
01185 word_transition(ngram_search_t *ngs, int frame_idx)
01186 {
01187 int32 i, k, bp, w, nf;
01188 int32 rc;
01189 int32 *rcss;
01190 int32 thresh, newscore;
01191 bptbl_t *bpe;
01192 root_chan_t *rhmm;
01193 struct bestbp_rc_s *bestbp_rc_ptr;
01194 phone_loop_search_t *pls;
01195 s3dict_t *dict = ps_search_dict(ngs);
01196 dict2pid_t *d2p = ps_search_dict2pid(ngs);
01197
01198
01199
01200
01201
01202
01203 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
01204 ngs->bestbp_rc[i].score = WORST_SCORE;
01205 k = 0;
01206 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
01207
01208
01209 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01210 bpe = &(ngs->bp_table[bp]);
01211 ngs->word_lat_idx[bpe->wid] = NO_BP;
01212
01213 if (bpe->wid == ps_search_finish_wid(ngs))
01214 continue;
01215 k++;
01216
01217
01218
01219
01220
01221 rcss = &(ngs->bscore_stack[bpe->s_idx]);
01222 if (bpe->last2_phone == -1) {
01223
01224 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01225 if (rcss[0] BETTER_THAN ngs->bestbp_rc[rc].score) {
01226 E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n", rcss[0], bpe->last_phone));
01227 ngs->bestbp_rc[rc].score = rcss[0];
01228 ngs->bestbp_rc[rc].path = bp;
01229 ngs->bestbp_rc[rc].lc = bpe->last_phone;
01230 }
01231 }
01232 }
01233 else {
01234 xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
01235 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01236 if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
01237 E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
01238 rc, rcss[rssid->cimap[rc]], bpe->last_phone));
01239 ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
01240 ngs->bestbp_rc[rc].path = bp;
01241 ngs->bestbp_rc[rc].lc = bpe->last_phone;
01242 }
01243 }
01244 }
01245 }
01246 if (k == 0)
01247 return;
01248
01249 nf = frame_idx + 1;
01250 thresh = ngs->best_score + ngs->dynamic_beam;
01251
01252
01253
01254
01255 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01256 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
01257
01258 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
01259 + phone_loop_search_score(pls, rhmm->ciphone);
01260 if (newscore BETTER_THAN thresh) {
01261 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01262 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01263 hmm_enter(&rhmm->hmm, newscore,
01264 bestbp_rc_ptr->path, nf);
01265
01266
01267 hmm_mpx_ssid(&rhmm->hmm, 0) =
01268 d2p->ldiph_lc[rhmm->ciphone][rhmm->ci2phone][bestbp_rc_ptr->lc];
01269 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01270 }
01271 }
01272 }
01273
01274
01275
01276
01277
01278 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01279 w = ngs->single_phone_wid[i];
01280 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
01281 }
01282 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01283 bpe = &(ngs->bp_table[bp]);
01284 if (!bpe->valid)
01285 continue;
01286
01287 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01288 int32 n_used;
01289 w = ngs->single_phone_wid[i];
01290 newscore = ngram_search_exit_score
01291 (ngs, bpe, s3dict_first_phone(dict, w));
01292 E_DEBUG(4, ("initial newscore for %s: %d\n",
01293 s3dict_wordstr(dict, w), newscore));
01294 newscore += ngram_tg_score(ngs->lmset,
01295 s3dict_basewid(dict, w),
01296 bpe->real_wid,
01297 bpe->prev_real_wid, &n_used);
01298
01299 if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
01300 ngs->last_ltrans[w].dscr = newscore;
01301 ngs->last_ltrans[w].bp = bp;
01302 }
01303 }
01304 }
01305
01306
01307 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01308 w = ngs->single_phone_wid[i];
01309 rhmm = (root_chan_t *) ngs->word_chan[w];
01310 newscore = ngs->last_ltrans[w].dscr + ngs->pip
01311 + phone_loop_search_score(pls, rhmm->ciphone);
01312 if (newscore BETTER_THAN thresh) {
01313 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
01314 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01315 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01316 hmm_enter(&rhmm->hmm,
01317 newscore, ngs->last_ltrans[w].bp, nf);
01318
01319
01320 hmm_mpx_ssid(&rhmm->hmm, 0) =
01321 d2p->ldiph_lc[rhmm->ciphone][rhmm->ci2phone]
01322 [s3dict_last_phone(dict, bpe->wid)];
01323 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01324 }
01325 }
01326 }
01327
01328
01329 w = ps_search_silence_wid(ngs);
01330 rhmm = (root_chan_t *) ngs->word_chan[w];
01331 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
01332 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
01333 + phone_loop_search_score(pls, rhmm->ciphone);
01334 if (newscore BETTER_THAN thresh) {
01335 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01336 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01337 hmm_enter(&rhmm->hmm,
01338 newscore, bestbp_rc_ptr->path, nf);
01339 }
01340 }
01341 for (w = s3dict_filler_start(dict); w <= s3dict_filler_end(dict); w++) {
01342 if (w == ps_search_silence_wid(ngs))
01343 continue;
01344 rhmm = (root_chan_t *) ngs->word_chan[w];
01345
01346 if (rhmm == NULL)
01347 continue;
01348 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
01349 + phone_loop_search_score(pls, rhmm->ciphone);
01350 if (newscore BETTER_THAN thresh) {
01351 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01352 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01353 hmm_enter(&rhmm->hmm,
01354 newscore, bestbp_rc_ptr->path, nf);
01355 }
01356 }
01357 }
01358 }
01359
01360 static void
01361 deactivate_channels(ngram_search_t *ngs, int frame_idx)
01362 {
01363 root_chan_t *rhmm;
01364 int i;
01365
01366
01367 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01368 if (hmm_frame(&rhmm->hmm) == frame_idx) {
01369 hmm_clear_scores(&rhmm->hmm);
01370 }
01371 }
01372
01373 for (i = 0; i < ngs->n_1ph_words; i++) {
01374 int32 w = ngs->single_phone_wid[i];
01375 rhmm = (root_chan_t *) ngs->word_chan[w];
01376 if (hmm_frame(&rhmm->hmm) == frame_idx) {
01377 hmm_clear_scores(&rhmm->hmm);
01378 }
01379 }
01380 }
01381
01382 int
01383 ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
01384 {
01385 int16 const *senscr;
01386
01387
01388 if (!ps_search_acmod(ngs)->compallsen)
01389 compute_sen_active(ngs, frame_idx);
01390
01391
01392 if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
01393 return 0;
01394 ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
01395
01396
01397 ngram_search_mark_bptable(ngs, frame_idx);
01398
01399
01400 if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
01401 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
01402 frame_idx, ngs->best_score);
01403 renormalize_scores(ngs, frame_idx, ngs->best_score);
01404 }
01405
01406
01407 evaluate_channels(ngs, senscr, frame_idx);
01408
01409 prune_channels(ngs, frame_idx);
01410
01411 bptable_maxwpf(ngs, frame_idx);
01412
01413 word_transition(ngs, frame_idx);
01414
01415 deactivate_channels(ngs, frame_idx);
01416
01417 ++ngs->n_frame;
01418
01419 return 1;
01420 }
01421
01422 void
01423 ngram_fwdtree_finish(ngram_search_t *ngs)
01424 {
01425 int32 i, w, cf, *awl;
01426 root_chan_t *rhmm;
01427 chan_t *hmm, **acl;
01428
01429
01430 cf = ps_search_acmod(ngs)->output_frame;
01431
01432 ngram_search_mark_bptable(ngs, cf);
01433
01434
01435
01436 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01437 hmm_clear(&rhmm->hmm);
01438 }
01439
01440
01441 i = ngs->n_active_chan[cf & 0x1];
01442 acl = ngs->active_chan_list[cf & 0x1];
01443 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
01444 hmm_clear(&hmm->hmm);
01445 }
01446
01447
01448 i = ngs->n_active_word[cf & 0x1];
01449 awl = ngs->active_word_list[cf & 0x1];
01450 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
01451
01452 if (s3dict_pronlen(ps_search_dict(ngs), w) == 1)
01453 continue;
01454 bitvec_clear(ngs->word_active, w);
01455 if (ngs->word_chan[w] == NULL)
01456 continue;
01457 ngram_search_free_all_rc(ngs, w);
01458 }
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469 if (cf > 0) {
01470 E_INFO("%8d words recognized (%d/fr)\n",
01471 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
01472 E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
01473 (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
01474 E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
01475 ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
01476 (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
01477 ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
01478 E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
01479 ngs->st.n_word_lastchan_eval,
01480 ngs->st.n_word_lastchan_eval / (cf + 1));
01481 E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
01482 ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
01483 }
01484 }