src/libpocketsphinx/fsg_search.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 
00035 /*
00036  * fsg_search.c -- Search structures for FSM decoding.
00037  * 
00038  * **********************************************
00039  * CMU ARPA Speech Project
00040  *
00041  * Copyright (c) 2004 Carnegie Mellon University.
00042  * ALL RIGHTS RESERVED.
00043  * **********************************************
00044  * 
00045  * HISTORY
00046  *
00047  * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00048  *              Started.
00049  */
00050 
00051 /* System headers. */
00052 #include <stdio.h>
00053 #include <string.h>
00054 #include <assert.h>
00055 
00056 /* SphinxBase headers. */
00057 #include <err.h>
00058 #include <ckd_alloc.h>
00059 #include <cmd_ln.h>
00060 
00061 /* Local headers. */
00062 #include "pocketsphinx_internal.h"
00063 #include "ps_lattice_internal.h"
00064 #include "fsg_search_internal.h"
00065 #include "fsg_history.h"
00066 #include "fsg_lextree.h"
00067 
00068 /* Turn this on for detailed debugging dump */
00069 #define __FSG_DBG__             0
00070 #define __FSG_DBG_CHAN__        0
00071 
00072 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search, int32 *out_score);
00073 static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
00074 static int fsg_search_prob(ps_search_t *search);
00075 
00076 static ps_searchfuncs_t fsg_funcs = {
00077     /* name: */   "fsg",
00078     /* start: */  fsg_search_start,
00079     /* step: */   fsg_search_step,
00080     /* finish: */ fsg_search_finish,
00081     /* reinit: */ fsg_search_reinit,
00082     /* free: */   fsg_search_free,
00083     /* lattice: */  fsg_search_lattice,
00084     /* hyp: */      fsg_search_hyp,
00085     /* prob: */     fsg_search_prob,
00086     /* seg_iter: */ fsg_search_seg_iter,
00087 };
00088 
00089 ps_search_t *
00090 fsg_search_init(cmd_ln_t *config,
00091                 acmod_t *acmod,
00092                 s3dict_t *dict,
00093                 dict2pid_t *d2p)
00094 {
00095     fsg_search_t *fsgs;
00096     char const *path;
00097 
00098     fsgs = ckd_calloc(1, sizeof(*fsgs));
00099     ps_search_init(ps_search_base(fsgs), &fsg_funcs, config, acmod, dict, d2p);
00100 
00101     /* Initialize HMM context. */
00102     fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00103                                     acmod->tmat->tp, NULL, acmod->mdef->sseq);
00104     if (fsgs->hmmctx == NULL) {
00105         ps_search_free(ps_search_base(fsgs));
00106         return NULL;
00107     }
00108 
00109     /* Intialize the search history object */
00110     fsgs->history = fsg_history_init(NULL, dict);
00111     fsgs->frame = -1;
00112 
00113     /* Initialize FSG table. */
00114     fsgs->fsgs = hash_table_new(5, FALSE);
00115 
00116     /* Get search pruning parameters */
00117     fsgs->beam_factor = 1.0f;
00118     fsgs->beam = fsgs->beam_orig
00119         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"));
00120     fsgs->pbeam = fsgs->pbeam_orig
00121         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"));
00122     fsgs->wbeam = fsgs->wbeam_orig
00123         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"));
00124 
00125     /* LM related weights/penalties */
00126     fsgs->lw = cmd_ln_float32_r(config, "-lw");
00127     fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
00128                            * fsgs->lw);
00129     fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
00130                            * fsgs->lw);
00131 
00132     /* Best path search (and confidence annotation)? */
00133     if (cmd_ln_boolean_r(config, "-bestpath"))
00134         fsgs->bestpath = TRUE;
00135 
00136     /* Acoustic score scale for posterior probabilities. */
00137     fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
00138 
00139     E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
00140            fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
00141            fsgs->wip, fsgs->pip);
00142 
00143     /* Load an FSG if one was specified in config */
00144     if ((path = cmd_ln_str_r(config, "-fsg"))) {
00145         fsg_model_t *fsg;
00146 
00147         if ((fsg = fsg_model_readfile(path, acmod->lmath, fsgs->lw)) == NULL)
00148             goto error_out;
00149         if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
00150             fsg_model_free(fsg);
00151             goto error_out;
00152         }
00153         if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
00154             goto error_out;
00155         if (fsg_search_reinit(ps_search_base(fsgs)) < 0)
00156             goto error_out;
00157     }
00158     /* Or load a JSGF grammar */
00159     else if ((path = cmd_ln_str_r(config, "-jsgf"))) {
00160         fsg_model_t *fsg;
00161         jsgf_rule_t *rule;
00162         char const *toprule;
00163 
00164         if ((fsgs->jsgf = jsgf_parse_file(path, NULL)) == NULL)
00165             goto error_out;
00166 
00167         rule = NULL;
00168         /* Take the -toprule if specified. */
00169         if ((toprule = cmd_ln_str_r(config, "-toprule"))) {
00170             char *anglerule;
00171 
00172             anglerule = ckd_calloc(1, strlen(toprule) + 3);
00173             anglerule[0] = '<';
00174             strcat(anglerule, toprule);
00175             strcat(anglerule, ">");
00176             rule = jsgf_get_rule(fsgs->jsgf, anglerule);
00177             ckd_free(anglerule);
00178             if (rule == NULL) {
00179                 E_ERROR("Start rule %s not found\n", toprule);
00180                 goto error_out;
00181             }
00182         }
00183         /* Otherwise, take the first public rule. */
00184         else {
00185             jsgf_rule_iter_t *itor;
00186 
00187             for (itor = jsgf_rule_iter(fsgs->jsgf); itor;
00188                  itor = jsgf_rule_iter_next(itor)) {
00189                 rule = jsgf_rule_iter_rule(itor);
00190                 if (jsgf_rule_public(rule))
00191                     break;
00192             }
00193             if (rule == NULL) {
00194                 E_ERROR("No public rules found in %s\n", path);
00195                 goto error_out;
00196             }
00197         }
00198         fsg = jsgf_build_fsg(fsgs->jsgf, rule, acmod->lmath, fsgs->lw);
00199         if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
00200             fsg_model_free(fsg);
00201             goto error_out;
00202         }
00203         if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
00204             goto error_out;
00205         if (fsg_search_reinit(ps_search_base(fsgs)) < 0)
00206             goto error_out;
00207     }
00208 
00209     return ps_search_base(fsgs);
00210 
00211 error_out:
00212     fsg_search_free(ps_search_base(fsgs));
00213     return NULL;
00214 }
00215 
00216 void
00217 fsg_search_free(ps_search_t *search)
00218 {
00219     fsg_search_t *fsgs = (fsg_search_t *)search;
00220     hash_iter_t *itor;
00221 
00222     ps_search_deinit(search);
00223     if (fsgs->jsgf)
00224         jsgf_grammar_free(fsgs->jsgf);
00225     fsg_lextree_free(fsgs->lextree);
00226     fsg_history_reset(fsgs->history);
00227     fsg_history_set_fsg(fsgs->history, NULL, NULL);
00228     for (itor = hash_table_iter(fsgs->fsgs);
00229          itor; itor = hash_table_iter_next(itor)) {
00230         fsg_model_t *fsg = (fsg_model_t *) hash_entry_val(itor->ent);
00231         fsg_model_free(fsg);
00232     }
00233     hash_table_free(fsgs->fsgs);
00234     fsg_history_free(fsgs->history);
00235     hmm_context_free(fsgs->hmmctx);
00236     ckd_free(fsgs);
00237 }
00238 
00239 int
00240 fsg_search_reinit(ps_search_t *search)
00241 {
00242     fsg_search_t *fsgs = (fsg_search_t *)search;
00243 
00244     /* Free the old lextree */
00245     if (fsgs->lextree)
00246         fsg_lextree_free(fsgs->lextree);
00247 
00248     /* Allocate new lextree for the given FSG */
00249     fsgs->lextree = fsg_lextree_init(fsgs->fsg, ps_search_dict(fsgs),
00250                                      ps_search_dict2pid(fsgs),
00251                                      ps_search_acmod(fsgs)->mdef,
00252                                      fsgs->hmmctx, fsgs->wip, fsgs->pip);
00253 
00254     /* Inform the history module of the new fsg */
00255     fsg_history_set_fsg(fsgs->history, fsgs->fsg, ps_search_dict(fsgs));
00256 
00257     return 0;
00258 }
00259 
00260 
00261 static int
00262 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
00263 {
00264     s3dict_t *dict;
00265     int32 wid;
00266     int n_sil;
00267 
00268     dict = ps_search_dict(fsgs);
00269     /*
00270      * NOTE: Unlike N-Gram search, we do not use explicit start and
00271      * end symbols.  This is because the start and end nodes are
00272      * defined in the grammar.  We do add silence/filler self-loops to
00273      * all states in order to allow for silence between words and at
00274      * the beginning and end of utterances.
00275      *
00276      * This has some implications for word graph generation, namely,
00277      * that there can (and usually will) be multiple start and end
00278      * states in the word graph.  We therefore do add explicit start
00279      * and end nodes to the graph.
00280      */
00281     /* Add silence self-loops to all states. */
00282     fsg_model_add_silence(fsg, "<sil>", -1,
00283                           cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
00284     n_sil = 0;
00285     /* Add self-loops for all other fillers. */
00286     for (wid = s3dict_filler_start(dict); wid < s3dict_filler_end(dict); ++wid) {
00287         char const *word = s3dict_wordstr(dict, wid);
00288         if (wid == s3dict_startwid(dict) || wid == s3dict_finishwid(dict))
00289             continue;
00290         fsg_model_add_silence(fsg, word, -1,
00291                               cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
00292         ++n_sil;
00293     }
00294 
00295     return n_sil;
00296 }
00297 
00298 /* Scans the dictionary and check if all words are present. */
00299 static int
00300 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
00301 {
00302     s3dict_t *dict;
00303     int i;
00304 
00305     dict = ps_search_dict(fsgs);
00306     for (i = 0; i < fsg_model_n_word(fsg); ++i) {
00307         char const *word;
00308         int32 wid;
00309 
00310         word = fsg_model_word_str(fsg, i);
00311         wid = s3dict_wordid(dict, word);
00312         if (wid == BAD_S3WID) {
00313             E_ERROR("The word '%s' is missing in the dictionary\n", word);
00314             return FALSE;
00315         }
00316     }
00317 
00318     return TRUE;
00319 }
00320 
00321 static int
00322 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
00323 {
00324     s3dict_t *dict;
00325     int n_alt;
00326     int i;
00327 
00328     dict = ps_search_dict(fsgs);
00329     /* Scan FSG's vocabulary for words that have alternate pronunciations. */
00330     n_alt = 0;
00331     for (i = 0; i < fsg_model_n_word(fsg); ++i) {
00332         char const *word;
00333         int32 wid;
00334 
00335         word = fsg_model_word_str(fsg, i);
00336         wid = s3dict_wordid(dict, word);
00337         if (wid != BAD_S3WID) {
00338             while ((wid = s3dict_nextalt(dict, wid)) != BAD_S3WID) {
00339                 fsg_model_add_alt(fsg, word, s3dict_wordstr(dict, wid));
00340                 ++n_alt;
00341             }
00342         }
00343     }
00344 
00345     return n_alt;
00346 }
00347 
00348 fsg_model_t *
00349 fsg_set_get_fsg(fsg_search_t *fsgs, const char *name)
00350 {
00351     void *val;
00352 
00353     if (hash_table_lookup(fsgs->fsgs, name, &val) < 0)
00354         return NULL;
00355     return (fsg_model_t *)val;
00356 }
00357 
00358 fsg_model_t *
00359 fsg_set_add(fsg_search_t *fsgs, char const *name, fsg_model_t *fsg)
00360 {
00361     if (name == NULL)
00362         name = fsg_model_name(fsg);
00363 
00364     if (!fsg_search_check_dict(fsgs, fsg))
00365         return NULL;
00366 
00367     /* Add silence transitions and alternate words. */
00368     if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusefiller")
00369         && !fsg_model_has_sil(fsg))
00370         fsg_search_add_silences(fsgs, fsg);
00371     if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusealtpron")
00372         && !fsg_model_has_alt(fsg))
00373         fsg_search_add_altpron(fsgs, fsg);
00374 
00375     return (fsg_model_t *)hash_table_enter(fsgs->fsgs, name, fsg);
00376 }
00377 
00378 
00379 fsg_model_t *
00380 fsg_set_remove_byname(fsg_search_t *fsgs, char const *key)
00381 {
00382     fsg_model_t *oldfsg;
00383     void *val;
00384 
00385     /* Look for the matching FSG. */
00386     if (hash_table_lookup(fsgs->fsgs, key, &val) < 0) {
00387         E_ERROR("FSG `%s' to be deleted not found\n", key);
00388         return NULL;
00389     }
00390     oldfsg = val;
00391 
00392     /* Remove it from the FSG table. */
00393     hash_table_delete(fsgs->fsgs, key);
00394     /* If this was the currently active FSG, also delete other stuff */
00395     if (fsgs->fsg == oldfsg) {
00396         fsg_lextree_free(fsgs->lextree);
00397         fsgs->lextree = NULL;
00398         fsg_history_set_fsg(fsgs->history, NULL, NULL);
00399         fsgs->fsg = NULL;
00400     }
00401     return oldfsg;
00402 }
00403 
00404 
00405 fsg_model_t *
00406 fsg_set_remove(fsg_search_t *fsgs, fsg_model_t *fsg)
00407 {
00408     char const *key;
00409     hash_iter_t *itor;
00410 
00411     key = NULL;
00412     for (itor = hash_table_iter(fsgs->fsgs);
00413          itor; itor = hash_table_iter_next(itor)) {
00414         fsg_model_t *oldfsg;
00415 
00416         oldfsg = (fsg_model_t *) hash_entry_val(itor->ent);
00417         if (oldfsg == fsg) {
00418             key = hash_entry_key(itor->ent);
00419             hash_table_iter_free(itor);
00420             break;
00421         }
00422     }
00423     if (key == NULL) {
00424         E_WARN("FSG '%s' to be deleted not found\n", fsg_model_name(fsg));
00425         return NULL;
00426     }
00427     else
00428         return fsg_set_remove_byname(fsgs, key);
00429 }
00430 
00431 
00432 fsg_model_t *
00433 fsg_set_select(fsg_search_t *fsgs, const char *name)
00434 {
00435     fsg_model_t *fsg;
00436 
00437     fsg = fsg_set_get_fsg(fsgs, name);
00438     if (fsg == NULL) {
00439         E_ERROR("FSG '%s' not known; cannot make it current\n", name);
00440         return NULL;
00441     }
00442     fsgs->fsg = fsg;
00443     return fsg;
00444 }
00445 
00446 fsg_set_iter_t *
00447 fsg_set_iter(fsg_set_t *fsgs)
00448 {
00449     return hash_table_iter(fsgs->fsgs);
00450 }
00451 
00452 fsg_set_iter_t *
00453 fsg_set_iter_next(fsg_set_iter_t *itor)
00454 {
00455     return hash_table_iter_next(itor);
00456 }
00457 
00458 fsg_model_t *
00459 fsg_set_iter_fsg(fsg_set_iter_t *itor)
00460 {
00461     return ((fsg_model_t *)itor->ent->val);
00462 }
00463 
00464 void
00465 fsg_set_iter_free(fsg_set_iter_t *itor)
00466 {
00467     hash_table_iter_free(itor);
00468 }
00469 
00470 static void
00471 fsg_search_sen_active(fsg_search_t *fsgs)
00472 {
00473     gnode_t *gn;
00474     fsg_pnode_t *pnode;
00475     hmm_t *hmm;
00476 
00477     acmod_clear_active(ps_search_acmod(fsgs));
00478 
00479     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00480         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00481         hmm = fsg_pnode_hmmptr(pnode);
00482         assert(hmm_frame(hmm) == fsgs->frame);
00483         acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
00484     }
00485 }
00486 
00487 
00488 /*
00489  * Evaluate all the active HMMs.
00490  * (Executed once per frame.)
00491  */
00492 static void
00493 fsg_search_hmm_eval(fsg_search_t *fsgs)
00494 {
00495     gnode_t *gn;
00496     fsg_pnode_t *pnode;
00497     hmm_t *hmm;
00498     int32 bestscore;
00499     int32 n, maxhmmpf;
00500 
00501     bestscore = WORST_SCORE;
00502 
00503     if (!fsgs->pnode_active) {
00504         E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
00505         return;
00506     }
00507 
00508     for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
00509         int32 score;
00510 
00511         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00512         hmm = fsg_pnode_hmmptr(pnode);
00513         assert(hmm_frame(hmm) == fsgs->frame);
00514 
00515 #if __FSG_DBG__
00516         E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
00517                fsgs->frame);
00518         hmm_dump(hmm, stdout);
00519 #endif
00520         score = hmm_vit_eval(hmm);
00521 #if __FSG_DBG_CHAN__
00522         E_INFO("pnode(%08x) after eval @frm %5d\n",
00523                (int32) pnode, fsgs->frame);
00524         hmm_dump(hmm, stdout);
00525 #endif
00526 
00527         if (score BETTER_THAN bestscore)
00528             bestscore = score;
00529     }
00530 
00531 #if __FSG_DBG__
00532     E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
00533 #endif
00534     fsgs->n_hmm_eval += n;
00535 
00536     /* Adjust beams if #active HMMs larger than absolute threshold */
00537     maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
00538     if (maxhmmpf != -1 && n > maxhmmpf) {
00539         /*
00540          * Too many HMMs active; reduce the beam factor applied to the default
00541          * beams, but not if the factor is already at a floor (0.1).
00542          */
00543         if (fsgs->beam_factor > 0.1) {        /* Hack!!  Hardwired constant 0.1 */
00544             fsgs->beam_factor *= 0.9f;        /* Hack!!  Hardwired constant 0.9 */
00545             fsgs->beam =
00546                 (int32) (fsgs->beam_orig * fsgs->beam_factor);
00547             fsgs->pbeam =
00548                 (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
00549             fsgs->wbeam =
00550                 (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
00551         }
00552     }
00553     else {
00554         fsgs->beam_factor = 1.0f;
00555         fsgs->beam = fsgs->beam_orig;
00556         fsgs->pbeam = fsgs->pbeam_orig;
00557         fsgs->wbeam = fsgs->wbeam_orig;
00558     }
00559 
00560     if (n > fsg_lextree_n_pnode(fsgs->lextree))
00561         E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
00562                 fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
00563 
00564     fsgs->bestscore = bestscore;
00565 }
00566 
00567 
00568 static void
00569 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
00570 {
00571     fsg_pnode_t *child;
00572     hmm_t *hmm;
00573     int32 newscore, thresh, nf;
00574 
00575     assert(pnode);
00576     assert(!fsg_pnode_leaf(pnode));
00577 
00578     nf = fsgs->frame + 1;
00579     thresh = fsgs->bestscore + fsgs->beam;
00580 
00581     hmm = fsg_pnode_hmmptr(pnode);
00582 
00583     for (child = fsg_pnode_succ(pnode);
00584          child; child = fsg_pnode_sibling(child)) {
00585         newscore = hmm_out_score(hmm) + child->logs2prob;
00586 
00587         if ((newscore BETTER_THAN thresh)
00588             && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
00589             /* Incoming score > pruning threshold and > target's existing score */
00590             if (hmm_frame(&child->hmm) < nf) {
00591                 /* Child node not yet activated; do so */
00592                 fsgs->pnode_active_next =
00593                     glist_add_ptr(fsgs->pnode_active_next,
00594                                   (void *) child);
00595             }
00596 
00597             hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
00598         }
00599     }
00600 }
00601 
00602 
00603 static void
00604 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
00605 {
00606     hmm_t *hmm;
00607     fsg_link_t *fl;
00608     int32 wid;
00609     fsg_pnode_ctxt_t ctxt;
00610 
00611     assert(pnode);
00612     assert(fsg_pnode_leaf(pnode));
00613 
00614     hmm = fsg_pnode_hmmptr(pnode);
00615     fl = fsg_pnode_fsglink(pnode);
00616     assert(fl);
00617 
00618     wid = fsg_link_wid(fl);
00619     assert(wid >= 0);
00620 
00621 #if __FSG_DBG__
00622     E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
00623            fsgs->frame, (int32) pnode,
00624            hmm_out_score(hmm), hmm_out_history(hmm));
00625 #endif
00626 
00627     /*
00628      * Check if this is filler or single phone word; these do not model right
00629      * context (i.e., the exit score applies to all right contexts).
00630      */
00631     if (fsg_model_is_filler(fsgs->fsg, wid)
00632         /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
00633         || (s3dict_pronlen(ps_search_dict(fsgs),
00634                            s3dict_wordid(ps_search_dict(fsgs),
00635                                          fsg_model_word_str(fsgs->fsg, wid))) == 1)) {
00636         /* Create a dummy context structure that applies to all right contexts */
00637         fsg_pnode_add_all_ctxt(&ctxt);
00638 
00639         /* Create history table entry for this word exit */
00640         fsg_history_entry_add(fsgs->history,
00641                               fl,
00642                               fsgs->frame,
00643                               hmm_out_score(hmm),
00644                               hmm_out_history(hmm),
00645                               pnode->ci_ext, ctxt);
00646 
00647     }
00648     else {
00649         /* Create history table entry for this word exit */
00650         fsg_history_entry_add(fsgs->history,
00651                               fl,
00652                               fsgs->frame,
00653                               hmm_out_score(hmm),
00654                               hmm_out_history(hmm),
00655                               pnode->ci_ext, pnode->ctxt);
00656     }
00657 }
00658 
00659 
00660 /*
00661  * (Beam) prune the just evaluated HMMs, determine which ones remain
00662  * active, which ones transition to successors, which ones exit and
00663  * terminate in their respective destination FSM states.
00664  * (Executed once per frame.)
00665  */
00666 static void
00667 fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
00668 {
00669     gnode_t *gn;
00670     fsg_pnode_t *pnode;
00671     hmm_t *hmm;
00672     int32 thresh, word_thresh, phone_thresh;
00673 
00674     assert(fsgs->pnode_active_next == NULL);
00675 
00676     thresh = fsgs->bestscore + fsgs->beam;
00677     phone_thresh = fsgs->bestscore + fsgs->pbeam;
00678     word_thresh = fsgs->bestscore + fsgs->wbeam;
00679 
00680     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00681         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00682         hmm = fsg_pnode_hmmptr(pnode);
00683 
00684         if (hmm_bestscore(hmm) >= thresh) {
00685             /* Keep this HMM active in the next frame */
00686             if (hmm_frame(hmm) == fsgs->frame) {
00687                 hmm_frame(hmm) = fsgs->frame + 1;
00688                 fsgs->pnode_active_next =
00689                     glist_add_ptr(fsgs->pnode_active_next,
00690                                   (void *) pnode);
00691             }
00692             else {
00693                 assert(hmm_frame(hmm) == fsgs->frame + 1);
00694             }
00695 
00696             if (!fsg_pnode_leaf(pnode)) {
00697                 if (hmm_out_score(hmm) >= phone_thresh) {
00698                     /* Transition out of this phone into its children */
00699                     fsg_search_pnode_trans(fsgs, pnode);
00700                 }
00701             }
00702             else {
00703                 if (hmm_out_score(hmm) >= word_thresh) {
00704                     /* Transition out of leaf node into destination FSG state */
00705                     fsg_search_pnode_exit(fsgs, pnode);
00706                 }
00707             }
00708         }
00709     }
00710 }
00711 
00712 
00713 /*
00714  * Propagate newly created history entries through null transitions.
00715  */
00716 static void
00717 fsg_search_null_prop(fsg_search_t *fsgs)
00718 {
00719     int32 bpidx, n_entries, thresh, newscore;
00720     fsg_hist_entry_t *hist_entry;
00721     fsg_link_t *l;
00722     int32 s, d;
00723     fsg_model_t *fsg;
00724 
00725     fsg = fsgs->fsg;
00726     thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
00727 
00728     n_entries = fsg_history_n_entries(fsgs->history);
00729 
00730     for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
00731         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
00732 
00733         l = fsg_hist_entry_fsglink(hist_entry);
00734 
00735         /* Destination FSG state for history entry */
00736         s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
00737 
00738         /*
00739          * Check null transitions from d to all other states.  (Only need to
00740          * propagate one step, since FSG contains transitive closure of null
00741          * transitions.)
00742          */
00743         for (d = 0; d < fsg_model_n_state(fsg); d++) {
00744             l = fsg_model_null_trans(fsg, s, d);
00745 
00746             if (l) {            /* Propagate history entry through this null transition */
00747                 newscore =
00748                     fsg_hist_entry_score(hist_entry) +
00749                     fsg_link_logs2prob(l);
00750 
00751                 if (newscore >= thresh) {
00752                     fsg_history_entry_add(fsgs->history, l,
00753                                           fsg_hist_entry_frame(hist_entry),
00754                                           newscore,
00755                                           bpidx,
00756                                           fsg_hist_entry_lc(hist_entry),
00757                                           fsg_hist_entry_rc(hist_entry));
00758                 }
00759             }
00760         }
00761     }
00762 }
00763 
00764 
00765 /*
00766  * Perform cross-word transitions; propagate each history entry created in this
00767  * frame to lextree roots attached to the target FSG state for that entry.
00768  */
00769 static void
00770 fsg_search_word_trans(fsg_search_t *fsgs)
00771 {
00772     int32 bpidx, n_entries;
00773     fsg_hist_entry_t *hist_entry;
00774     fsg_link_t *l;
00775     int32 score, newscore, thresh, nf, d;
00776     fsg_pnode_t *root;
00777     int32 lc, rc;
00778 
00779     n_entries = fsg_history_n_entries(fsgs->history);
00780 
00781     thresh = fsgs->bestscore + fsgs->beam;
00782     nf = fsgs->frame + 1;
00783 
00784     for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
00785         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
00786         assert(hist_entry);
00787         score = fsg_hist_entry_score(hist_entry);
00788         assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
00789 
00790         l = fsg_hist_entry_fsglink(hist_entry);
00791 
00792         /* Destination state for hist_entry */
00793         d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
00794                                                                 fsg);
00795 
00796         lc = fsg_hist_entry_lc(hist_entry);
00797 
00798         /* Transition to all root nodes attached to state d */
00799         for (root = fsg_lextree_root(fsgs->lextree, d);
00800              root; root = root->sibling) {
00801             rc = root->ci_ext;
00802 
00803             if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
00804                 (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
00805                 /*
00806                  * Last CIphone of history entry is in left-context list supported by
00807                  * target root node, and
00808                  * first CIphone of target root node is in right context list supported
00809                  * by history entry;
00810                  * So the transition can go ahead (if new score is good enough).
00811                  */
00812                 newscore = score + root->logs2prob;
00813 
00814                 if ((newscore BETTER_THAN thresh)
00815                     && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
00816                     if (hmm_frame(&root->hmm) < nf) {
00817                         /* Newly activated node; add to active list */
00818                         fsgs->pnode_active_next =
00819                             glist_add_ptr(fsgs->pnode_active_next,
00820                                           (void *) root);
00821 #if __FSG_DBG__
00822                         E_INFO
00823                             ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
00824                              fsgs->frame, bpidx, (int32) root);
00825 #endif
00826                     }
00827                     else {
00828 #if __FSG_DBG__
00829                         E_INFO
00830                             ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
00831                              fsgs->frame, bpidx, (int32) root);
00832 #endif
00833                     }
00834 
00835                     hmm_enter(&root->hmm, newscore, bpidx, nf);
00836                 }
00837             }
00838         }
00839     }
00840 }
00841 
00842 
00843 int
00844 fsg_search_step(ps_search_t *search, int frame_idx)
00845 {
00846     fsg_search_t *fsgs = (fsg_search_t *)search;
00847     int16 const *senscr;
00848     acmod_t *acmod = search->acmod;
00849     gnode_t *gn;
00850     fsg_pnode_t *pnode;
00851     hmm_t *hmm;
00852 
00853     /* Activate our HMMs for the current frame if need be. */
00854     if (!acmod->compallsen)
00855         fsg_search_sen_active(fsgs);
00856     /* Compute GMM scores for the current frame. */
00857     senscr = acmod_score(acmod, &frame_idx);
00858     fsgs->n_sen_eval += acmod->n_senone_active;
00859     hmm_context_set_senscore(fsgs->hmmctx, senscr);
00860 
00861     /* Mark backpointer table for current frame. */
00862     fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
00863 
00864     /* Evaluate all active pnodes (HMMs) */
00865     fsg_search_hmm_eval(fsgs);
00866 
00867     /*
00868      * Prune and propagate the HMMs evaluated; create history entries for
00869      * word exits.  The words exits are tentative, and may be pruned; make
00870      * the survivors permanent via fsg_history_end_frame().
00871      */
00872     fsg_search_hmm_prune_prop(fsgs);
00873     fsg_history_end_frame(fsgs->history);
00874 
00875     /*
00876      * Propagate new history entries through any null transitions, creating
00877      * new history entries, and then make the survivors permanent.
00878      */
00879     fsg_search_null_prop(fsgs);
00880     fsg_history_end_frame(fsgs->history);
00881 
00882     /*
00883      * Perform cross-word transitions; propagate each history entry across its
00884      * terminating state to the root nodes of the lextree attached to the state.
00885      */
00886     fsg_search_word_trans(fsgs);
00887 
00888     /*
00889      * We've now come full circle, HMM and FSG states have been updated for
00890      * the next frame.
00891      * Update the active lists, deactivate any currently active HMMs that
00892      * did not survive into the next frame
00893      */
00894     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00895         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00896         hmm = fsg_pnode_hmmptr(pnode);
00897 
00898         if (hmm_frame(hmm) == fsgs->frame) {
00899             /* This HMM NOT activated for the next frame; reset it */
00900             fsg_psubtree_pnode_deactivate(pnode);
00901         }
00902         else {
00903             assert(hmm_frame(hmm) == (fsgs->frame + 1));
00904         }
00905     }
00906 
00907     /* Free the currently active list */
00908     glist_free(fsgs->pnode_active);
00909 
00910     /* Make the next-frame active list the current one */
00911     fsgs->pnode_active = fsgs->pnode_active_next;
00912     fsgs->pnode_active_next = NULL;
00913 
00914     /* End of this frame; ready for the next */
00915     ++fsgs->frame;
00916 
00917     return 1;
00918 }
00919 
00920 
00921 /*
00922  * Set all HMMs to inactive, clear active lists, initialize FSM start
00923  * state to be the only active node.
00924  * (Executed at the start of each utterance.)
00925  */
00926 int
00927 fsg_search_start(ps_search_t *search)
00928 {
00929     fsg_search_t *fsgs = (fsg_search_t *)search;
00930     int32 silcipid;
00931     fsg_pnode_ctxt_t ctxt;
00932 
00933     /* Reset dynamic adjustment factor for beams */
00934     fsgs->beam_factor = 1.0f;
00935     fsgs->beam = fsgs->beam_orig;
00936     fsgs->pbeam = fsgs->pbeam_orig;
00937     fsgs->wbeam = fsgs->wbeam_orig;
00938 
00939     silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
00940 
00941     /* Initialize EVERYTHING to be inactive */
00942     assert(fsgs->pnode_active == NULL);
00943     assert(fsgs->pnode_active_next == NULL);
00944 
00945     fsg_history_reset(fsgs->history);
00946     fsg_history_utt_start(fsgs->history);
00947     fsgs->final = FALSE;
00948 
00949     /* Dummy context structure that allows all right contexts to use this entry */
00950     fsg_pnode_add_all_ctxt(&ctxt);
00951 
00952     /* Create dummy history entry leading to start state */
00953     fsgs->frame = -1;
00954     fsgs->bestscore = 0;
00955     fsg_history_entry_add(fsgs->history,
00956                           NULL, -1, 0, -1, silcipid, ctxt);
00957     fsgs->bpidx_start = 0;
00958 
00959     /* Propagate dummy history entry through NULL transitions from start state */
00960     fsg_search_null_prop(fsgs);
00961 
00962     /* Perform word transitions from this dummy history entry */
00963     fsg_search_word_trans(fsgs);
00964 
00965     /* Make the next-frame active list the current one */
00966     fsgs->pnode_active = fsgs->pnode_active_next;
00967     fsgs->pnode_active_next = NULL;
00968 
00969     ++fsgs->frame;
00970 
00971     fsgs->n_hmm_eval = 0;
00972     fsgs->n_sen_eval = 0;
00973 
00974     return 0;
00975 }
00976 
00977 /*
00978  * Cleanup at the end of each utterance.
00979  */
00980 int
00981 fsg_search_finish(ps_search_t *search)
00982 {
00983     fsg_search_t *fsgs = (fsg_search_t *)search;
00984     gnode_t *gn;
00985     fsg_pnode_t *pnode;
00986     int32 n_hist;
00987 
00988     /* Deactivate all nodes in the current and next-frame active lists */
00989     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00990         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00991         fsg_psubtree_pnode_deactivate(pnode);
00992     }
00993     for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
00994         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00995         fsg_psubtree_pnode_deactivate(pnode);
00996     }
00997 
00998     glist_free(fsgs->pnode_active);
00999     fsgs->pnode_active = NULL;
01000     glist_free(fsgs->pnode_active_next);
01001     fsgs->pnode_active_next = NULL;
01002 
01003     fsgs->final = TRUE;
01004 
01005     n_hist = fsg_history_n_entries(fsgs->history);
01006     E_INFO
01007         ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
01008          fsgs->frame, fsgs->n_hmm_eval,
01009          (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
01010          fsgs->n_sen_eval,
01011          (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
01012          n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
01013 
01014     /* Sanity check */
01015     if (fsgs->n_hmm_eval >
01016         fsg_lextree_n_pnode(fsgs->lextree) * fsgs->frame) {
01017         E_ERROR
01018             ("SANITY CHECK #HMMEval(%d) > %d (#HMMs(%d)*#frames(%d)) FAILED\n",
01019              fsgs->n_hmm_eval,
01020              fsg_lextree_n_pnode(fsgs->lextree) * fsgs->frame,
01021              fsg_lextree_n_pnode(fsgs->lextree), fsgs->frame);
01022     }
01023 
01024     return 0;
01025 }
01026 
01027 static int
01028 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score)
01029 {
01030     fsg_hist_entry_t *hist_entry;
01031     fsg_model_t *fsg;
01032     int bpidx, frm, last_frm, besthist;
01033     int32 bestscore;
01034 
01035     if (frame_idx == -1)
01036         frame_idx = fsgs->frame - 1;
01037     last_frm = frm = frame_idx;
01038 
01039     /* Scan backwards to find a word exit in frame_idx. */
01040     bpidx = fsg_history_n_entries(fsgs->history) - 1;
01041     while (bpidx > 0) {
01042         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
01043         if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
01044             frm = last_frm = fsg_hist_entry_frame(hist_entry);
01045             break;
01046         }
01047     }
01048 
01049     /* No hypothesis (yet). */
01050     if (bpidx <= 0) 
01051         return bpidx;
01052 
01053     /* Now find best word exit in this frame. */
01054     bestscore = INT_MIN;
01055     besthist = -1;
01056     fsg = fsgs->fsg;
01057     while (frm == last_frm) {
01058         fsg_link_t *fl;
01059         int32 score;
01060 
01061         fl = fsg_hist_entry_fsglink(hist_entry);
01062         score = fsg_hist_entry_score(hist_entry);
01063 
01064         if (score BETTER_THAN bestscore) {
01065             /* Only enforce the final state constraint if this is a final hypothesis. */
01066             if ((!final)
01067                 || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
01068                 bestscore = score;
01069                 besthist = bpidx;
01070             }
01071         }
01072 
01073         --bpidx;
01074         if (bpidx < 0)
01075             break;
01076         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
01077         frm = fsg_hist_entry_frame(hist_entry);
01078     }
01079 
01080     /* Final state not reached. */
01081     if (besthist == -1) {
01082         E_ERROR("Final state not reached in frame %d\n", frame_idx);
01083         return -1;
01084     }
01085 
01086     /* This here's the one we want. */
01087     if (out_score)
01088         *out_score = bestscore;
01089     return besthist;
01090 }
01091 
01092 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */
01093 static ps_latlink_t *
01094 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
01095 {
01096     fsg_search_t *fsgs = (fsg_search_t *)search;
01097 
01098     if (search->last_link == NULL) {
01099         search->last_link = ps_lattice_bestpath(search->dag, NULL,
01100                                                 1.0, fsgs->ascale);
01101         if (search->last_link == NULL)
01102             return NULL;
01103         /* Also calculate betas so we can fill in the posterior
01104          * probability field in the segmentation. */
01105         if (search->post == 0)
01106             search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
01107     }
01108     if (out_score)
01109         *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
01110     return search->last_link;
01111 }
01112 
01113 char const *
01114 fsg_search_hyp(ps_search_t *search, int32 *out_score)
01115 {
01116     fsg_search_t *fsgs = (fsg_search_t *)search;
01117     char *c;
01118     size_t len;
01119     int bp, bpidx;
01120 
01121     /* Get last backpointer table index. */
01122     bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
01123     /* No hypothesis (yet). */
01124     if (bpidx <= 0)
01125         return NULL;
01126 
01127     /* If bestpath is enabled and the utterance is complete, then run it. */
01128     if (fsgs->bestpath && fsgs->final) {
01129         ps_lattice_t *dag;
01130         ps_latlink_t *link;
01131 
01132         if ((dag = fsg_search_lattice(search)) == NULL)
01133             return NULL;
01134         if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL)
01135             return NULL;
01136         return ps_lattice_hyp(dag, link);
01137     }
01138 
01139     bp = bpidx;
01140     len = 0;
01141     while (bp > 0) {
01142         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01143         fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
01144         int32 wid;
01145 
01146         bp = fsg_hist_entry_pred(hist_entry);
01147         wid = fsg_link_wid(fl);
01148         if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
01149             continue;
01150         len += strlen(fsg_model_word_str(fsgs->fsg, wid)) + 1;
01151     }
01152 
01153     ckd_free(search->hyp_str);
01154     search->hyp_str = ckd_calloc(1, len);
01155     bp = bpidx;
01156     c = search->hyp_str + len - 1;
01157     while (bp > 0) {
01158         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01159         fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
01160         int32 wid;
01161 
01162         bp = fsg_hist_entry_pred(hist_entry);
01163         wid = fsg_link_wid(fl);
01164         if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
01165             continue;
01166         len = strlen(fsg_model_word_str(fsgs->fsg, wid));
01167         c -= len;
01168         memcpy(c, fsg_model_word_str(fsgs->fsg, wid), len);
01169         if (c > search->hyp_str) {
01170             --c;
01171             *c = ' ';
01172         }
01173     }
01174 
01175     return search->hyp_str;
01176 }
01177 
01178 static void
01179 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
01180 {
01181     fsg_search_t *fsgs = (fsg_search_t *)seg->search;
01182     fsg_hist_entry_t *ph = NULL;
01183     int32 bp;
01184 
01185     if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
01186         ph = fsg_history_entry_get(fsgs->history, bp);
01187     seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
01188     seg->ef = fsg_hist_entry_frame(hist_entry);
01189     seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
01190     /* This is kind of silly but it happens for null transitions. */
01191     if (seg->sf > seg->ef) seg->sf = seg->ef;
01192     seg->prob = 0; /* Bogus value... */
01193     /* "Language model" score = transition probability. */
01194     seg->lback = 1;
01195     seg->lscr = hist_entry->fsglink->logs2prob;
01196     if (ph) {
01197         /* FIXME: Not sure exactly how cross-word triphones are handled. */
01198         seg->ascr = hist_entry->score - ph->score - seg->lscr;
01199     }
01200     else
01201         seg->ascr = hist_entry->score - seg->lscr;
01202 }
01203 
01204 
01205 static void
01206 fsg_seg_free(ps_seg_t *seg)
01207 {
01208     fsg_seg_t *itor = (fsg_seg_t *)seg;
01209     ckd_free(itor->hist);
01210     ckd_free(itor);
01211 }
01212 
01213 static ps_seg_t *
01214 fsg_seg_next(ps_seg_t *seg)
01215 {
01216     fsg_seg_t *itor = (fsg_seg_t *)seg;
01217 
01218     if (++itor->cur == itor->n_hist) {
01219         fsg_seg_free(seg);
01220         return NULL;
01221     }
01222 
01223     fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
01224     return seg;
01225 }
01226 
01227 static ps_segfuncs_t fsg_segfuncs = {
01228     /* seg_next */ fsg_seg_next,
01229     /* seg_free */ fsg_seg_free
01230 };
01231 
01232 static ps_seg_t *
01233 fsg_search_seg_iter(ps_search_t *search, int32 *out_score)
01234 {
01235     fsg_search_t *fsgs = (fsg_search_t *)search;
01236     fsg_seg_t *itor;
01237     int bp, bpidx, cur;
01238 
01239     bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
01240     /* No hypothesis (yet). */
01241     if (bpidx <= 0)
01242         return NULL;
01243 
01244     /* If bestpath is enabled and the utterance is complete, then run it. */
01245     if (fsgs->bestpath && fsgs->final) {
01246         ps_lattice_t *dag;
01247         ps_latlink_t *link;
01248 
01249         if ((dag = fsg_search_lattice(search)) == NULL)
01250             return NULL;
01251         if ((link = fsg_search_bestpath(search, out_score, TRUE)) == NULL)
01252             return NULL;
01253         return ps_lattice_seg_iter(dag, link, 1.0);
01254     }
01255 
01256     /* Calling this an "iterator" is a bit of a misnomer since we have
01257      * to get the entire backtrace in order to produce it.  On the
01258      * other hand, all we actually need is the bptbl IDs, and we can
01259      * allocate a fixed-size array of them. */
01260     itor = ckd_calloc(1, sizeof(*itor));
01261     itor->base.vt = &fsg_segfuncs;
01262     itor->base.search = search;
01263     itor->base.lwf = 1.0;
01264     itor->n_hist = 0;
01265     bp = bpidx;
01266     while (bp > 0) {
01267         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01268         bp = fsg_hist_entry_pred(hist_entry);
01269         ++itor->n_hist;
01270     }
01271     if (itor->n_hist == 0) {
01272         ckd_free(itor);
01273         return NULL;
01274     }
01275     itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
01276     cur = itor->n_hist - 1;
01277     bp = bpidx;
01278     while (bp > 0) {
01279         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01280         itor->hist[cur] = hist_entry;
01281         bp = fsg_hist_entry_pred(hist_entry);
01282         --cur;
01283     }
01284 
01285     /* Fill in relevant fields for first element. */
01286     fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
01287     
01288     return (ps_seg_t *)itor;
01289 }
01290 
01291 static int
01292 fsg_search_prob(ps_search_t *search)
01293 {
01294     fsg_search_t *fsgs = (fsg_search_t *)search;
01295 
01296     /* If bestpath is enabled and the utterance is complete, then run it. */
01297     if (fsgs->bestpath && fsgs->final) {
01298         ps_lattice_t *dag;
01299         ps_latlink_t *link;
01300 
01301         if ((dag = fsg_search_lattice(search)) == NULL)
01302             return 0;
01303         if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
01304             return 0;
01305         return search->post;
01306     }
01307     else {
01308         /* FIXME: Give some kind of good estimate here, eventually. */
01309         return 0;
01310     }
01311 }
01312 
01313 static ps_latnode_t *
01314 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 ascr)
01315 {
01316     ps_latnode_t *node;
01317 
01318     for (node = dag->nodes; node; node = node->next)
01319         if (node->sf == sf && node->wid == wid)
01320             break;
01321 
01322     if (node) {
01323         /* Update end frames. */
01324         if (node->lef == -1 || node->lef < ef)
01325             node->lef = ef;
01326         if (node->fef == -1 || node->fef > ef)
01327             node->fef = ef;
01328         /* Update best link score. */
01329         if (ascr BETTER_THAN node->info.best_exit)
01330             node->info.best_exit = ascr;
01331     }
01332     else {
01333         /* New node; link to head of list */
01334         node = listelem_malloc(dag->latnode_alloc);
01335         node->wid = wid;
01336         node->sf = sf;
01337         node->fef = node->lef = ef;
01338         node->reachable = FALSE;
01339         node->entries = NULL;
01340         node->exits = NULL;
01341         node->info.best_exit = ascr;
01342 
01343         node->next = dag->nodes;
01344         dag->nodes = node;
01345     }
01346 
01347     return node;
01348 }
01349 
01350 static ps_latnode_t *
01351 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid)
01352 {
01353     ps_latnode_t *node;
01354 
01355     for (node = dag->nodes; node; node = node->next)
01356         if (node->sf == sf && node->wid == wid)
01357             break;
01358     return node;
01359 }
01360 
01361 static ps_latnode_t *
01362 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
01363 {
01364     ps_latnode_t *node;
01365     glist_t start = NULL;
01366     int nstart = 0;
01367 
01368     /* Look for all nodes starting in frame zero with some exits. */
01369     for (node = dag->nodes; node; node = node->next) {
01370         if (node->sf == 0 && node->exits) {
01371             E_INFO("Start node %s.%d:%d:%d\n",
01372                    fsg_model_word_str(fsgs->fsg, node->wid),
01373                    node->sf, node->fef, node->lef);
01374             start = glist_add_ptr(start, node);
01375             ++nstart;
01376         }
01377     }
01378 
01379     /* If there was more than one start node candidate, then we need
01380      * to create an artificial start node with epsilon transitions to
01381      * all of them. */
01382     if (nstart == 1) {
01383         node = gnode_ptr(start);
01384     }
01385     else {
01386         gnode_t *st;
01387         int wid;
01388 
01389         wid = fsg_model_word_add(fsgs->fsg, "<s>");
01390         if (fsgs->fsg->silwords)
01391             bitvec_set(fsgs->fsg->silwords, wid);
01392         node = new_node(dag, fsgs->fsg, 0, 0, wid, 0);
01393         for (st = start; st; st = gnode_next(st))
01394             ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
01395     }
01396     glist_free(start);
01397     return node;
01398 }
01399 
01400 static ps_latnode_t *
01401 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
01402 {
01403     ps_latnode_t *node;
01404     glist_t end = NULL;
01405     int nend = 0;
01406 
01407     /* Look for all nodes ending in last frame with some entries. */
01408     for (node = dag->nodes; node; node = node->next) {
01409         if (node->lef == dag->n_frames - 1 && node->entries) {
01410             E_INFO("End node %s.%d:%d:%d (%d)\n",
01411                    fsg_model_word_str(fsgs->fsg, node->wid),
01412                    node->sf, node->fef, node->lef, node->info.best_exit);
01413             end = glist_add_ptr(end, node);
01414             ++nend;
01415         }
01416     }
01417 
01418     if (nend == 1) {
01419         node = gnode_ptr(end);
01420     }
01421     else if (nend == 0) {
01422         ps_latnode_t *last = NULL;
01423         int ef = 0;
01424 
01425         /* If there were no end node candidates, then just use the
01426          * node with the last exit frame. */
01427         for (node = dag->nodes; node; node = node->next) {
01428             if (node->lef > ef && node->entries) {
01429                 last = node;
01430                 ef = node->lef;
01431             }
01432         }
01433         node = last;
01434         if (node)
01435             E_INFO("End node %s.%d:%d:%d (%d)\n",
01436                    fsg_model_word_str(fsgs->fsg, node->wid),
01437                    node->sf, node->fef, node->lef, node->info.best_exit);
01438     }    
01439     else {
01440         /* If there was more than one end node candidate, then we need
01441          * to create an artificial end node with epsilon transitions
01442          * out of all of them. */
01443         gnode_t *st;
01444         int wid;
01445 
01446         wid = fsg_model_word_add(fsgs->fsg, "</s>");
01447         if (fsgs->fsg->silwords)
01448             bitvec_set(fsgs->fsg->silwords, wid);
01449         node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, 0);
01450         /* Use the "best" (in reality it will be the only) exit link
01451          * score from this final node as the link score. */
01452         for (st = end; st; st = gnode_next(st)) {
01453             ps_latnode_t *src = gnode_ptr(st);
01454             ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
01455         }
01456     }
01457     glist_free(end);
01458     return node;
01459 }
01460 
01461 static void
01462 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
01463 {
01464     glist_t q;
01465 
01466     /* It doesn't matter which order we do this in. */
01467     end->reachable = TRUE;
01468     q = glist_add_ptr(NULL, end);
01469     while (q) {
01470         ps_latnode_t *node = gnode_ptr(q);
01471         latlink_list_t *x;
01472 
01473         /* Pop the front of the list. */
01474         q = gnode_free(q, NULL);
01475         /* Expand all its predecessors that haven't been seen yet. */
01476         for (x = node->entries; x; x = x->next) {
01477             ps_latnode_t *next = x->link->from;
01478             if (!next->reachable) {
01479                 next->reachable = TRUE;
01480                 q = glist_add_ptr(q, next);
01481             }
01482         }
01483     }
01484 }
01485 
01494 static ps_lattice_t *
01495 fsg_search_lattice(ps_search_t *search)
01496 {
01497     fsg_search_t *fsgs;
01498     fsg_model_t *fsg;
01499     ps_latnode_t *node;
01500     ps_lattice_t *dag;
01501     int32 i, n;
01502 
01503     fsgs = (fsg_search_t *)search;
01504 
01505     /* Check to see if a lattice has previously been created over the
01506      * same number of frames, and reuse it if so. */
01507     if (search->dag && search->dag->n_frames == fsgs->frame)
01508         return search->dag;
01509 
01510     /* Nope, create a new one. */
01511     ps_lattice_free(search->dag);
01512     search->dag = NULL;
01513     dag = ps_lattice_init_search(search, fsgs->frame);
01514     fsg = fsgs->fsg;
01515 
01516     /*
01517      * Each history table entry represents a link in the word graph.
01518      * The set of nodes is determined by the number of unique
01519      * (word,start-frame) pairs in the history table.  So we will
01520      * first find all those nodes.
01521      */
01522     n = fsg_history_n_entries(fsgs->history);
01523     for (i = 0; i < n; ++i) {
01524         fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
01525         ps_latnode_t *node;
01526         int32 ascr;
01527         int sf;
01528 
01529         /* Skip null transitions. */
01530         if (fh->fsglink == NULL || fh->fsglink->wid == -1)
01531             continue;
01532 
01533         /* Find the start node of this link. */
01534         if (fh->pred) {
01535             fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
01536             /* FIXME: We include the transition score in the lattice
01537              * link score.  This is because of the practical
01538              * difficulty of obtaining it separately in bestpath or
01539              * forward-backward search, and because it is essentially
01540              * a unigram probability, so there is no need to treat it
01541              * separately from the acoustic score.  However, it's not
01542              * clear that this will actually yield correct results.*/
01543             ascr = fh->score - pfh->score;
01544             sf = pfh->frame + 1;
01545         }
01546         else {
01547             ascr = fh->score;
01548             sf = 0;
01549         }
01550 
01551         /*
01552          * Note that although scores are tied to links rather than
01553          * nodes, it's possible that there are no links out of the
01554          * destination node, and thus we need to preserve its score in
01555          * case it turns out to be utterance-final.
01556          */
01557         node = new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, ascr);
01558     }
01559 
01560     /*
01561      * Now, we will create links only to nodes that actually exist.
01562      */
01563     n = fsg_history_n_entries(fsgs->history);
01564     for (i = 0; i < n; ++i) {
01565         fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
01566         ps_latnode_t *src, *dest;
01567         int32 ascr;
01568         int sf;
01569         int j;
01570 
01571         /* Skip null transitions. */
01572         if (fh->fsglink == NULL || fh->fsglink->wid == -1)
01573             continue;
01574 
01575         /* Find the start node of this link and calculate its link score. */
01576         if (fh->pred) {
01577             fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
01578             sf = pfh->frame + 1;
01579             ascr = fh->score - pfh->score;
01580         }
01581         else {
01582             ascr = fh->score;
01583             sf = 0;
01584         }
01585         src = find_node(dag, fsg, sf, fh->fsglink->wid);
01586     
01587         /*
01588          * For each non-epsilon link following this one, look for a
01589          * matching node in the lattice and link to it.
01590          */
01591         sf = fh->frame + 1;
01592         for (j = 0; j < fsg->n_state; ++j) {
01593             gnode_t *gn;
01594 
01595             for (gn = fsg_model_trans(fsg, fh->fsglink->to_state, j); gn; gn = gnode_next(gn)) {
01596                 fsg_link_t *link = gnode_ptr(gn);
01597                 if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
01598                     ps_lattice_link(dag, src, dest, ascr, fh->frame);
01599             }
01600 
01601             /*
01602              * Transitive closure on nulls has already been done, so we
01603              * just need to look one link forward from them.
01604              */
01605             if (fsg_model_null_trans(fsg, fh->fsglink->to_state,j)) {
01606                 gnode_t *gn2;
01607                 int k;
01608 
01609                 /* Add all non-null links out of j. */
01610                 for (k = 0; k < fsg->n_state; ++k) {
01611                     for (gn2 = fsg_model_trans(fsg, j, k); gn2; gn2 = gnode_next(gn2)) {
01612                         fsg_link_t *link = gnode_ptr(gn2);
01613                         if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
01614                             ps_lattice_link(dag, src, dest, ascr, fh->frame);
01615                     }
01616                 }
01617             }
01618         }
01619     }
01620 
01621     /* Figure out which nodes are the start and end nodes. */
01622     if ((dag->start = find_start_node(fsgs, dag)) == NULL)
01623         goto error_out;
01624     if ((dag->end = find_end_node(fsgs, dag)) == NULL)
01625         goto error_out;
01626     E_INFO("lattice start node %s.%d end node %s.%d\n",
01627            fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
01628            fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
01629     /* FIXME: Need to calculate final_node_ascr here. */
01630 
01631     /*
01632      * Convert word IDs from FSG to dictionary.
01633      */
01634     for (node = dag->nodes; node; node = node->next) {
01635         node->wid = s3dict_wordid(dag->search->dict,
01636                                   fsg_model_word_str(fsg, node->wid));
01637         node->basewid = s3dict_basewid(dag->search->dict, node->wid);
01638     }
01639 
01640     /*
01641      * Now we are done, because the links in the graph are uniquely
01642      * defined by the history table.  However we should remove any
01643      * nodes which are not reachable from the end node of the FSG.
01644      * Everything is reachable from the start node by definition.
01645      */
01646     mark_reachable(dag, dag->end);
01647 
01648     ps_lattice_delete_unreachable(dag);
01649     {
01650         int32 silpen, fillpen;
01651 
01652         silpen = (int32)(logmath_log(fsg->lmath,
01653                                      cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
01654                          * fsg->lw);
01655         fillpen = (int32)(logmath_log(fsg->lmath,
01656                                       cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
01657                           * fsg->lw);
01658         ps_lattice_bypass_fillers(dag, silpen, fillpen);
01659     }
01660     search->dag = dag;
01661     return dag;
01662 
01663 error_out:
01664     ps_lattice_free(dag);
01665     return NULL;
01666 
01667 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7