src/libpocketsphinx/phone_loop_search.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 #include <err.h>
00043 
00044 #include "phone_loop_search.h"
00045 
00046 static int phone_loop_search_start(ps_search_t *search);
00047 static int phone_loop_search_step(ps_search_t *search, int frame_idx);
00048 static int phone_loop_search_finish(ps_search_t *search);
00049 static int phone_loop_search_reinit(ps_search_t *search);
00050 static void phone_loop_search_free(ps_search_t *search);
00051 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
00052 static int32 phone_loop_search_prob(ps_search_t *search);
00053 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
00054 
00055 static ps_searchfuncs_t phone_loop_search_funcs = {
00056     /* name: */   "phone_loop",
00057     /* start: */  phone_loop_search_start,
00058     /* step: */   phone_loop_search_step,
00059     /* finish: */ phone_loop_search_finish,
00060     /* reinit: */ phone_loop_search_reinit,
00061     /* free: */   phone_loop_search_free,
00062     /* lattice: */  NULL,
00063     /* hyp: */      phone_loop_search_hyp,
00064     /* prob: */     phone_loop_search_prob,
00065     /* seg_iter: */ phone_loop_search_seg_iter,
00066 };
00067 
00068 static int
00069 phone_loop_search_reinit(ps_search_t *search)
00070 {
00071     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00072     cmd_ln_t *config = ps_search_config(search);
00073     acmod_t *acmod = ps_search_acmod(search);
00074     int i;
00075 
00076     /* Initialize HMM context. */
00077     if (pls->hmmctx)
00078         hmm_context_free(pls->hmmctx);
00079     pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00080                                    acmod->tmat->tp, NULL, acmod->mdef->sseq);
00081     if (pls->hmmctx == NULL)
00082         return -1;
00083 
00084     /* Initialize phone HMMs. */
00085     if (pls->phones) {
00086         for (i = 0; i < pls->n_phones; ++i)
00087             hmm_deinit((hmm_t *)&pls->phones[i]);
00088         ckd_free(pls->phones);
00089     }
00090     pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
00091     pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
00092     for (i = 0; i < pls->n_phones; ++i) {
00093         pls->phones[i].ciphone = i;
00094         hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
00095                  FALSE,
00096                  bin_mdef_pid2ssid(acmod->mdef, i),
00097                  bin_mdef_pid2tmatid(acmod->mdef, i));
00098     }
00099     pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam"));
00100     pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam"));
00101     pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
00102     E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
00103            pls->beam, pls->pbeam, pls->pip);
00104 
00105     return 0;
00106 }
00107 
00108 ps_search_t *
00109 phone_loop_search_init(cmd_ln_t *config,
00110                        acmod_t *acmod,
00111                        s3dict_t *dict)
00112 {
00113     phone_loop_search_t *pls;
00114 
00115     /* Allocate and initialize. */
00116     pls = ckd_calloc(1, sizeof(*pls));
00117     ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
00118                    config, acmod, dict, NULL);
00119     phone_loop_search_reinit(ps_search_base(pls));
00120 
00121     return ps_search_base(pls);
00122 }
00123 
00124 static void
00125 phone_loop_search_free_renorm(phone_loop_search_t *pls)
00126 {
00127     gnode_t *gn;
00128     for (gn = pls->renorm; gn; gn = gnode_next(gn))
00129         ckd_free(gnode_ptr(gn));
00130     glist_free(pls->renorm);
00131     pls->renorm = NULL;
00132 }
00133 
00134 static void
00135 phone_loop_search_free(ps_search_t *search)
00136 {
00137     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00138     int i;
00139 
00140     ps_search_deinit(search);
00141     for (i = 0; i < pls->n_phones; ++i)
00142         hmm_deinit((hmm_t *)&pls->phones[i]);
00143     phone_loop_search_free_renorm(pls);
00144     ckd_free(pls->phones);
00145     hmm_context_free(pls->hmmctx);
00146     ckd_free(pls);
00147 }
00148 
00149 static int
00150 phone_loop_search_start(ps_search_t *search)
00151 {
00152     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00153     int i;
00154 
00155     /* Reset and enter all phone HMMs. */
00156     for (i = 0; i < pls->n_phones; ++i) {
00157         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00158         hmm_clear(hmm);
00159         hmm_enter(hmm, 0, -1, 0);
00160     }
00161     phone_loop_search_free_renorm(pls);
00162     pls->best_score = 0;
00163 
00164     return 0;
00165 }
00166 
00167 static void
00168 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
00169 {
00170     phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn));
00171     int i;
00172 
00173     pls->renorm = glist_add_ptr(pls->renorm, rn);
00174     rn->frame_idx = frame_idx;
00175     rn->norm = norm;
00176 
00177     for (i = 0; i < pls->n_phones; ++i) {
00178         hmm_normalize((hmm_t *)&pls->phones[i], norm);
00179     }
00180 }
00181 
00182 static int32
00183 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
00184 {
00185     int32 bs = WORST_SCORE;
00186     int i, bi;
00187 
00188     hmm_context_set_senscore(pls->hmmctx, senscr);
00189 
00190     bi = 0;
00191     for (i = 0; i < pls->n_phones; ++i) {
00192         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00193         int32 score;
00194 
00195         if (hmm_frame(hmm) < frame_idx)
00196             continue;
00197         score = hmm_vit_eval(hmm);
00198         if (score BETTER_THAN bs) {
00199             bs = score;
00200             bi = i;
00201         }
00202     }
00203     pls->best_score = bs;
00204 
00205     for (i = 0; i < pls->n_phones; ++i) {
00206         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00207         if (hmm_frame(hmm) < frame_idx)
00208             continue;
00209         if (hmm_bestscore(hmm) < bs + pls->beam)
00210             continue;
00211     }
00212 
00213     return bs;
00214 }
00215 
00216 static void
00217 prune_hmms(phone_loop_search_t *pls, int frame_idx)
00218 {
00219     int32 thresh = pls->best_score + pls->beam;
00220     int nf = frame_idx + 1;
00221     int i;
00222 
00223     /* Check all phones to see if they remain active in the next frame. */
00224     for (i = 0; i < pls->n_phones; ++i) {
00225         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00226 
00227         if (hmm_frame(hmm) < frame_idx)
00228             continue;
00229         /* Retain if score better than threshold. */
00230         if (hmm_bestscore(hmm) BETTER_THAN thresh) {
00231             hmm_frame(hmm) = nf;
00232         }
00233         else
00234             hmm_clear_scores(hmm);
00235     }
00236 }
00237 
00238 static void
00239 phone_transition(phone_loop_search_t *pls, int frame_idx)
00240 {
00241     int32 thresh = pls->best_score + pls->pbeam;
00242     int nf = frame_idx + 1;
00243     int i;
00244 
00245     /* Now transition out of phones whose last states are inside the
00246      * phone transition beam. */
00247     for (i = 0; i < pls->n_phones; ++i) {
00248         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00249         int32 newphone_score;
00250         int j;
00251 
00252         if (hmm_frame(hmm) != nf)
00253             continue;
00254 
00255         newphone_score = hmm_out_score(hmm) + pls->pip;
00256         if (newphone_score BETTER_THAN thresh) {
00257             /* Transition into all phones using the usual Viterbi rule. */
00258             for (j = 0; j < pls->n_phones; ++j) {
00259                 hmm_t *nhmm = (hmm_t *)&pls->phones[j];
00260 
00261                 if (hmm_frame(nhmm) < frame_idx
00262                     || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
00263                     hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
00264                 }
00265             }
00266         }
00267     }
00268 }
00269 
00270 static int
00271 phone_loop_search_step(ps_search_t *search, int frame_idx)
00272 {
00273     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00274     acmod_t *acmod = ps_search_acmod(search);
00275     int16 const *senscr;
00276     int i;
00277 
00278     /* All CI senones are active all the time. */
00279     if (!ps_search_acmod(pls)->compallsen)
00280         for (i = 0; i < pls->n_phones; ++i)
00281             acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]);
00282 
00283     /* Calculate senone scores for current frame. */
00284     senscr = acmod_score(acmod, &frame_idx);
00285 
00286     /* Renormalize, if necessary. */
00287     if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
00288         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00289                frame_idx, pls->best_score);
00290         renormalize_hmms(pls, frame_idx, pls->best_score);
00291     }
00292 
00293     /* Evaluate phone HMMs for current frame. */
00294     pls->best_score = evaluate_hmms(pls, senscr, frame_idx);
00295 
00296     /* Prune phone HMMs. */
00297     prune_hmms(pls, frame_idx);
00298 
00299     /* Do phone transitions. */
00300     phone_transition(pls, frame_idx);
00301 
00302     return 0;
00303 }
00304 
00305 int32
00306 phone_loop_search_score(phone_loop_search_t *pls, int ciphone)
00307 {
00308     hmm_t *hmm;
00309 
00310     if (pls == NULL)
00311         return 0;
00312 
00313     hmm = (hmm_t *)&pls->phones[ciphone];
00314     return hmm_bestscore(hmm) - pls->best_score;
00315 }
00316 
00317 static int
00318 phone_loop_search_finish(ps_search_t *search)
00319 {
00320     /* Actually nothing to do here really. */
00321     return 0;
00322 }
00323 
00324 static char const *
00325 phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
00326 {
00327     E_WARN("Hypotheses are not returned from phone loop search");
00328     return NULL;
00329 }
00330 
00331 static int32
00332 phone_loop_search_prob(ps_search_t *search)
00333 {
00334     /* FIXME: Actually... they ought to be. */
00335     E_WARN("Posterior probabilities are not returned from phone loop search");
00336     return 0;
00337 }
00338 
00339 static ps_seg_t *
00340 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
00341 {
00342     E_WARN("Hypotheses are not returned from phone loop search");
00343     return NULL;
00344 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7