00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00042 #include <err.h>
00043
00044 #include "phone_loop_search.h"
00045
00046 static int phone_loop_search_start(ps_search_t *search);
00047 static int phone_loop_search_step(ps_search_t *search, int frame_idx);
00048 static int phone_loop_search_finish(ps_search_t *search);
00049 static int phone_loop_search_reinit(ps_search_t *search);
00050 static void phone_loop_search_free(ps_search_t *search);
00051 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
00052 static int32 phone_loop_search_prob(ps_search_t *search);
00053 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
00054
00055 static ps_searchfuncs_t phone_loop_search_funcs = {
00056 "phone_loop",
00057 phone_loop_search_start,
00058 phone_loop_search_step,
00059 phone_loop_search_finish,
00060 phone_loop_search_reinit,
00061 phone_loop_search_free,
00062 NULL,
00063 phone_loop_search_hyp,
00064 phone_loop_search_prob,
00065 phone_loop_search_seg_iter,
00066 };
00067
00068 static int
00069 phone_loop_search_reinit(ps_search_t *search)
00070 {
00071 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00072 cmd_ln_t *config = ps_search_config(search);
00073 acmod_t *acmod = ps_search_acmod(search);
00074 int i;
00075
00076
00077 if (pls->hmmctx)
00078 hmm_context_free(pls->hmmctx);
00079 pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00080 acmod->tmat->tp, NULL, acmod->mdef->sseq);
00081 if (pls->hmmctx == NULL)
00082 return -1;
00083
00084
00085 if (pls->phones) {
00086 for (i = 0; i < pls->n_phones; ++i)
00087 hmm_deinit((hmm_t *)&pls->phones[i]);
00088 ckd_free(pls->phones);
00089 }
00090 pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
00091 pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
00092 for (i = 0; i < pls->n_phones; ++i) {
00093 pls->phones[i].ciphone = i;
00094 hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
00095 FALSE,
00096 bin_mdef_pid2ssid(acmod->mdef, i),
00097 bin_mdef_pid2tmatid(acmod->mdef, i));
00098 }
00099 pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam"));
00100 pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam"));
00101 pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
00102 E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
00103 pls->beam, pls->pbeam, pls->pip);
00104
00105 return 0;
00106 }
00107
00108 ps_search_t *
00109 phone_loop_search_init(cmd_ln_t *config,
00110 acmod_t *acmod,
00111 s3dict_t *dict)
00112 {
00113 phone_loop_search_t *pls;
00114
00115
00116 pls = ckd_calloc(1, sizeof(*pls));
00117 ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
00118 config, acmod, dict, NULL);
00119 phone_loop_search_reinit(ps_search_base(pls));
00120
00121 return ps_search_base(pls);
00122 }
00123
00124 static void
00125 phone_loop_search_free_renorm(phone_loop_search_t *pls)
00126 {
00127 gnode_t *gn;
00128 for (gn = pls->renorm; gn; gn = gnode_next(gn))
00129 ckd_free(gnode_ptr(gn));
00130 glist_free(pls->renorm);
00131 pls->renorm = NULL;
00132 }
00133
00134 static void
00135 phone_loop_search_free(ps_search_t *search)
00136 {
00137 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00138 int i;
00139
00140 ps_search_deinit(search);
00141 for (i = 0; i < pls->n_phones; ++i)
00142 hmm_deinit((hmm_t *)&pls->phones[i]);
00143 phone_loop_search_free_renorm(pls);
00144 ckd_free(pls->phones);
00145 hmm_context_free(pls->hmmctx);
00146 ckd_free(pls);
00147 }
00148
00149 static int
00150 phone_loop_search_start(ps_search_t *search)
00151 {
00152 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00153 int i;
00154
00155
00156 for (i = 0; i < pls->n_phones; ++i) {
00157 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00158 hmm_clear(hmm);
00159 hmm_enter(hmm, 0, -1, 0);
00160 }
00161 phone_loop_search_free_renorm(pls);
00162 pls->best_score = 0;
00163
00164 return 0;
00165 }
00166
00167 static void
00168 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
00169 {
00170 phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn));
00171 int i;
00172
00173 pls->renorm = glist_add_ptr(pls->renorm, rn);
00174 rn->frame_idx = frame_idx;
00175 rn->norm = norm;
00176
00177 for (i = 0; i < pls->n_phones; ++i) {
00178 hmm_normalize((hmm_t *)&pls->phones[i], norm);
00179 }
00180 }
00181
00182 static int32
00183 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
00184 {
00185 int32 bs = WORST_SCORE;
00186 int i, bi;
00187
00188 hmm_context_set_senscore(pls->hmmctx, senscr);
00189
00190 bi = 0;
00191 for (i = 0; i < pls->n_phones; ++i) {
00192 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00193 int32 score;
00194
00195 if (hmm_frame(hmm) < frame_idx)
00196 continue;
00197 score = hmm_vit_eval(hmm);
00198 if (score BETTER_THAN bs) {
00199 bs = score;
00200 bi = i;
00201 }
00202 }
00203 pls->best_score = bs;
00204
00205 for (i = 0; i < pls->n_phones; ++i) {
00206 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00207 if (hmm_frame(hmm) < frame_idx)
00208 continue;
00209 if (hmm_bestscore(hmm) < bs + pls->beam)
00210 continue;
00211 }
00212
00213 return bs;
00214 }
00215
00216 static void
00217 prune_hmms(phone_loop_search_t *pls, int frame_idx)
00218 {
00219 int32 thresh = pls->best_score + pls->beam;
00220 int nf = frame_idx + 1;
00221 int i;
00222
00223
00224 for (i = 0; i < pls->n_phones; ++i) {
00225 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00226
00227 if (hmm_frame(hmm) < frame_idx)
00228 continue;
00229
00230 if (hmm_bestscore(hmm) BETTER_THAN thresh) {
00231 hmm_frame(hmm) = nf;
00232 }
00233 else
00234 hmm_clear_scores(hmm);
00235 }
00236 }
00237
00238 static void
00239 phone_transition(phone_loop_search_t *pls, int frame_idx)
00240 {
00241 int32 thresh = pls->best_score + pls->pbeam;
00242 int nf = frame_idx + 1;
00243 int i;
00244
00245
00246
00247 for (i = 0; i < pls->n_phones; ++i) {
00248 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00249 int32 newphone_score;
00250 int j;
00251
00252 if (hmm_frame(hmm) != nf)
00253 continue;
00254
00255 newphone_score = hmm_out_score(hmm) + pls->pip;
00256 if (newphone_score BETTER_THAN thresh) {
00257
00258 for (j = 0; j < pls->n_phones; ++j) {
00259 hmm_t *nhmm = (hmm_t *)&pls->phones[j];
00260
00261 if (hmm_frame(nhmm) < frame_idx
00262 || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
00263 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
00264 }
00265 }
00266 }
00267 }
00268 }
00269
00270 static int
00271 phone_loop_search_step(ps_search_t *search, int frame_idx)
00272 {
00273 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00274 acmod_t *acmod = ps_search_acmod(search);
00275 int16 const *senscr;
00276 int i;
00277
00278
00279 if (!ps_search_acmod(pls)->compallsen)
00280 for (i = 0; i < pls->n_phones; ++i)
00281 acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]);
00282
00283
00284 senscr = acmod_score(acmod, &frame_idx);
00285
00286
00287 if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
00288 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00289 frame_idx, pls->best_score);
00290 renormalize_hmms(pls, frame_idx, pls->best_score);
00291 }
00292
00293
00294 pls->best_score = evaluate_hmms(pls, senscr, frame_idx);
00295
00296
00297 prune_hmms(pls, frame_idx);
00298
00299
00300 phone_transition(pls, frame_idx);
00301
00302 return 0;
00303 }
00304
00305 int32
00306 phone_loop_search_score(phone_loop_search_t *pls, int ciphone)
00307 {
00308 hmm_t *hmm;
00309
00310 if (pls == NULL)
00311 return 0;
00312
00313 hmm = (hmm_t *)&pls->phones[ciphone];
00314 return hmm_bestscore(hmm) - pls->best_score;
00315 }
00316
00317 static int
00318 phone_loop_search_finish(ps_search_t *search)
00319 {
00320
00321 return 0;
00322 }
00323
00324 static char const *
00325 phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
00326 {
00327 E_WARN("Hypotheses are not returned from phone loop search");
00328 return NULL;
00329 }
00330
00331 static int32
00332 phone_loop_search_prob(ps_search_t *search)
00333 {
00334
00335 E_WARN("Posterior probabilities are not returned from phone loop search");
00336 return 0;
00337 }
00338
00339 static ps_seg_t *
00340 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
00341 {
00342 E_WARN("Hypotheses are not returned from phone loop search");
00343 return NULL;
00344 }