src/libpocketsphinx/acmod.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 
00044 /* System headers. */
00045 #include <assert.h>
00046 
00047 /* SphinxBase headers. */
00048 #include <prim_type.h>
00049 #include <err.h>
00050 #include <cmd_ln.h>
00051 #include <strfuncs.h>
00052 #include <string.h>
00053 #include <byteorder.h>
00054 #include <feat.h>
00055 
00056 /* Local headers. */
00057 #include "cmdln_macro.h"
00058 #include "acmod.h"
00059 #include "s2_semi_mgau.h"
00060 #include "ms_mgau.h"
00061 
00062 /* Feature and front-end parameters that may be in feat.params */
/* This table is handed to cmd_ln_parse_file_r() in acmod_init() when the
 * "-featparams" option names a file, so only the options expanded from the
 * two macros below are parsed from that file.  CMDLN_EMPTY_OPTION ends the
 * table. */
00063 static const arg_t feat_defn[] = {
00064     waveform_to_cepstral_command_line_macro(),
00065     cepstral_to_feature_command_line_macro(),
00066     CMDLN_EMPTY_OPTION
00067 };
00068 
/* Fallback definition used when the build did not define WORDS_BIGENDIAN.
 * NOTE(review): the fallback value is 1, so every `if (!WORDS_BIGENDIAN)`
 * byte-swapping branch in this file is dead code in any build where the
 * macro was not already defined -- confirm that writing log files in
 * native byte order is what is intended. */
00069 #ifndef WORDS_BIGENDIAN
00070 #define WORDS_BIGENDIAN 1
00071 #endif
00072 
/* Forward declarations of static helpers that are defined further down in
 * this file but called before their definitions. */
00073 static int32 acmod_flags2list(acmod_t *acmod);
00074 static int32 acmod_process_mfcbuf(acmod_t *acmod);
00075 
00076 static int
00077 acmod_init_am(acmod_t *acmod)
00078 {
00079     char const *mdeffn, *tmatfn, *mllrfn;
00080 
00081     /* Read model definition. */
00082     if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
00083         E_ERROR("Must specify -mdef or -hmm\n");
00084         return -1;
00085     }
00086 
00087     if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
00088         E_ERROR("Failed to read model definition from %s\n", mdeffn);
00089         return -1;
00090     }
00091 
00092     /* Read transition matrices. */
00093     if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
00094         E_ERROR("No tmat file specified\n");
00095         return -1;
00096     }
00097     acmod->tmat = tmat_init(tmatfn, acmod->lmath,
00098                             cmd_ln_float32_r(acmod->config, "-tmatfloor"),
00099                             TRUE);
00100 
00101     /* Read the acoustic models. */
00102     if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
00103         || (cmd_ln_str_r(acmod->config, "-var") == NULL)
00104         || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
00105         E_ERROR("No mean/var/tmat files specified\n");
00106         return -1;
00107     }
00108 
00109     E_INFO("Attempting to use SCHMM computation module\n");
00110     acmod->mgau = s2_semi_mgau_init(acmod);
00111     if (acmod->mgau) {
00112         char const *kdtreefn = cmd_ln_str_r(acmod->config, "-kdtree");
00113         if (kdtreefn)
00114             s2_semi_mgau_load_kdtree(acmod->mgau, kdtreefn,
00115                                      cmd_ln_int32_r(acmod->config, "-kdmaxdepth"),
00116                                      cmd_ln_int32_r(acmod->config, "-kdmaxbbi"));
00117     }
00118     else {
00119         E_INFO("Falling back to general multi-stream GMM computation\n");
00120         acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath);
00121     }
00122 
00123     /* If there is an MLLR transform, apply it. */
00124     if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
00125         ps_mllr_t *mllr = ps_mllr_read(mllrfn);
00126         if (mllr == NULL)
00127             return -1;
00128         acmod_update_mllr(acmod, mllr);
00129     }
00130 
00131     return 0;
00132 }
00133 
00134 static int
00135 acmod_init_feat(acmod_t *acmod)
00136 {
00137     acmod->fcb = 
00138         feat_init(cmd_ln_str_r(acmod->config, "-feat"),
00139                   cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
00140                   cmd_ln_boolean_r(acmod->config, "-varnorm"),
00141                   agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
00142                   1, cmd_ln_int32_r(acmod->config, "-ceplen"));
00143     if (acmod->fcb == NULL)
00144         return -1;
00145 
00146     if (cmd_ln_str_r(acmod->config, "-lda")) {
00147         E_INFO("Reading linear feature transformation from %s\n",
00148                cmd_ln_str_r(acmod->config, "-lda"));
00149         if (feat_read_lda(acmod->fcb,
00150                           cmd_ln_str_r(acmod->config, "-lda"),
00151                           cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
00152             return -1;
00153     }
00154 
00155     if (cmd_ln_str_r(acmod->config, "-svspec")) {
00156         int32 **subvecs;
00157         E_INFO("Using subvector specification %s\n", 
00158                cmd_ln_str_r(acmod->config, "-svspec"));
00159         if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
00160             return -1;
00161         if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
00162             return -1;
00163     }
00164 
00165     if (cmd_ln_exists_r(acmod->config, "-agcthresh")
00166         && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
00167         agc_set_threshold(acmod->fcb->agc_struct,
00168                           cmd_ln_float32_r(acmod->config, "-agcthresh"));
00169     }
00170 
00171     if (acmod->fcb->cmn_struct
00172         && cmd_ln_exists_r(acmod->config, "-cmninit")) {
00173         char *c, *cc, *vallist;
00174         int32 nvals;
00175 
00176         vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
00177         c = vallist;
00178         nvals = 0;
00179         while (nvals < acmod->fcb->cmn_struct->veclen
00180                && (cc = strchr(c, ',')) != NULL) {
00181             *cc = '\0';
00182             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
00183             c = cc + 1;
00184             ++nvals;
00185         }
00186         if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
00187             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
00188         }
00189         ckd_free(vallist);
00190     }
00191     return 0;
00192 }
00193 
00194 int
00195 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
00196 {
00197     /* Output vector dimension needs to be the same. */
00198     if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe))
00199         return TRUE;
00200     /* Feature parameters need to be the same. */
00201     /* ... */
00202     return FALSE;
00203 }
00204 
00205 int
00206 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
00207 {
00208     /* Feature type needs to be the same. */
00209     if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
00210         return TRUE;
00211     /* Input vector dimension needs to be the same. */
00212     if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
00213         return TRUE;
00214     /* FIXME: Need to check LDA and stuff too. */
00215     return FALSE;
00216 }
00217 
00218 acmod_t *
00219 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
00220 {
00221     acmod_t *acmod;
00222     char const *featparams;
00223 
00224     acmod = ckd_calloc(1, sizeof(*acmod));
00225     acmod->config = config;
00226     acmod->lmath = lmath;
00227     acmod->state = ACMOD_IDLE;
00228 
00229     /* Look for feat.params in acoustic model dir. */
00230     if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
00231         if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL) {
00232             E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
00233         }
00234     }
00235 
00236     /* Initialize feature computation. */
00237     if (fe) {
00238         if (acmod_fe_mismatch(acmod, fe))
00239             goto error_out;
00240         fe_retain(fe);
00241         acmod->fe = fe;
00242     }
00243     else {
00244         /* Initialize a new front end. */
00245         cmd_ln_retain(config);
00246         acmod->fe = fe_init_auto_r(config);
00247         if (acmod->fe == NULL)
00248             goto error_out;
00249     }
00250     if (fcb) {
00251         if (acmod_feat_mismatch(acmod, fcb))
00252             goto error_out;
00253         feat_retain(fcb);
00254         acmod->fcb = fcb;
00255     }
00256     else {
00257         /* Initialize a new fcb. */
00258         if (acmod_init_feat(acmod) < 0)
00259             goto error_out;
00260     }
00261 
00262     /* Load acoustic model parameters. */
00263     if (acmod_init_am(acmod) < 0)
00264         goto error_out;
00265 
00266 
00267     /* The MFCC buffer needs to be at least as large as the dynamic
00268      * feature window.  */
00269     acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
00270     acmod->mfc_buf = (mfcc_t **)
00271         ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
00272                       sizeof(**acmod->mfc_buf));
00273 
00274     /* Feature buffer has to be at least as large as MFCC buffer. */
00275     acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
00276     acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
00277 
00278     /* Senone computation stuff. */
00279     acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00280                                                      sizeof(*acmod->senone_scores));
00281     acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
00282     acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00283                                                      sizeof(*acmod->senone_active));
00284     acmod->log_zero = logmath_get_zero(acmod->lmath);
00285     acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
00286     return acmod;
00287 
00288 error_out:
00289     acmod_free(acmod);
00290     return NULL;
00291 }
00292 
00293 void
00294 acmod_free(acmod_t *acmod)
00295 {
00296     if (acmod == NULL)
00297         return;
00298 
00299     feat_free(acmod->fcb);
00300     fe_free(acmod->fe);
00301 
00302     if (acmod->mfc_buf)
00303         ckd_free_2d((void **)acmod->mfc_buf);
00304     if (acmod->feat_buf)
00305         feat_array_free(acmod->feat_buf);
00306 
00307     if (acmod->mfcfh)
00308         fclose(acmod->mfcfh);
00309     if (acmod->rawfh)
00310         fclose(acmod->rawfh);
00311 
00312     ckd_free(acmod->senone_scores);
00313     ckd_free(acmod->senone_active_vec);
00314     ckd_free(acmod->senone_active);
00315 
00316     if (acmod->mdef)
00317         bin_mdef_free(acmod->mdef);
00318     if (acmod->tmat)
00319         tmat_free(acmod->tmat);
00320     if (acmod->mgau)
00321         ps_mgau_free(acmod->mgau);
00322     if (acmod->mllr)
00323         ps_mllr_free(acmod->mllr);
00324     
00325     ckd_free(acmod);
00326 }
00327 
00328 ps_mllr_t *
00329 acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
00330 {
00331     if (acmod->mllr)
00332         ps_mllr_free(acmod->mllr);
00333     acmod->mllr = mllr;
00334     ps_mgau_transform(acmod->mgau, mllr);
00335 
00336     return mllr;
00337 }
00338 
00339 int
00340 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
00341 {
00342     int rv = 0;
00343 
00344     if (acmod->mfcfh)
00345         fclose(acmod->mfcfh);
00346     acmod->mfcfh = logfh;
00347     fwrite(&rv, 4, 1, acmod->mfcfh);
00348     return rv;
00349 }
00350 
00351 int
00352 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
00353 {
00354     if (acmod->rawfh)
00355         fclose(acmod->rawfh);
00356     acmod->rawfh = logfh;
00357     return 0;
00358 }
00359 
00360 void
00361 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
00362 {
00363     mfcc_t ***new_feat_buf;
00364 
00365     new_feat_buf = feat_array_alloc(acmod->fcb, nfr);
00366     if (acmod->n_feat_frame || acmod->grow_feat) {
00367         memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0],
00368                (acmod->n_feat_alloc
00369                 * feat_dimension(acmod->fcb)
00370                 * sizeof(***acmod->feat_buf)));
00371     }
00372     feat_array_free(acmod->feat_buf);
00373     acmod->feat_buf = new_feat_buf;
00374     acmod->n_feat_alloc = nfr;
00375 }
00376 
00377 int
00378 acmod_set_grow(acmod_t *acmod, int grow_feat)
00379 {
00380     int tmp = acmod->grow_feat;
00381     acmod->grow_feat = grow_feat;
00382 
00383     /* Expand feat_buf to a reasonable size to start with. */
00384     if (grow_feat && acmod->n_feat_alloc < 128)
00385         acmod_grow_feat_buf(acmod, 128);
00386 
00387     return tmp;
00388 }
00389 
00390 int
00391 acmod_start_utt(acmod_t *acmod)
00392 {
00393     fe_start_utt(acmod->fe);
00394     acmod->state = ACMOD_STARTED;
00395     acmod->n_mfc_frame = 0;
00396     acmod->n_feat_frame = 0;
00397     acmod->mfc_outidx = 0;
00398     acmod->feat_outidx = 0;
00399     acmod->output_frame = 0;
00400     acmod->senscr_frame = -1;
00401     acmod->n_senone_active = 0;
00402     acmod->mgau->frame_idx = 0;
00403     return 0;
00404 }
00405 
00406 int
00407 acmod_end_utt(acmod_t *acmod)
00408 {
00409     int32 nfr = 0;
00410 
00411     acmod->state = ACMOD_ENDED;
00412     if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
00413         int inptr;
00414         /* Where to start writing them (circular buffer) */
00415         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00416         /* nfr is always either zero or one. */
00417         fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
00418         acmod->n_mfc_frame += nfr;
00419         /* Process whatever's left, and any leadout. */
00420         if (nfr)
00421             nfr = acmod_process_mfcbuf(acmod);
00422     }
00423     if (acmod->mfcfh) {
00424         int32 outlen, rv;
00425         outlen = (ftell(acmod->mfcfh) - 4) / 4;
00426         if (!WORDS_BIGENDIAN)
00427             SWAP_INT32(&outlen);
00428         /* Try to seek and write */
00429         if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
00430             fwrite(&outlen, 4, 1, acmod->mfcfh);
00431         }
00432         fclose(acmod->mfcfh);
00433         acmod->mfcfh = NULL;
00434     }
00435     if (acmod->rawfh) {
00436         fclose(acmod->rawfh);
00437         acmod->rawfh = NULL;
00438     }
00439 
00440     return nfr;
00441 }
00442 
00443 static int
00444 acmod_log_mfc(acmod_t *acmod,
00445               mfcc_t **cep, int n_frames)
00446 {
00447     int i, n;
00448     int32 *ptr = (int32 *)cep[0];
00449 
00450     n = n_frames * feat_cepsize(acmod->fcb);
00451     /* Swap bytes. */
00452     if (!WORDS_BIGENDIAN) {
00453         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00454             SWAP_INT32(ptr + i);
00455         }
00456     }
00457     /* Write features. */
00458     if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
00459         E_ERROR_SYSTEM("Failed to write %d values to log file", n);
00460     }
00461 
00462     /* Swap them back. */
00463     if (!WORDS_BIGENDIAN) {
00464         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00465             SWAP_INT32(ptr + i);
00466         }
00467     }
00468     return 0;
00469 }
00470 
00471 static int
00472 acmod_process_full_cep(acmod_t *acmod,
00473                        mfcc_t ***inout_cep,
00474                        int *inout_n_frames)
00475 {
00476     int32 nfr;
00477 
00478     /* Write to log file. */
00479     if (acmod->mfcfh)
00480         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00481 
00482     /* Resize feat_buf to fit. */
00483     if (acmod->n_feat_alloc < *inout_n_frames) {
00484         feat_array_free(acmod->feat_buf);
00485         acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
00486         acmod->n_feat_alloc = *inout_n_frames;
00487         acmod->n_feat_frame = 0;
00488         acmod->feat_outidx = 0;
00489     }
00490     /* Make dynamic features. */
00491     nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
00492                                TRUE, TRUE, acmod->feat_buf);
00493     acmod->n_feat_frame = nfr;
00494     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00495     *inout_cep += *inout_n_frames;
00496     *inout_n_frames = 0;
00497     return nfr;
00498 }
00499 
00500 static int
00501 acmod_process_full_raw(acmod_t *acmod,
00502                        int16 const **inout_raw,
00503                        size_t *inout_n_samps)
00504 {
00505     int32 nfr, ntail;
00506     mfcc_t **cepptr;
00507 
00508     /* Write to logging file if any. */
00509     if (acmod->rawfh)
00510         fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
00511     /* Resize mfc_buf to fit. */
00512     if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
00513         return -1;
00514     if (acmod->n_mfc_alloc < nfr + 1) {
00515         ckd_free_2d(acmod->mfc_buf);
00516         acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
00517                                        sizeof(**acmod->mfc_buf));
00518         acmod->n_mfc_alloc = nfr + 1;
00519     }
00520     acmod->n_mfc_frame = 0;
00521     acmod->mfc_outidx = 0;
00522     fe_start_utt(acmod->fe);
00523     if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00524                           acmod->mfc_buf, &nfr) < 0)
00525         return -1;
00526     fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
00527     nfr += ntail;
00528 
00529     cepptr = acmod->mfc_buf;
00530     nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
00531     acmod->n_mfc_frame = 0;
00532     return nfr;
00533 }
00534 
00538 static int32
00539 acmod_process_mfcbuf(acmod_t *acmod)
00540 {
00541     mfcc_t **mfcptr;
00542     int32 ncep;
00543 
00544     ncep = acmod->n_mfc_frame;
00545     /* Also do this in two parts because of the circular mfc_buf. */
00546     if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
00547         int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
00548         int saved_state = acmod->state;
00549 
00550         /* Make sure we don't end the utterance here. */
00551         if (acmod->state == ACMOD_ENDED)
00552             acmod->state = ACMOD_PROCESSING;
00553         mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00554         ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
00555         /* It's possible that not all available frames were filled. */
00556         ncep -= ncep1;
00557         acmod->n_mfc_frame -= ncep1;
00558         acmod->mfc_outidx += ncep1;
00559         acmod->mfc_outidx %= acmod->n_mfc_alloc;
00560         /* Restore original state (could this really be the end) */
00561         acmod->state = saved_state;
00562     }
00563     mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00564     ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
00565     acmod->n_mfc_frame -= ncep;
00566     acmod->mfc_outidx += ncep;
00567     acmod->mfc_outidx %= acmod->n_mfc_alloc;
00568     return ncep;
00569 }
00570 
00571 int
00572 acmod_process_raw(acmod_t *acmod,
00573                   int16 const **inout_raw,
00574                   size_t *inout_n_samps,
00575                   int full_utt)
00576 {
00577     int32 ncep;
00578 
00579     /* If this is a full utterance, process it all at once. */
00580     if (full_utt)
00581         return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
00582 
00583     /* Write to logging file if any. */
00584     if (acmod->rawfh)
00585         fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
00586     /* Append MFCCs to the end of any that are previously in there
00587      * (in practice, there will probably be none) */
00588     if (inout_n_samps && *inout_n_samps) {
00589         int inptr;
00590 
00591         /* Total number of frames available. */
00592         ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
00593         /* Where to start writing them (circular buffer) */
00594         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00595 
00596         /* Write them in two (or more) parts if there is wraparound. */
00597         while (inptr + ncep > acmod->n_mfc_alloc) {
00598             int32 ncep1 = acmod->n_mfc_alloc - inptr;
00599             if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00600                                   acmod->mfc_buf + inptr, &ncep1) < 0)
00601                 return -1;
00602             /* ncep1 now contains the number of frames actually
00603              * processed.  This is a good thing, but it means we
00604              * actually still might have some room left at the end of
00605              * the buffer, hence the while loop.  Unfortunately it
00606              * also means that in the case where we are really
00607              * actually done, we need to get out totally, hence the
00608              * goto. */
00609             acmod->n_mfc_frame += ncep1;
00610             ncep -= ncep1;
00611             inptr += ncep1;
00612             inptr %= acmod->n_mfc_alloc;
00613             if (ncep1 == 0)
00614                 goto alldone;
00615         }
00616         assert(inptr + ncep <= acmod->n_mfc_alloc);
00617         if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00618                               acmod->mfc_buf + inptr, &ncep) < 0)
00619             return -1;
00620         acmod->n_mfc_frame += ncep;
00621     alldone:
00622         ;
00623     }
00624 
00625     /* Hand things off to acmod_process_cep. */
00626     return acmod_process_mfcbuf(acmod);
00627 }
00628 
/*
 * Convert cepstral frames into dynamic feature frames stored in
 * acmod->feat_buf.
 *
 * With full_utt set, the work is delegated to acmod_process_full_cep().
 * Otherwise the frames are logged (if cepstral logging is active) and run
 * through feat_s2mfc2feat_live(), in two calls when the write position
 * would run past the end of the feature buffer.
 *
 * inout_cep:      in, pointer to the input frames; out, advanced past the
 *                 frames that were consumed.
 * inout_n_frames: in, number of input frames; out, reduced by the number
 *                 of frames consumed.
 *
 * Returns the number of input frames consumed (orig_n_frames minus the
 * final *inout_n_frames), or -1 if feat_s2mfc2feat_live() returns a
 * negative value.
 */
00629 int
00630 acmod_process_cep(acmod_t *acmod,
00631                   mfcc_t ***inout_cep,
00632                   int *inout_n_frames,
00633                   int full_utt)
00634 {
00635     int32 nfeat, ncep, inptr;
00636     int orig_n_frames;
00637 
00638     /* If this is a full utterance, process it all at once. */
00639     if (full_utt)
00640         return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
00641 
00642     /* Write to log file. */
00643     if (acmod->mfcfh)
00644         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00645 
00646     /* Maximum number of frames we're going to generate. */
00647     orig_n_frames = ncep = nfeat = *inout_n_frames;
00648 
    /* The expected output count is raised by the feature window size at
     * the end of an utterance and lowered by it at the start. */
00649     /* FIXME: This behaviour isn't guaranteed... */
00650     if (acmod->state == ACMOD_ENDED)
00651         nfeat += feat_window_size(acmod->fcb);
00652     else if (acmod->state == ACMOD_STARTED)
00653         nfeat -= feat_window_size(acmod->fcb);
00654 
00655     /* Clamp number of features to fit available space. */
00656     if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
00657         /* Grow it as needed - we have to grow it at the end of an
00658          * utterance because we can't return a short read there. */
00659         if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
00660             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
00661         else
            /* No growing allowed: take fewer input frames instead. */
00662             ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
00663     }
00664 
00665     /* Where to start writing in the feature buffer. */
00666     if (acmod->grow_feat) {
00667         /* Grow to avoid wraparound if grow_feat == TRUE. */
00668         inptr = acmod->feat_outidx + acmod->n_feat_frame;
00669         while (inptr + nfeat > acmod->n_feat_alloc)
00670             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00671     }
00672     else {
00673         inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00674     }
00675 
00676     /* Write them in two parts if there is wraparound. */
00677     if (inptr + nfeat > acmod->n_feat_alloc) {
00678         int32 ncep1 = acmod->n_feat_alloc - inptr;
00679         int saved_state = acmod->state;
00680 
00681         /* Make sure we don't end the utterance here. */
00682         if (acmod->state == ACMOD_ENDED)
00683             acmod->state = ACMOD_PROCESSING;
        /* The "end of utterance" argument below is always false here,
         * because an ENDED state was just replaced by PROCESSING. */
00684         nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00685                                      &ncep1,
00686                                      (acmod->state == ACMOD_STARTED),
00687                                      (acmod->state == ACMOD_ENDED),
00688                                      acmod->feat_buf + inptr);
00689         if (nfeat < 0)
00690             return -1;
00691         /* Move the output feature pointer forward. */
00692         acmod->n_feat_frame += nfeat;
00693         assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00694         inptr += nfeat;
00695         inptr %= acmod->n_feat_alloc;
00696         /* Move the input feature pointers forward. */
00697         *inout_n_frames -= ncep1;
00698         *inout_cep += ncep1;
00699         ncep -= ncep1;
00700         /* Restore original state (could this really be the end) */
00701         acmod->state = saved_state;
00702     }
00703 
    /* Remaining (or only) part, written at inptr. */
00704     nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00705                                  &ncep,
00706                                  (acmod->state == ACMOD_STARTED),
00707                                  (acmod->state == ACMOD_ENDED),
00708                                  acmod->feat_buf + inptr);
00709     if (nfeat < 0)
00710         return -1;
00711     acmod->n_feat_frame += nfeat;
00712     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00713     /* Move the input feature pointers forward. */
00714     *inout_n_frames -= ncep;
00715     *inout_cep += ncep;
    /* The first batch of an utterance has now been handled. */
00716     if (acmod->state == ACMOD_STARTED)
00717         acmod->state = ACMOD_PROCESSING;
00718     return orig_n_frames - *inout_n_frames;
00719 }
00720 
00721 int
00722 acmod_process_feat(acmod_t *acmod,
00723                    mfcc_t **feat)
00724 {
00725     int i, inptr;
00726 
00727     if (acmod->n_feat_frame == acmod->n_feat_alloc) {
00728         if (acmod->grow_feat)
00729             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00730         else
00731             return 0;
00732     }
00733 
00734     inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00735     for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
00736         memcpy(acmod->feat_buf[inptr][i],
00737                feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
00738     ++acmod->n_feat_frame;
00739     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00740 
00741     return 1;
00742 }
00743 
00744 int
00745 acmod_rewind(acmod_t *acmod)
00746 {
00747     /* If the feature buffer is circular, this is not possible. */
00748     if (acmod->output_frame > acmod->n_feat_alloc)
00749         return -1;
00750 
00751     /* Frames consumed + frames available */
00752     acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
00753 
00754     /* Reset output pointers. */
00755     acmod->feat_outidx = 0;
00756     acmod->output_frame = 0;
00757     acmod->senscr_frame = -1;
00758     acmod->mgau->frame_idx = 0;
00759 
00760     return 0;
00761 }
00762 
00763 int
00764 acmod_advance(acmod_t *acmod)
00765 {
00766     /* Advance the output pointers. */
00767     if (++acmod->feat_outidx == acmod->n_feat_alloc)
00768         acmod->feat_outidx = 0;
00769     --acmod->n_feat_frame;
00770     ++acmod->mgau->frame_idx;
00771 
00772     return ++acmod->output_frame;
00773 }
00774 
00775 int16 const *
00776 acmod_score(acmod_t *acmod,
00777             int *inout_frame_idx)
00778 {
00779     int frame_idx, feat_idx, n_backfr;
00780 
00781     /* Calculate the absolute frame index to be scored. */
00782     if (inout_frame_idx == NULL)
00783         frame_idx = acmod->output_frame;
00784     else if (*inout_frame_idx < 0)
00785         frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
00786     else
00787         frame_idx = *inout_frame_idx;
00788 
00789     /* Check to make sure features are available for the requested frame index. */
00790     n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
00791     if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
00792         E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), cannot score\n",
00793                 frame_idx, acmod->n_feat_frame, acmod->n_feat_alloc,
00794                 acmod->output_frame - frame_idx, n_backfr);
00795         return NULL;
00796     }
00797 
00798     /* If all senones are being computed then we can reuse existing scores. */
00799     if (acmod->compallsen && frame_idx == acmod->senscr_frame)
00800         return acmod->senone_scores;
00801 
00802     /* Build active senone list. */
00803     acmod_flags2list(acmod);
00804 
00805     /* Get the index in feat_buf of the frame to be scored. */
00806     feat_idx = ((acmod->feat_outidx + frame_idx - acmod->output_frame)
00807                 % acmod->n_feat_alloc);
00808     if (feat_idx < 0) feat_idx += acmod->n_feat_alloc;
00809 
00810     /* Generate scores for the next available frame */
00811     ps_mgau_frame_eval(acmod->mgau,
00812                        acmod->senone_scores,
00813                        acmod->senone_active,
00814                        acmod->n_senone_active,
00815                        acmod->feat_buf[feat_idx],
00816                        frame_idx,
00817                        acmod->compallsen);
00818 
00819     if (inout_frame_idx)
00820         *inout_frame_idx = frame_idx;
00821     acmod->senscr_frame = frame_idx;
00822 
00823     return acmod->senone_scores;
00824 }
00825 
00826 int
00827 acmod_best_score(acmod_t *acmod, int *out_best_senid)
00828 {
00829     int i, best;
00830 
00831     best = WORST_SCORE;
00832     if (acmod->compallsen) {
00833         for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
00834             if (acmod->senone_scores[i] BETTER_THAN best) {
00835                 best = acmod->senone_scores[i];
00836                 *out_best_senid = i;
00837             }
00838         }
00839     }
00840     else {
00841         int16 *senscr;
00842         senscr = acmod->senone_scores;
00843         for (i = 0; i < acmod->n_senone_active; ++i) {
00844             senscr += acmod->senone_active[i];
00845             if (*senscr BETTER_THAN best) {
00846                 best = *senscr;
00847                 *out_best_senid = i;
00848             }
00849         }
00850     }
00851     return best;
00852 }
00853 
00854 
00855 void
00856 acmod_clear_active(acmod_t *acmod)
00857 {
00858     bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
00859     acmod->n_senone_active = 0;
00860 }
00861 
/* Set the active-senone bit for state i of multiplex HMM h in a's bit
 * vector, unless that state's senone sequence ID is BAD_SSID.
 * Wrapped in do { } while (0) so the macro acts as a single statement and
 * its inner `if` cannot capture an `else` that follows the call site. */
#define MPX_BITVEC_SET(a,h,i)                                   \
    do {                                                        \
        if (hmm_mpx_ssid(h,i) != BAD_SSID)                      \
            bitvec_set((a)->senone_active_vec,                  \
                       hmm_mpx_senid(h,i));                     \
    } while (0)
/* Set the active-senone bit for state i of non-multiplex HMM h in a's bit
 * vector. */
#define NONMPX_BITVEC_SET(a,h,i)                                        \
    bitvec_set((a)->senone_active_vec,                                  \
               hmm_nonmpx_senid(h,i))
00868 
00869 void
00870 acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
00871 {
00872     int i;
00873 
00874     if (hmm_is_mpx(hmm)) {
00875         switch (hmm_n_emit_state(hmm)) {
00876         case 5:
00877             MPX_BITVEC_SET(acmod, hmm, 4);
00878             MPX_BITVEC_SET(acmod, hmm, 3);
00879         case 3:
00880             MPX_BITVEC_SET(acmod, hmm, 2);
00881             MPX_BITVEC_SET(acmod, hmm, 1);
00882             MPX_BITVEC_SET(acmod, hmm, 0);
00883             break;
00884         default:
00885             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00886                 MPX_BITVEC_SET(acmod, hmm, i);
00887             }
00888         }
00889     }
00890     else {
00891         switch (hmm_n_emit_state(hmm)) {
00892         case 5:
00893             NONMPX_BITVEC_SET(acmod, hmm, 4);
00894             NONMPX_BITVEC_SET(acmod, hmm, 3);
00895         case 3:
00896             NONMPX_BITVEC_SET(acmod, hmm, 2);
00897             NONMPX_BITVEC_SET(acmod, hmm, 1);
00898             NONMPX_BITVEC_SET(acmod, hmm, 0);
00899             break;
00900         default:
00901             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00902                 NONMPX_BITVEC_SET(acmod, hmm, i);
00903             }
00904         }
00905     }
00906 }
00907 
00908 static int32
00909 acmod_flags2list(acmod_t *acmod)
00910 {
00911     int32 w, l, n, b, total_dists, total_words, extra_bits;
00912     bitvec_t *flagptr;
00913 
00914     total_dists = bin_mdef_n_sen(acmod->mdef);
00915     if (acmod->compallsen) {
00916         acmod->n_senone_active = total_dists;
00917         return total_dists;
00918     }
00919     total_words = total_dists / BITVEC_BITS;
00920     extra_bits = total_dists % BITVEC_BITS;
00921     w = n = l = 0;
00922     for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
00923         if (*flagptr == 0)
00924             continue;
00925         for (b = 0; b < BITVEC_BITS; ++b) {
00926             if (*flagptr & (1UL << b)) {
00927                 int32 sen = w * BITVEC_BITS + b;
00928                 int32 delta = sen - l;
00929                 /* Handle excessive deltas "lossily" by adding a few
00930                    extra senones to bridge the gap. */
00931                 while (delta > 255) {
00932                     acmod->senone_active[n++] = 255;
00933                     delta -= 255;
00934                 }
00935                 acmod->senone_active[n++] = delta;
00936                 l = sen;
00937             }
00938         }
00939     }
00940 
00941     for (b = 0; b < extra_bits; ++b) {
00942         if (*flagptr & (1UL << b)) {
00943             int32 sen = w * BITVEC_BITS + b;
00944             int32 delta = sen - l;
00945             /* Handle excessive deltas "lossily" by adding a few
00946                extra senones to bridge the gap. */
00947             while (delta > 255) {
00948                 acmod->senone_active[n++] = 255;
00949                 delta -= 255;
00950             }
00951             acmod->senone_active[n++] = delta;
00952             l = sen;
00953         }
00954     }
00955 
00956     acmod->n_senone_active = n;
00957     E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
00958                 acmod->n_senone_active, acmod->output_frame));
00959     return n;
00960 }

Generated on Mon Jan 24 21:50:15 2011 for PocketSphinx by  doxygen 1.4.7