src/libpocketsphinx/ms_mgau.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * ms_mgau.c -- Essentially a wrapper that wraps up gauden and
00039  * senone.  It supports multiple streams.
00040  *
00041  *
00042  * **********************************************
00043  * CMU ARPA Speech Project
00044  *
00045  * Copyright (c) 1997 Carnegie Mellon University.
00046  * ALL RIGHTS RESERVED.
00047  * **********************************************
00048  * HISTORY
00049  * $Log$
00050  * Revision 1.2  2006/02/22  16:56:01  arthchan2003
00051  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
00052  * 
00053  * Revision 1.1.2.4  2005/09/25 18:55:19  arthchan2003
00054  * Added a flag to turn on and off precomputation.
00055  *
00056  * Revision 1.1.2.3  2005/08/03 18:53:44  dhdfu
00057  * Add memory deallocation functions.  Also move all the initialization
00058  * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
00059  * from decode_anytopo and friends.
00060  *
00061  * Revision 1.1.2.2  2005/08/02 21:05:38  arthchan2003
00062  * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
00063  *
00064  * Revision 1.1.2.1  2005/07/20 19:37:09  arthchan2003
00065  * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone.  Add ms_mgau_init and model_set_mllr.  This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
00066  *
00067  *
00068  *
00069  */
00070 
00071 /* Local headers. */
00072 #include "ms_mgau.h"
00073 
00074 static ps_mgaufuncs_t ms_mgau_funcs = {
00075     "ms",
00076     &ms_cont_mgau_frame_eval, /* frame_eval */
00077     &ms_mgau_mllr_transform,  /* transform */
00078     &ms_mgau_free             /* free */
00079 };
00080 
00081 ps_mgau_t *
00082 ms_mgau_init(cmd_ln_t *config, logmath_t *lmath)
00083 {
00084     /* Codebooks */
00085     int32 i;
00086     ms_mgau_model_t *msg;
00087     ps_mgau_t *mg;
00088     gauden_t *g;
00089     senone_t *s;
00090     mgau2sen_t *m2s;
00091 
00092     msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
00093     
00094     msg->g = NULL;
00095     msg->s = NULL;
00096 
00097     msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
00098                          cmd_ln_str_r(config, "-var"),
00099                          cmd_ln_float32_r(config, "-varfloor"),
00100                          lmath);
00101     msg->s = senone_init(msg->g,
00102                          cmd_ln_str_r(config, "-mixw"), NULL,
00103                          cmd_ln_float32_r(config, "-mixwfloor"), lmath);
00104 
00105     g = ms_mgau_gauden(msg);
00106     s = ms_mgau_senone(msg);
00107 
00108     /* Verify senone parameters against gauden parameters */
00109     if (s->n_feat != g->n_feat)
00110         E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
00111                 s->n_feat);
00112     if (s->n_cw != g->n_density)
00113         E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
00114                 g->n_density, s->n_cw);
00115     if (s->n_gauden > g->n_mgau)
00116         E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
00117                 s->n_gauden, g->n_mgau);
00118     if (s->n_gauden < g->n_mgau)
00119         E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
00120                 s->n_gauden, g->n_mgau);
00121     /* Initialize mapping from mixture Gaussian to senones */
00122     msg->mgau2sen =
00123         (mgau2sen_t **) ckd_calloc(g->n_mgau, sizeof(mgau2sen_t *));
00124     for (i = 0; i < s->n_sen; i++) {
00125         m2s = (mgau2sen_t *) ckd_calloc(1, sizeof(mgau2sen_t));
00126         m2s->sen = i;
00127         m2s->next = msg->mgau2sen[s->mgau[i]];
00128         msg->mgau2sen[s->mgau[i]] = m2s;
00129     }
00130 
00131     msg->topn = cmd_ln_int32_r(config, "-topn");
00132     E_INFO("The value of topn: %d\n", msg->topn);
00133     if (msg->topn == 0 || msg->topn > msg->g->n_density) {
00134         E_WARN
00135             ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
00136              msg->topn, msg->g->n_density);
00137         msg->topn = msg->g->n_density;
00138     }
00139 
00140     msg->dist = (gauden_dist_t ***)
00141         ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
00142                       sizeof(gauden_dist_t));
00143     msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
00144 
00145     mg = (ps_mgau_t *)msg;
00146     mg->vt = &ms_mgau_funcs;
00147     return mg;
00148 }
00149 
00150 void
00151 ms_mgau_free(ps_mgau_t * mg)
00152 {
00153     ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
00154     if (msg == NULL)
00155         return;
00156 
00157     gauden_free(msg->g);
00158     senone_free(msg->s);
00159     ckd_free_3d((void *) msg->dist);
00160     ckd_free(msg->mgau_active);
00161     ckd_free(msg);
00162 }
00163 
00164 int
00165 ms_mgau_mllr_transform(ps_mgau_t *s,
00166                        ps_mllr_t *mllr)
00167 {
00168     return -1;
00169 }
00170 
00171 int32
00172 ms_cont_mgau_frame_eval(ps_mgau_t * mg,
00173                         int16 *senscr,
00174                         uint8 *senone_active,
00175                         int32 n_senone_active,
00176                         mfcc_t ** feat,
00177                         int32 frame,
00178                         int32 compallsen)
00179 {
00180     ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
00181     int32 gid;
00182     int32 topn;
00183     int32 best;
00184     gauden_t *g;
00185     senone_t *sen;
00186 
00187     topn = ms_mgau_topn(msg);
00188     g = ms_mgau_gauden(msg);
00189     sen = ms_mgau_senone(msg);
00190 
00191     if (compallsen) {
00192         int32 s;
00193 
00194         for (gid = 0; gid < g->n_mgau; gid++)
00195             gauden_dist(g, gid, topn, feat, msg->dist[gid]);
00196 
00197         best = (int32) 0x7fffffff;
00198         for (s = 0; s < sen->n_sen; s++) {
00199             senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
00200             if (best > senscr[s]) {
00201                 best = senscr[s];
00202             }
00203         }
00204 
00205         /* Normalize senone scores */
00206         for (s = 0; s < sen->n_sen; s++) {
00207             int32 bs = senscr[s] - best;
00208             if (bs > 32767)
00209                 bs = 32767;
00210             if (bs < -32768)
00211                 bs = -32768;
00212             senscr[s] = bs;
00213         }
00214     }
00215     else {
00216         int32 i, n;
00217         /* Flag all active mixture-gaussian codebooks */
00218         for (gid = 0; gid < g->n_mgau; gid++)
00219             msg->mgau_active[gid] = 0;
00220 
00221         n = 0;
00222         for (i = 0; i < n_senone_active; i++) {
00223             /* senone_active consists of deltas. */
00224             int32 s = senone_active[i] + n;
00225             msg->mgau_active[sen->mgau[s]] = 1;
00226             n = s;
00227         }
00228 
00229         /* Compute topn gaussian density values (for active codebooks) */
00230         for (gid = 0; gid < g->n_mgau; gid++) {
00231             if (msg->mgau_active[gid])
00232                 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
00233         }
00234 
00235         best = (int32) 0x7fffffff;
00236         n = 0;
00237         for (i = 0; i < n_senone_active; i++) {
00238             int32 s = senone_active[i] + n;
00239             senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
00240             if (best > senscr[s]) {
00241                 best = senscr[s];
00242             }
00243             n = s;
00244         }
00245 
00246         /* Normalize senone scores */
00247         n = 0;
00248         for (i = 0; i < n_senone_active; i++) {
00249             int32 s = senone_active[i] + n;
00250             int32 bs = senscr[s] - best;
00251             if (bs > 32767)
00252                 bs = 32767;
00253             if (bs < -32768)
00254                 bs = -32768;
00255             senscr[s] = bs;
00256             n = s;
00257         }
00258     }
00259 
00260     return 0;
00261 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7