src/libpocketsphinx/ms_senone.c

00001 /* ====================================================================
00002  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  *
00035  */
00036 
00037 /* System headers. */
00038 #include <string.h>
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <bio.h>
00044 
00045 /* Local headers. */
00046 #include "ms_senone.h"
00047 
00048 
00049 #define MIXW_PARAM_VERSION      "1.0"
00050 #define SPDEF_PARAM_VERSION     "1.2"
00051 
00052 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ == 199901L)
00053 #define LOGMATH_INLINE inline
00054 #elif defined(__GNUC__)
00055 #define LOGMATH_INLINE static inline
00056 #elif defined(_MSC_VER)
00057 #define LOGMATH_INLINE __inline
00058 #else
00059 #define LOGMATH_INLINE static
00060 #endif
00061 
00062 static int32
00063 senone_mgau_map_read(senone_t * s, char const *file_name)
00064 {
00065     FILE *fp;
00066     int32 byteswap, chksum_present, n_gauden_present;
00067     uint32 chksum;
00068     int32 i;
00069     char eofchk;
00070     char **argname, **argval;
00071     void *ptr;
00072     float32 v;
00073 
00074     E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
00075 
00076     if ((fp = fopen(file_name, "rb")) == NULL)
00077         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
00078 
00079     /* Read header, including argument-value info and 32-bit byteorder magic */
00080     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00081         E_FATAL("bio_readhdr(%s) failed\n", file_name);
00082 
00083     /* Parse argument-value list */
00084     chksum_present = 0;
00085     n_gauden_present = 0;
00086     for (i = 0; argname[i]; i++) {
00087         if (strcmp(argname[i], "version") == 0) {
00088             if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
00089                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00090                        file_name, argval[i], SPDEF_PARAM_VERSION);
00091             }
00092 
00093             /* HACK!! Convert version# to float32 and take appropriate action */
00094             if (sscanf(argval[i], "%f", &v) != 1)
00095                 E_FATAL("%s: Bad version no. string: %s\n", file_name,
00096                         argval[i]);
00097 
00098             n_gauden_present = (v > 1.1) ? 1 : 0;
00099         }
00100         else if (strcmp(argname[i], "chksum0") == 0) {
00101             chksum_present = 1; /* Ignore the associated value */
00102         }
00103     }
00104     bio_hdrarg_free(argname, argval);
00105     argname = argval = NULL;
00106 
00107     chksum = 0;
00108 
00109     /* Read #gauden (if version matches) */
00110     if (n_gauden_present) {
00111         if (bio_fread
00112             (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
00113             E_FATAL("fread(%s) (#gauden) failed\n", file_name);
00114     }
00115 
00116     /* Read 1d array data */
00117     if (bio_fread_1d(&ptr, sizeof(int16), &(s->n_sen), fp,
00118                      byteswap, &chksum) < 0) {
00119         E_FATAL("bio_fread_1d(%s) failed\n", file_name);
00120     }
00121     s->mgau = ptr;
00122 
00123     /* Infer n_gauden if not present in this version */
00124     if (!n_gauden_present) {
00125         s->n_gauden = 1;
00126         for (i = 0; i < s->n_sen; i++)
00127             if (s->mgau[i] >= s->n_gauden)
00128                 s->n_gauden = s->mgau[i] + 1;
00129     }
00130 
00131     if (chksum_present)
00132         bio_verify_chksum(fp, byteswap, chksum);
00133 
00134     if (fread(&eofchk, 1, 1, fp) == 1)
00135         E_FATAL("More data than expected in %s\n", file_name);
00136 
00137     fclose(fp);
00138 
00139     E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
00140            s->n_gauden);
00141 
00142     return 1;
00143 }
00144 
00145 
00146 static int32
00147 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
00148 {
00149     char eofchk;
00150     FILE *fp;
00151     int32 byteswap, chksum_present;
00152     uint32 chksum;
00153     float32 *pdf;
00154     int32 i, f, c, p, n_err;
00155     char **argname, **argval;
00156 
00157     E_INFO("Reading senone mixture weights: %s\n", file_name);
00158 
00159     if ((fp = fopen(file_name, "rb")) == NULL)
00160         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
00161 
00162     /* Read header, including argument-value info and 32-bit byteorder magic */
00163     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00164         E_FATAL("bio_readhdr(%s) failed\n", file_name);
00165 
00166     /* Parse argument-value list */
00167     chksum_present = 0;
00168     for (i = 0; argname[i]; i++) {
00169         if (strcmp(argname[i], "version") == 0) {
00170             if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
00171                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00172                        file_name, argval[i], MIXW_PARAM_VERSION);
00173         }
00174         else if (strcmp(argname[i], "chksum0") == 0) {
00175             chksum_present = 1; /* Ignore the associated value */
00176         }
00177     }
00178     bio_hdrarg_free(argname, argval);
00179     argname = argval = NULL;
00180 
00181     chksum = 0;
00182 
00183     /* Read #senones, #features, #codewords, arraysize */
00184     if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
00185          1)
00186         ||
00187         (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
00188          != 1)
00189         || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
00190             != 1)
00191         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00192         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
00193     }
00194     if (i != s->n_sen * s->n_feat * s->n_cw) {
00195         E_FATAL
00196             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
00197              file_name, i, s->n_sen, s->n_feat, s->n_cw);
00198     }
00199 
00200     /*
00201      * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
00202      * All PDF values will be truncated (in the LSB positions) by these many bits.
00203      */
00204     if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
00205         E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
00206 
00207     /* Use a fixed shift for compatibility with everything else. */
00208     E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
00209 
00210     /*
00211      * Allocate memory for senone PDF data.  Organize normally or transposed depending on
00212      * s->n_gauden.
00213      */
00214     if (s->n_gauden > 1) {
00215         E_INFO("Not transposing mixture weights in memory\n");
00216         s->pdf =
00217             (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
00218                                           sizeof(senprob_t));
00219     }
00220     else {
00221         E_INFO("Transposing mixture weights in memory\n");
00222         s->pdf =
00223             (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
00224                                           sizeof(senprob_t));
00225     }
00226 
00227     /* Temporary structure to read in floats */
00228     pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
00229 
00230     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
00231     n_err = 0;
00232     for (i = 0; i < s->n_sen; i++) {
00233         for (f = 0; f < s->n_feat; f++) {
00234             if (bio_fread
00235                 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
00236                  &chksum)
00237                 != s->n_cw) {
00238                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
00239             }
00240 
00241             /* Normalize and floor */
00242             if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
00243                 n_err++;
00244             vector_floor(pdf, s->n_cw, s->mixwfloor);
00245             vector_sum_norm(pdf, s->n_cw);
00246 
00247             /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
00248             for (c = 0; c < s->n_cw; c++) {
00249                 p = -(logmath_log(lmath, pdf[c]));
00250                 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
00251 
00252                 if (s->n_gauden > 1)
00253                     s->pdf[i][f][c] =
00254                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00255                 else
00256                     s->pdf[f][c][i] =
00257                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00258             }
00259         }
00260     }
00261     if (n_err > 0)
00262         E_ERROR("Weight normalization failed for %d senones\n", n_err);
00263 
00264     ckd_free(pdf);
00265 
00266     if (chksum_present)
00267         bio_verify_chksum(fp, byteswap, chksum);
00268 
00269     if (fread(&eofchk, 1, 1, fp) == 1)
00270         E_FATAL("More data than expected in %s\n", file_name);
00271 
00272     fclose(fp);
00273 
00274     E_INFO
00275         ("Read mixture weights for %d senones: %d features x %d codewords\n",
00276          s->n_sen, s->n_feat, s->n_cw);
00277 
00278     return 1;
00279 }
00280 
00281 
00282 senone_t *
00283 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
00284             float32 mixwfloor, logmath_t *lmath)
00285 {
00286     senone_t *s;
00287     int32 n = 0, i;
00288 
00289     s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
00290     s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
00291     s->mixwfloor = mixwfloor;
00292 
00293     s->n_gauden = g->n_mgau;
00294     if (sen2mgau_map_file) {
00295         if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
00296               || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
00297             senone_mgau_map_read(s, sen2mgau_map_file);
00298             n = s->n_sen;
00299         }
00300     }
00301     else {
00302         if (s->n_gauden == 1)
00303             sen2mgau_map_file = ".semi.";
00304         else
00305             sen2mgau_map_file = ".cont.";
00306     }
00307 
00308     senone_mixw_read(s, mixwfile, lmath);
00309 
00310     if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
00311         /* All-to-1 senones-codebook mapping */
00312         s->mgau = (int16 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00313     }
00314     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
00315              || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
00316         /* 1-to-1 senone-codebook mapping */
00317         if (s->n_sen <= 1)
00318             E_FATAL("#senone=%d; must be >1\n", s->n_sen);
00319 
00320         s->mgau = (int16 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00321         for (i = 0; i < s->n_sen; i++)
00322             s->mgau[i] = i;
00323 
00324         s->n_gauden = s->n_sen;
00325     }
00326     else {
00327         if (s->n_sen != n)
00328             E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
00329                     n, sen2mgau_map_file, s->n_sen, mixwfile);
00330     }
00331 
00332     s->featscr = NULL;
00333     return s;
00334 }
00335 
00336 void
00337 senone_free(senone_t * s)
00338 {
00339     if (s == NULL)
00340         return;
00341     if (s->pdf)
00342         ckd_free_3d((void *) s->pdf);
00343     if (s->mgau)
00344         ckd_free(s->mgau);
00345     if (s->featscr)
00346         ckd_free(s->featscr);
00347     logmath_free(s->lmath);
00348     ckd_free(s);
00349 }
00350 
00351 
00352 /*
00353  * Compute senone score for one senone.
00354  * NOTE:  Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
00355  * NOTE:  Remember also that PDF data may be transposed or not depending on s->n_gauden.
00356  */
00357 int32
00358 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
00359 {
00360     int32 scr;                  /* total senone score */
00361     int32 fden;                 /* Gaussian density */
00362     int32 fscr;                 /* senone score for one feature */
00363     int32 fwscr;                /* senone score for one feature, one codeword */
00364     int32 f, t;
00365     gauden_dist_t *fdist;
00366 
00367     assert((id >= 0) && (id < s->n_sen));
00368     assert((n_top > 0) && (n_top <= s->n_cw));
00369 
00370     scr = 0;
00371 
00372     for (f = 0; f < s->n_feat; f++) {
00373         fdist = dist[f];
00374 
00375         /* Top codeword for feature f */
00376         fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
00377         fscr = (s->n_gauden > 1)
00378             ? (fden + -s->pdf[id][f][fdist[0].id])  /* untransposed */
00379             : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
00380 
00381         /* Remaining of n_top codewords for feature f */
00382         for (t = 1; t < n_top; t++) {
00383             fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
00384             fwscr = (s->n_gauden > 1) ?
00385                 (fden + -s->pdf[id][f][fdist[t].id]) :
00386                 (fden + -s->pdf[f][fdist[t].id][id]);
00387             fscr = logmath_add(s->lmath, fscr, fwscr);
00388         }
00389         /* Senone scores are also scaled, negated logs3 values.  Hence
00390          * we have to negate the stuff we calculated above. */
00391         scr -= fscr;
00392     }
00393 
00394     /* Avoid overflowing int16 */
00395     if (scr > 32767)
00396       scr = 32767;
00397     if (scr < -32768)
00398       scr = -32768;
00399     return scr;
00400 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7