src/libpocketsphinx/s2_semi_mgau.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers */
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <string.h>
00042 #include <assert.h>
00043 #include <limits.h>
00044 #include <math.h>
00045 #if defined(__ADSPBLACKFIN__)
00046 #elif !defined(_WIN32_WCE)
00047 #include <sys/types.h>
00048 #endif
00049 
00050 #ifndef M_PI 
00051 #define M_PI 3.14159265358979323846 
00052 #endif
00053 
00054 /* SphinxBase headers */
00055 #include <sphinx_config.h>
00056 #include <cmd_ln.h>
00057 #include <fixpoint.h>
00058 #include <ckd_alloc.h>
00059 #include <bio.h>
00060 #include <err.h>
00061 #include <prim_type.h>
00062 
00063 /* Local headers */
00064 #include "s2_semi_mgau.h"
00065 #include "kdtree.h"
00066 #include "posixwin32.h"
00067 
00068 static ps_mgaufuncs_t s2_semi_mgau_funcs = {
00069     "s2_semi",
00070     &s2_semi_mgau_frame_eval,      /* frame_eval */
00071     &s2_semi_mgau_mllr_transform,  /* transform */
00072     &s2_semi_mgau_free             /* free */
00073 };
00074 
#define MGAU_MIXW_VERSION       "1.0"   /* Sphinx-3 file format version for mixw */
#define MGAU_PARAM_VERSION      "1.0"   /* Sphinx-3 file format version for mean/var */
/* The replacement lists below are fully parenthesized so that each macro
 * expands to a single operand whatever expression surrounds it. */
#define NONE            (-1)
/* 0x80000000 converted to int32 (the most negative value on the usual
 * two's-complement targets). */
#define WORST_DIST      ((int32)0x80000000)
00079 
00080 struct vqFeature_s {
00081     int32 score; /* score or distance */
00082     int32 codeword; /* codeword (vector index) */
00083 };
00084 
/*
 * Accumulation helpers for Gaussian scores.
 *
 * With FIXED_POINT the result saturates at INT_MIN (GMMSUB) or INT_MAX
 * (GMMADD) instead of overflowing.  The range test is made BEFORE the
 * arithmetic: signed overflow is undefined behaviour in C, so a test on
 * an already-overflowed result may legally be optimized away.
 *
 * b is expected to be non-negative.  A negative b also yields the
 * saturated value, which is what the earlier wrap-around comparison
 * produced for such input.  Operands are assumed to be int-sized, as the
 * INT_MIN/INT_MAX limits imply.
 *
 * Arguments are evaluated more than once; pass side-effect-free
 * expressions only.
 */
#ifdef FIXED_POINT
#define GMMSUB(a,b) \
        ((((b) < 0) || ((a) < INT_MIN + (b))) ? (INT_MIN) : ((a)-(b)))

#define GMMADD(a,b) \
        ((((b) < 0) || ((a) > INT_MAX - (b))) ? (INT_MAX) : ((a)+(b)))
#else
#define GMMSUB(a,b) ((a)-(b))
#define GMMADD(a,b) ((a)+(b))
#endif
00096 
/* Smaller of two values, defined only if no MIN exists yet.  Each
 * argument may be evaluated twice. */
#if !defined(MIN)
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
00100 
00101 
/*
 * Specifier used for small helper functions defined in this file.
 *
 * C99 and every later revision of the standard get "static inline".  A
 * plain "inline" definition in a .c file supplies no external definition,
 * so any call the compiler decides not to inline would be left unresolved
 * at link time; "static" avoids that.  The version test is ">=" so that
 * C11 and newer are recognized as well.
 */
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) \
    || defined(__GNUC__)
#define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#define LOGMATH_INLINE __inline
#else
#define LOGMATH_INLINE static
#endif
00111 
/* Negated quantized mixture weights are allotted the range 0..159 and
 * negated normalized acoustic scores the range 0..96, so the sum of the
 * two for a single mixture can never exceed 255. */
#define MAX_NEG_MIXW 159
#define MAX_NEG_ASCR 96
00135 LOGMATH_INLINE int
00136 fast_logmath_add(logmath_t *lmath, int mlx, int mly)
00137 {
00138     logadd_t *t = LOGMATH_TABLE(lmath);
00139     int d, r;
00140 
00141     /* d must be positive, obviously. */
00142     if (mlx > mly) {
00143         d = (mlx - mly);
00144         r = mly;
00145     }
00146     else {
00147         d = (mly - mlx);
00148         r = mlx;
00149     }
00150 
00151     return r - (((uint8 *)t->table)[d]);
00152 }
00153 
00154 static void
00155 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00156 {
00157     int32 i, ceplen;
00158     vqFeature_t *topn;
00159 
00160     topn = s->f[feat];
00161     ceplen = s->veclen[feat];
00162 
00163     for (i = 0; i < s->max_topn; i++) {
00164         mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
00165         vqFeature_t vtmp;
00166         mfcc_t *var, d;
00167         mfcc_t *obs;
00168         int32 cw, j;
00169 
00170         cw = topn[i].codeword;
00171         mean = s->means[feat] + cw * ceplen;
00172         var = s->vars[feat] + cw * ceplen;
00173         d = s->dets[feat][cw];
00174         obs = z;
00175         for (j = 0; j < ceplen; j++) {
00176             diff = *obs++ - *mean++;
00177             sqdiff = MFCCMUL(diff, diff);
00178             compl = MFCCMUL(sqdiff, *var);
00179             d = GMMSUB(d, compl);
00180             ++var;
00181         }
00182         topn[i].score = (int32)d;
00183         if (i == 0)
00184             continue;
00185         vtmp = topn[i];
00186         for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
00187             topn[j + 1] = topn[j];
00188         }
00189         topn[j + 1] = vtmp;
00190     }
00191 }
00192 
00193 static void
00194 eval_cb_kdtree(s2_semi_mgau_t *s, int32 feat, mfcc_t *z,
00195                kd_tree_node_t *node, uint32 maxbbi)
00196 {
00197     vqFeature_t *worst, *best, *topn;
00198     int32 i, ceplen;
00199 
00200     best = topn = s->f[feat];
00201     worst = topn + (s->max_topn - 1);
00202     ceplen = s->veclen[feat];
00203 
00204     for (i = 0; i < maxbbi; ++i) {
00205         mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
00206         mfcc_t *var, d;
00207         mfcc_t *obs;
00208         vqFeature_t *cur;
00209         int32 cw, j, k;
00210 
00211         cw = node->bbi[i];
00212         mean = s->means[feat] + cw * ceplen;
00213         var = s->vars[feat] + cw * ceplen;
00214         d = s->dets[feat][cw];
00215         obs = z;
00216         for (j = 0; (j < ceplen) && (d >= worst->score); j++) {
00217             diff = *obs++ - *mean++;
00218             sqdiff = MFCCMUL(diff, diff);
00219             compl = MFCCMUL(sqdiff, *var);
00220             d = GMMSUB(d, compl);
00221             ++var;
00222         }
00223         if (j < ceplen)
00224             continue;
00225         if ((int32)d < worst->score)
00226             continue;
00227         for (k = 0; k < s->max_topn; k++) {
00228             /* already there, so don't need to insert */
00229             if (topn[k].codeword == cw)
00230                 break;
00231         }
00232         if (k < s->max_topn)
00233             continue;       /* already there.  Don't insert */
00234         /* remaining code inserts codeword and dist in correct spot */
00235         for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00236             memcpy(cur + 1, cur, sizeof(vqFeature_t));
00237         ++cur;
00238         cur->codeword = cw;
00239         cur->score = (int32)d;
00240     }
00241 }
00242 
00243 static void
00244 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00245 {
00246     vqFeature_t *worst, *best, *topn;
00247     mfcc_t *mean;
00248     mfcc_t *var, *det, *detP, *detE;
00249     int32 i, ceplen;
00250 
00251     best = topn = s->f[feat];
00252     worst = topn + (s->max_topn - 1);
00253     mean = s->means[feat];
00254     var = s->vars[feat];
00255     det = s->dets[feat];
00256     detE = det + s->n_density;
00257     ceplen = s->veclen[feat];
00258 
00259     for (detP = det; detP < detE; ++detP) {
00260         mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */
00261         mfcc_t d;
00262         mfcc_t *obs;
00263         vqFeature_t *cur;
00264         int32 cw, j;
00265 
00266         d = *detP;
00267         obs = z;
00268         cw = detP - det;
00269         for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
00270             diff = *obs++ - *mean++;
00271             sqdiff = MFCCMUL(diff, diff);
00272             compl = MFCCMUL(sqdiff, *var);
00273             d = GMMSUB(d, compl);
00274             ++var;
00275         }
00276         if (j < ceplen) {
00277             /* terminated early, so not in topn */
00278             mean += (ceplen - j);
00279             var += (ceplen - j);
00280             continue;
00281         }
00282         if ((int32)d < worst->score)
00283             continue;
00284         for (i = 0; i < s->max_topn; i++) {
00285             /* already there, so don't need to insert */
00286             if (topn[i].codeword == cw)
00287                 break;
00288         }
00289         if (i < s->max_topn)
00290             continue;       /* already there.  Don't insert */
00291         /* remaining code inserts codeword and dist in correct spot */
00292         for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00293             memcpy(cur + 1, cur, sizeof(vqFeature_t));
00294         ++cur;
00295         cur->codeword = cw;
00296         cur->score = (int32)d;
00297     }
00298 }
00299 
00300 static void
00301 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
00302 {
00303     eval_topn(s, feat, z);
00304 
00305     /* If this frame is skipped, do nothing else. */
00306     if (frame % s->ds_ratio)
00307         return;
00308 
00309     /* Evaluate the rest of the codebook (or subset thereof). */
00310     if (s->kdtrees) {
00311         kd_tree_node_t *node;
00312         uint32 maxbbi;
00313 
00314         node =
00315             eval_kd_tree(s->kdtrees[feat], z, s->kd_maxdepth);
00316         maxbbi = s->kd_maxbbi == -1 ? node->n_bbi : MIN(node->n_bbi,
00317                                                         s->
00318                                                         kd_maxbbi);
00319         eval_cb_kdtree(s, feat, z, node, maxbbi);
00320     }
00321     else {
00322         eval_cb(s, feat, z);
00323     }
00324 }
00325 
00326 static int
00327 mgau_norm(s2_semi_mgau_t *s, int feat)
00328 {
00329     int32 norm;
00330     int j;
00331 
00332     /* Compute quantized normalizing constant. */
00333     norm = s->f[feat][0].score >> SENSCR_SHIFT;
00334 
00335     /* Normalize the scores, negate them, and clamp their dynamic range. */
00336     for (j = 0; j < s->max_topn; ++j) {
00337         s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
00338         if (s->f[feat][j].score > MAX_NEG_ASCR)
00339             s->f[feat][j].score = MAX_NEG_ASCR;
00340         if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
00341             break;
00342     }
00343     return j;
00344 }
00345 
00346 static int32
00347 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
00348                      int16 *senone_scores, uint8 *senone_active,
00349                      int32 n_senone_active)
00350 {
00351     int32 j, l;
00352     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00353 
00354     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00355     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00356     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00357     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00358     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00359     pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00360 
00361     for (l = j = 0; j < n_senone_active; j++) {
00362         int sen = senone_active[j] + l;
00363         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00364 
00365         tmp = fast_logmath_add(s->lmath_8b, tmp,
00366                                pid_cw1[sen] + s->f[i][1].score);
00367         tmp = fast_logmath_add(s->lmath_8b, tmp,
00368                                pid_cw2[sen] + s->f[i][2].score);
00369         tmp = fast_logmath_add(s->lmath_8b, tmp,
00370                                pid_cw3[sen] + s->f[i][3].score);
00371         tmp = fast_logmath_add(s->lmath_8b, tmp,
00372                                pid_cw4[sen] + s->f[i][4].score);
00373         tmp = fast_logmath_add(s->lmath_8b, tmp,
00374                                pid_cw5[sen] + s->f[i][5].score);
00375 
00376         senone_scores[sen] += tmp;
00377         l = sen;
00378     }
00379     return 0;
00380 }
00381 
00382 static int32
00383 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
00384                      int16 *senone_scores, uint8 *senone_active,
00385                      int32 n_senone_active)
00386 {
00387     int32 j, l;
00388     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00389 
00390     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00391     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00392     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00393     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00394     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00395 
00396     for (l = j = 0; j < n_senone_active; j++) {
00397         int sen = senone_active[j] + l;
00398         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00399 
00400         tmp = fast_logmath_add(s->lmath_8b, tmp,
00401                                pid_cw1[sen] + s->f[i][1].score);
00402         tmp = fast_logmath_add(s->lmath_8b, tmp,
00403                                pid_cw2[sen] + s->f[i][2].score);
00404         tmp = fast_logmath_add(s->lmath_8b, tmp,
00405                                pid_cw3[sen] + s->f[i][3].score);
00406         tmp = fast_logmath_add(s->lmath_8b, tmp,
00407                                pid_cw4[sen] + s->f[i][4].score);
00408 
00409         senone_scores[sen] += tmp;
00410         l = sen;
00411     }
00412     return 0;
00413 }
00414 
00415 static int32
00416 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
00417                      int16 *senone_scores, uint8 *senone_active,
00418                      int32 n_senone_active)
00419 {
00420     int32 j, l;
00421     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00422 
00423     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00424     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00425     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00426     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00427 
00428     for (l = j = 0; j < n_senone_active; j++) {
00429         int sen = senone_active[j] + l;
00430         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00431 
00432         tmp = fast_logmath_add(s->lmath_8b, tmp,
00433                                pid_cw1[sen] + s->f[i][1].score);
00434         tmp = fast_logmath_add(s->lmath_8b, tmp,
00435                                pid_cw2[sen] + s->f[i][2].score);
00436         tmp = fast_logmath_add(s->lmath_8b, tmp,
00437                                pid_cw3[sen] + s->f[i][3].score);
00438 
00439         senone_scores[sen] += tmp;
00440         l = sen;
00441     }
00442     return 0;
00443 }
00444 
00445 static int32
00446 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
00447                      int16 *senone_scores, uint8 *senone_active,
00448                      int32 n_senone_active)
00449 {
00450     int32 j, l;
00451     uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00452 
00453     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00454     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00455     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00456 
00457     for (l = j = 0; j < n_senone_active; j++) {
00458         int sen = senone_active[j] + l;
00459         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00460 
00461         tmp = fast_logmath_add(s->lmath_8b, tmp,
00462                                pid_cw1[sen] + s->f[i][1].score);
00463         tmp = fast_logmath_add(s->lmath_8b, tmp,
00464                                pid_cw2[sen] + s->f[i][2].score);
00465 
00466         senone_scores[sen] += tmp;
00467         l = sen;
00468     }
00469     return 0;
00470 }
00471 
00472 static int32
00473 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
00474                      int16 *senone_scores, uint8 *senone_active,
00475                      int32 n_senone_active)
00476 {
00477     int32 j, l;
00478     uint8 *pid_cw0, *pid_cw1;
00479 
00480     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00481     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00482 
00483     for (l = j = 0; j < n_senone_active; j++) {
00484         int sen = senone_active[j] + l;
00485         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00486 
00487         tmp = fast_logmath_add(s->lmath_8b, tmp,
00488                                pid_cw1[sen] + s->f[i][1].score);
00489 
00490         senone_scores[sen] += tmp;
00491         l = sen;
00492     }
00493     return 0;
00494 }
00495 
00496 static int32
00497 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
00498                      int16 *senone_scores, uint8 *senone_active,
00499                      int32 n_senone_active)
00500 {
00501     int32 j, l;
00502     uint8 *pid_cw0;
00503 
00504     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00505     for (l = j = 0; j < n_senone_active; j++) {
00506         int sen = senone_active[j] + l;
00507         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00508         senone_scores[sen] += tmp;
00509         l = sen;
00510     }
00511     return 0;
00512 }
00513 
00514 static int32
00515 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00516                        int16 *senone_scores, uint8 *senone_active,
00517                        int32 n_senone_active)
00518 {
00519     int32 j, k, l;
00520 
00521     for (l = j = 0; j < n_senone_active; j++) {
00522         int sen = senone_active[j] + l;
00523         uint8 *pid_cw;
00524         int32 tmp;
00525         pid_cw = s->mixw[i][s->f[i][0].codeword];
00526         tmp = pid_cw[sen] + s->f[i][0].score;
00527         for (k = 1; k < topn; ++k) {
00528             pid_cw = s->mixw[i][s->f[i][k].codeword];
00529             tmp = fast_logmath_add(s->lmath_8b, tmp,
00530                                    pid_cw[sen] + s->f[i][k].score);
00531         }
00532         senone_scores[sen] += tmp;
00533         l = sen;
00534     }
00535     return 0;
00536 }
00537 
00538 static int32
00539 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
00540                    int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00541 {
00542     switch (topn) {
00543     case 6:
00544         return get_scores_8b_feat_6(s, i, senone_scores,
00545                                     senone_active, n_senone_active);
00546     case 5:
00547         return get_scores_8b_feat_5(s, i, senone_scores,
00548                                     senone_active, n_senone_active);
00549     case 4:
00550         return get_scores_8b_feat_4(s, i, senone_scores,
00551                                     senone_active, n_senone_active);
00552     case 3:
00553         return get_scores_8b_feat_3(s, i, senone_scores,
00554                                     senone_active, n_senone_active);
00555     case 2:
00556         return get_scores_8b_feat_2(s, i, senone_scores,
00557                                     senone_active, n_senone_active);
00558     case 1:
00559         return get_scores_8b_feat_1(s, i, senone_scores,
00560                                     senone_active, n_senone_active);
00561     default:
00562         return get_scores_8b_feat_any(s, i, topn, senone_scores,
00563                                       senone_active, n_senone_active);
00564     }
00565 }
00566 
00567 static int32
00568 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00569 {
00570     int32 j, k;
00571 
00572     for (j = 0; j < s->n_sen; j++) {
00573         uint8 *pid_cw;
00574         int32 tmp;
00575         pid_cw = s->mixw[i][s->f[i][0].codeword];
00576         tmp = pid_cw[j] + s->f[i][0].score;
00577         for (k = 1; k < topn; ++k) {
00578             pid_cw = s->mixw[i][s->f[i][k].codeword];
00579             tmp = fast_logmath_add(s->lmath_8b, tmp,
00580                                    pid_cw[j] + s->f[i][k].score);
00581         }
00582         senone_scores[j] += tmp;
00583     }
00584     return 0;
00585 }
00586 
00587 static int32
00588 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
00589                      int16 *senone_scores, uint8 *senone_active,
00590                      int32 n_senone_active)
00591 {
00592     int32 j, l;
00593     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00594     uint8 w_den[6][16];
00595 
00596     /* Precompute scaled densities. */
00597     for (j = 0; j < 16; ++j) {
00598         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00599         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00600         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00601         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00602         w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00603         w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
00604     }
00605 
00606     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00607     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00608     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00609     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00610     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00611     pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00612 
00613     for (l = j = 0; j < n_senone_active; j++) {
00614         int n = senone_active[j] + l;
00615         int tmp, cw;
00616 
00617         if (n & 1) {
00618             cw = pid_cw0[n/2] >> 4;
00619             tmp = w_den[0][cw];
00620             cw = pid_cw1[n/2] >> 4;
00621             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00622             cw = pid_cw2[n/2] >> 4;
00623             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00624             cw = pid_cw3[n/2] >> 4;
00625             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00626             cw = pid_cw4[n/2] >> 4;
00627             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00628             cw = pid_cw5[n/2] >> 4;
00629             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00630         }
00631         else {
00632             cw = pid_cw0[n/2] & 0x0f;
00633             tmp = w_den[0][cw];
00634             cw = pid_cw1[n/2] & 0x0f;
00635             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00636             cw = pid_cw2[n/2] & 0x0f;
00637             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00638             cw = pid_cw3[n/2] & 0x0f;
00639             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00640             cw = pid_cw4[n/2] & 0x0f;
00641             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00642             cw = pid_cw5[n/2] & 0x0f;
00643             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00644         }
00645         senone_scores[n] += tmp;
00646         l = n;
00647     }
00648     return 0;
00649 }
00650 
00651 static int32
00652 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
00653                      int16 *senone_scores, uint8 *senone_active,
00654                      int32 n_senone_active)
00655 {
00656     int32 j, l;
00657     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00658     uint8 w_den[5][16];
00659 
00660     /* Precompute scaled densities. */
00661     for (j = 0; j < 16; ++j) {
00662         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00663         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00664         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00665         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00666         w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00667     }
00668 
00669     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00670     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00671     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00672     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00673     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00674 
00675     for (l = j = 0; j < n_senone_active; j++) {
00676         int n = senone_active[j] + l;
00677         int tmp, cw;
00678 
00679         if (n & 1) {
00680             cw = pid_cw0[n/2] >> 4;
00681             tmp = w_den[0][cw];
00682             cw = pid_cw1[n/2] >> 4;
00683             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00684             cw = pid_cw2[n/2] >> 4;
00685             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00686             cw = pid_cw3[n/2] >> 4;
00687             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00688             cw = pid_cw4[n/2] >> 4;
00689             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00690         }
00691         else {
00692             cw = pid_cw0[n/2] & 0x0f;
00693             tmp = w_den[0][cw];
00694             cw = pid_cw1[n/2] & 0x0f;
00695             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00696             cw = pid_cw2[n/2] & 0x0f;
00697             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00698             cw = pid_cw3[n/2] & 0x0f;
00699             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00700             cw = pid_cw4[n/2] & 0x0f;
00701             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00702         }
00703         senone_scores[n] += tmp;
00704         l = n;
00705     }
00706     return 0;
00707 }
00708 
00709 static int32
00710 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
00711                      int16 *senone_scores, uint8 *senone_active,
00712                      int32 n_senone_active)
00713 {
00714     int32 j, l;
00715     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00716     uint8 w_den[4][16];
00717 
00718     /* Precompute scaled densities. */
00719     for (j = 0; j < 16; ++j) {
00720         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00721         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00722         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00723         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00724     }
00725 
00726     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00727     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00728     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00729     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00730 
00731     for (l = j = 0; j < n_senone_active; j++) {
00732         int n = senone_active[j] + l;
00733         int tmp, cw;
00734 
00735         if (n & 1) {
00736             cw = pid_cw0[n/2] >> 4;
00737             tmp = w_den[0][cw];
00738             cw = pid_cw1[n/2] >> 4;
00739             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00740             cw = pid_cw2[n/2] >> 4;
00741             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00742             cw = pid_cw3[n/2] >> 4;
00743             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00744         }
00745         else {
00746             cw = pid_cw0[n/2] & 0x0f;
00747             tmp = w_den[0][cw];
00748             cw = pid_cw1[n/2] & 0x0f;
00749             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00750             cw = pid_cw2[n/2] & 0x0f;
00751             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00752             cw = pid_cw3[n/2] & 0x0f;
00753             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00754         }
00755         senone_scores[n] += tmp;
00756         l = n;
00757     }
00758     return 0;
00759 }
00760 
00761 static int32
00762 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
00763                      int16 *senone_scores, uint8 *senone_active,
00764                      int32 n_senone_active)
00765 {
00766     int32 j, l;
00767     uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00768     uint8 w_den[3][16];
00769 
00770     /* Precompute scaled densities. */
00771     for (j = 0; j < 16; ++j) {
00772         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00773         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00774         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00775     }
00776 
00777     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00778     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00779     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00780 
00781     for (l = j = 0; j < n_senone_active; j++) {
00782         int n = senone_active[j] + l;
00783         int tmp, cw;
00784 
00785         if (n & 1) {
00786             cw = pid_cw0[n/2] >> 4;
00787             tmp = w_den[0][cw];
00788             cw = pid_cw1[n/2] >> 4;
00789             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00790             cw = pid_cw2[n/2] >> 4;
00791             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00792         }
00793         else {
00794             cw = pid_cw0[n/2] & 0x0f;
00795             tmp = w_den[0][cw];
00796             cw = pid_cw1[n/2] & 0x0f;
00797             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00798             cw = pid_cw2[n/2] & 0x0f;
00799             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00800         }
00801         senone_scores[n] += tmp;
00802         l = n;
00803     }
00804     return 0;
00805 }
00806 
00807 static int32
00808 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
00809                      int16 *senone_scores, uint8 *senone_active,
00810                      int32 n_senone_active)
00811 {
00812     int32 j, l;
00813     uint8 *pid_cw0, *pid_cw1;
00814     uint8 w_den[2][16];
00815 
00816     /* Precompute scaled densities. */
00817     for (j = 0; j < 16; ++j) {
00818         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00819         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00820     }
00821 
00822     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00823     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00824 
00825     for (l = j = 0; j < n_senone_active; j++) {
00826         int n = senone_active[j] + l;
00827         int tmp, cw;
00828 
00829         if (n & 1) {
00830             cw = pid_cw0[n/2] >> 4;
00831             tmp = w_den[0][cw];
00832             cw = pid_cw1[n/2] >> 4;
00833             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00834         }
00835         else {
00836             cw = pid_cw0[n/2] & 0x0f;
00837             tmp = w_den[0][cw];
00838             cw = pid_cw1[n/2] & 0x0f;
00839             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00840         }
00841         senone_scores[n] += tmp;
00842         l = n;
00843     }
00844     return 0;
00845 }
00846 
00847 static int32
00848 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
00849                      int16 *senone_scores, uint8 *senone_active,
00850                      int32 n_senone_active)
00851 {
00852     int32 j, l;
00853     uint8 *pid_cw0;
00854     uint8 w_den[16];
00855 
00856     /* Precompute scaled densities. */
00857     for (j = 0; j < 16; ++j) {
00858         w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
00859     }
00860 
00861     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00862 
00863     for (l = j = 0; j < n_senone_active; j++) {
00864         int n = senone_active[j] + l;
00865         int tmp, cw;
00866 
00867         if (n & 1) {
00868             cw = pid_cw0[n/2] >> 4;
00869             tmp = w_den[cw];
00870         }
00871         else {
00872             cw = pid_cw0[n/2] & 0x0f;
00873             tmp = w_den[cw];
00874         }
00875         senone_scores[n] += tmp;
00876         l = n;
00877     }
00878     return 0;
00879 }
00880 
00881 static int32
00882 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00883                        int16 *senone_scores, uint8 *senone_active,
00884                        int32 n_senone_active)
00885 {
00886     int32 j, k, l;
00887 
00888     for (l = j = 0; j < n_senone_active; j++) {
00889         int n = senone_active[j] + l;
00890         int tmp, cw;
00891         uint8 *pid_cw;
00892     
00893         pid_cw = s->mixw[i][s->f[i][0].codeword];
00894         if (n & 1)
00895             cw = pid_cw[n/2] >> 4;
00896         else
00897             cw = pid_cw[n/2] & 0x0f;
00898         tmp = s->mixw_cb[cw] + s->f[i][0].score;
00899         for (k = 1; k < topn; ++k) {
00900             pid_cw = s->mixw[i][s->f[i][k].codeword];
00901             if (n & 1)
00902                 cw = pid_cw[n/2] >> 4;
00903             else
00904                 cw = pid_cw[n/2] & 0x0f;
00905             tmp = fast_logmath_add(s->lmath_8b, tmp,
00906                                    s->mixw_cb[cw] + s->f[i][k].score);
00907         }
00908         senone_scores[n] += tmp;
00909         l = n;
00910     }
00911     return 0;
00912 }
00913 
00914 static int32
00915 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
00916                    int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00917 {
00918     switch (topn) {
00919     case 6:
00920         return get_scores_4b_feat_6(s, i, senone_scores,
00921                                     senone_active, n_senone_active);
00922     case 5:
00923         return get_scores_4b_feat_5(s, i, senone_scores,
00924                                     senone_active, n_senone_active);
00925     case 4:
00926         return get_scores_4b_feat_4(s, i, senone_scores,
00927                                     senone_active, n_senone_active);
00928     case 3:
00929         return get_scores_4b_feat_3(s, i, senone_scores,
00930                                     senone_active, n_senone_active);
00931     case 2:
00932         return get_scores_4b_feat_2(s, i, senone_scores,
00933                                     senone_active, n_senone_active);
00934     case 1:
00935         return get_scores_4b_feat_1(s, i, senone_scores,
00936                                     senone_active, n_senone_active);
00937     default:
00938         return get_scores_4b_feat_any(s, i, topn, senone_scores,
00939                                       senone_active, n_senone_active);
00940     }
00941 }
00942 
00943 static int32
00944 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00945 {
00946     int32 j, k;
00947 
00948     for (j = 0; j < s->n_sen; j++) {
00949         uint8 *pid_cw;
00950         int32 tmp;
00951         pid_cw = s->mixw[i][s->f[i][0].codeword];
00952         tmp = pid_cw[j] + s->f[i][0].score;
00953         for (k = 1; k < topn; ++k) {
00954             pid_cw = s->mixw[i][s->f[i][k].codeword];
00955             tmp = fast_logmath_add(s->lmath_8b, tmp,
00956                                    pid_cw[j] + s->f[i][k].score);
00957         }
00958         senone_scores[j] += tmp;
00959     }
00960     return 0;
00961 }
00962 
00963 /*
00964  * Compute senone scores for the active senones.
00965  */
00966 int32
00967 s2_semi_mgau_frame_eval(ps_mgau_t *ps,
00968                         int16 *senone_scores,
00969                         uint8 *senone_active,
00970                         int32 n_senone_active,
00971                         mfcc_t ** featbuf, int32 frame,
00972                         int32 compallsen)
00973 {
00974     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
00975     int i, topn_idx;
00976 
00977     memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00978     /* No bounds checking is done here, which just means you'll get
00979      * semi-random crap if you request a frame in the future or one
00980      * that's too far in the past. */
00981     topn_idx = frame % s->n_topn_hist;
00982     s->f = s->topn_hist[topn_idx];
00983     for (i = 0; i < s->n_feat; ++i) {
00984         /* For past frames this will already be computed. */
00985         if (frame >= ps_mgau_base(ps)->frame_idx) {
00986             vqFeature_t **lastf;
00987             if (topn_idx == 0)
00988                 lastf = s->topn_hist[s->n_topn_hist-1];
00989             else
00990                 lastf = s->topn_hist[topn_idx-1];
00991             memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
00992             mgau_dist(s, frame, i, featbuf[i]);
00993             s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
00994         }
00995         if (s->mixw_cb) {
00996             if (compallsen)
00997                 get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
00998             else
00999                 get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
01000                                    senone_active, n_senone_active);
01001         }
01002         else {
01003             if (compallsen)
01004                 get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
01005             else
01006                 get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
01007                                    senone_active, n_senone_active);
01008         }
01009     }
01010 
01011     return 0;
01012 }
01013 
01014 int32
01015 s2_semi_mgau_load_kdtree(ps_mgau_t * ps, const char *kdtree_path,
01016                          uint32 maxdepth, int32 maxbbi)
01017 {
01018     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01019     if (read_kd_trees(kdtree_path, &s->kdtrees, &s->n_kdtrees,
01020                       maxdepth, maxbbi) == -1)
01021         E_FATAL("Failed to read kd-trees from %s\n", kdtree_path);
01022     if (s->n_kdtrees != s->n_feat)
01023         E_FATAL("Number of kd-trees != %d\n", s->n_feat);
01024 
01025     s->kd_maxdepth = maxdepth;
01026     s->kd_maxbbi = maxbbi;
01027     return 0;
01028 }
01029 
01030 static int32
01031 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
01032 {
01033     FILE *fp;
01034     char line[1000];
01035     int32 i, n, r, c;
01036     int32 do_swap, do_mmap;
01037     size_t filesize, offset;
01038     int n_clust = 0;
01039     int n_feat = s->n_feat;
01040     int n_density = s->n_density;
01041     int n_sen = bin_mdef_n_sen(mdef);
01042     int n_bits = 8;
01043 
01044     s->n_sen = n_sen; /* FIXME: Should have been done earlier */
01045     do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
01046 
01047     if ((fp = fopen(file, "rb")) == NULL)
01048         return -1;
01049 
01050     E_INFO("Loading senones from dump file %s\n", file);
01051     /* Read title size, title */
01052     if (fread(&n, sizeof(int32), 1, fp) != 1) {
01053         E_ERROR_SYSTEM("Failed to read title size from %s", file);
01054         goto error_out;
01055     }
01056     /* This is extremely bogus */
01057     do_swap = 0;
01058     if (n < 1 || n > 999) {
01059         SWAP_INT32(&n);
01060         if (n < 1 || n > 999) {
01061             E_ERROR("Title length %x in dump file %s out of range\n", n, file);
01062             goto error_out;
01063         }
01064         do_swap = 1;
01065     }
01066     if (fread(line, sizeof(char), n, fp) != n) {
01067         E_ERROR_SYSTEM("Cannot read title");
01068         goto error_out;
01069     }
01070     if (line[n - 1] != '\0') {
01071         E_ERROR("Bad title in dump file\n");
01072         goto error_out;
01073     }
01074     E_INFO("%s\n", line);
01075 
01076     /* Read header size, header */
01077     if (fread(&n, sizeof(n), 1, fp) != 1) {
01078         E_ERROR_SYSTEM("Failed to read header size from %s", file);
01079         goto error_out;
01080     }
01081     if (do_swap) SWAP_INT32(&n);
01082     if (fread(line, sizeof(char), n, fp) != n) {
01083         E_ERROR_SYSTEM("Cannot read header");
01084         goto error_out;
01085     }
01086     if (line[n - 1] != '\0') {
01087         E_ERROR("Bad header in dump file\n");
01088         goto error_out;
01089     }
01090 
01091     /* Read other header strings until string length = 0 */
01092     for (;;) {
01093         if (fread(&n, sizeof(n), 1, fp) != 1) {
01094             E_ERROR_SYSTEM("Failed to read header string size from %s", file);
01095             goto error_out;
01096         }
01097         if (do_swap) SWAP_INT32(&n);
01098         if (n == 0)
01099             break;
01100         if (fread(line, sizeof(char), n, fp) != n) {
01101             E_ERROR_SYSTEM("Cannot read header");
01102             goto error_out;
01103         }
01104         /* Look for a cluster count, if present */
01105         if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
01106             n_feat = atoi(line + strlen("feature_count "));
01107         }
01108         if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
01109             n_density = atoi(line + strlen("mixture_count "));
01110         }
01111         if (!strncmp(line, "model_count ", strlen("model_count "))) {
01112             n_sen = atoi(line + strlen("model_count "));
01113         }
01114         if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
01115             n_clust = atoi(line + strlen("cluster_count "));
01116         }
01117         if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
01118             n_bits = atoi(line + strlen("cluster_bits "));
01119         }
01120     }
01121 
01122     /* Defaults for #rows, #columns in mixw array. */
01123     c = n_sen;
01124     r = n_density;
01125     if (n_clust == 0) {
01126         /* Older mixw files have them here, and they might be padded. */
01127         if (fread(&r, sizeof(r), 1, fp) != 1) {
01128             E_ERROR_SYSTEM("Cannot read #rows");
01129             goto error_out;
01130         }
01131         if (do_swap) SWAP_INT32(&r);
01132         if (fread(&c, sizeof(c), 1, fp) != 1) {
01133             E_ERROR_SYSTEM("Cannot read #columns");
01134             goto error_out;
01135         }
01136         if (do_swap) SWAP_INT32(&c);
01137         E_INFO("Rows: %d, Columns: %d\n", r, c);
01138     }
01139 
01140     if (n_feat != s->n_feat) {
01141         E_ERROR("Number of feature streams mismatch: %d != %d\n",
01142                 n_feat, s->n_feat);
01143         goto error_out;
01144     }
01145     if (n_density != s->n_density) {
01146         E_ERROR("Number of densities mismatch: %d != %d\n",
01147                 n_density, s->n_density);
01148         goto error_out;
01149     }
01150     if (n_sen != s->n_sen) {
01151         E_ERROR("Number of senones mismatch: %d != %d\n",
01152                 n_sen, s->n_sen);
01153         goto error_out;
01154     }
01155 
01156     if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
01157         E_ERROR("Cluster count must be 0, 15, or 16\n");
01158         goto error_out;
01159     }
01160     if (n_clust == 15)
01161         ++n_clust;
01162 
01163     if (!((n_bits == 8) || (n_bits == 4))) {
01164         E_ERROR("Cluster count must be 4 or 8\n");
01165         goto error_out;
01166     }
01167 
01168     if (do_mmap) {
01169             E_INFO("Using memory-mapped I/O for senones\n");
01170     }
01171     offset = ftell(fp);
01172     fseek(fp, 0, SEEK_END);
01173     filesize = ftell(fp);
01174     fseek(fp, offset, SEEK_SET);
01175 
01176     /* Allocate memory for pdfs (or memory map them) */
01177     if (do_mmap) {
01178         s->sendump_mmap = mmio_file_read(file);
01179         /* Get cluster codebook if any. */
01180         if (n_clust) {
01181             s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01182             offset += n_clust;
01183         }
01184     }
01185     else {
01186         /* Get cluster codebook if any. */
01187         if (n_clust) {
01188             s->mixw_cb = ckd_calloc(1, n_clust);
01189             if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
01190                 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
01191                 goto error_out;
01192             }
01193         }
01194     }
01195 
01196     /* Set up pointers, or read, or whatever */
01197     if (s->sendump_mmap) {
01198         s->mixw = ckd_calloc_2d(s->n_feat, n_density, sizeof(*s->mixw));
01199         for (n = 0; n < n_feat; n++) {
01200             int step = c;
01201             if (n_bits == 4)
01202                 step = (step + 1) / 2;
01203             for (i = 0; i < r; i++) {
01204                 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01205                 offset += step;
01206             }
01207         }
01208     }
01209     else {
01210         s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
01211         /* Read pdf values and ids */
01212         for (n = 0; n < n_feat; n++) {
01213             int step = c;
01214             if (n_bits == 4)
01215                 step = (step + 1) / 2;
01216             for (i = 0; i < r; i++) {
01217                 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
01218                     != (size_t) step) {
01219                     E_ERROR("Failed to read %d bytes from sendump\n", step);
01220                     goto error_out;
01221                 }
01222             }
01223         }
01224     }
01225 
01226     fclose(fp);
01227     return 0;
01228 error_out:
01229     fclose(fp);
01230     return -1;
01231 }
01232 
01233 static int32
01234 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
01235 {
01236     char **argname, **argval;
01237     char eofchk;
01238     FILE *fp;
01239     int32 byteswap, chksum_present;
01240     uint32 chksum;
01241     float32 *pdf;
01242     int32 i, f, c, n;
01243     int32 n_sen;
01244     int32 n_feat;
01245     int32 n_comp;
01246     int32 n_err;
01247 
01248     E_INFO("Reading mixture weights file '%s'\n", file_name);
01249 
01250     if ((fp = fopen(file_name, "rb")) == NULL)
01251         E_FATAL("fopen(%s,rb) failed\n", file_name);
01252 
01253     /* Read header, including argument-value info and 32-bit byteorder magic */
01254     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
01255         E_FATAL("bio_readhdr(%s) failed\n", file_name);
01256 
01257     /* Parse argument-value list */
01258     chksum_present = 0;
01259     for (i = 0; argname[i]; i++) {
01260         if (strcmp(argname[i], "version") == 0) {
01261             if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
01262                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
01263                        file_name, argval[i], MGAU_MIXW_VERSION);
01264         }
01265         else if (strcmp(argname[i], "chksum0") == 0) {
01266             chksum_present = 1; /* Ignore the associated value */
01267         }
01268     }
01269     bio_hdrarg_free(argname, argval);
01270     argname = argval = NULL;
01271 
01272     chksum = 0;
01273 
01274     /* Read #senones, #features, #codewords, arraysize */
01275     if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01276         || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
01277             1)
01278         || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
01279             1)
01280         || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
01281         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
01282     }
01283     if (n_feat != s->n_feat)
01284         E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01285     if (n != n_sen * n_feat * n_comp) {
01286         E_FATAL
01287             ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
01288              file_name, i, n_sen, n_feat, n_comp);
01289     }
01290 
01291     /* n_sen = number of mixture weights per codeword, which is
01292      * fixed at the number of senones since we have only one codebook.
01293      */
01294     s->n_sen = n_sen;
01295 
01296     /* Quantized mixture weight arrays. */
01297     s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));
01298 
01299     /* Temporary structure to read in floats before conversion to (int32) logs3 */
01300     pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
01301 
01302     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
01303     n_err = 0;
01304     for (i = 0; i < n_sen; i++) {
01305         for (f = 0; f < n_feat; f++) {
01306             if (bio_fread((void *) pdf, sizeof(float32),
01307                           n_comp, fp, byteswap, &chksum) != n_comp) {
01308                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
01309             }
01310 
01311             /* Normalize and floor */
01312             if (vector_sum_norm(pdf, n_comp) <= 0.0)
01313                 n_err++;
01314             vector_floor(pdf, n_comp, SmoothMin);
01315             vector_sum_norm(pdf, n_comp);
01316 
01317             /* Convert to LOG, quantize, and transpose */
01318             for (c = 0; c < n_comp; c++) {
01319                 int32 qscr;
01320 
01321                 qscr = -logmath_log(s->lmath_8b, pdf[c]);
01322                 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
01323                     qscr = MAX_NEG_MIXW;
01324                 s->mixw[f][c][i] = qscr;
01325             }
01326         }
01327     }
01328     if (n_err > 0)
01329         E_ERROR("Weight normalization failed for %d senones\n", n_err);
01330 
01331     ckd_free(pdf);
01332 
01333     if (chksum_present)
01334         bio_verify_chksum(fp, byteswap, chksum);
01335 
01336     if (fread(&eofchk, 1, 1, fp) == 1)
01337         E_FATAL("More data than expected in %s\n", file_name);
01338 
01339     fclose(fp);
01340 
01341     E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
01342     return n_sen;
01343 }
01344 
01345 
01346 /* Read a Sphinx3 mean or variance file. */
01347 static int32
01348 s3_read_mgau(s2_semi_mgau_t *s, const char *file_name, float32 ***out_cb)
01349 {
01350     char tmp;
01351     FILE *fp;
01352     int32 i, blk, n;
01353     int32 n_mgau;
01354     int32 n_feat;
01355     int32 n_density;
01356     int32 *veclen;
01357     int32 byteswap, chksum_present;
01358     char **argname, **argval;
01359     uint32 chksum;
01360 
01361     E_INFO("Reading S3 mixture gaussian file '%s'\n", file_name);
01362 
01363     if ((fp = fopen(file_name, "rb")) == NULL)
01364         E_FATAL("fopen(%s,rb) failed\n", file_name);
01365 
01366     /* Read header, including argument-value info and 32-bit byteorder magic */
01367     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
01368         E_FATAL("bio_readhdr(%s) failed\n", file_name);
01369 
01370     /* Parse argument-value list */
01371     chksum_present = 0;
01372     for (i = 0; argname[i]; i++) {
01373         if (strcmp(argname[i], "version") == 0) {
01374             if (strcmp(argval[i], MGAU_PARAM_VERSION) != 0)
01375                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
01376                        file_name, argval[i], MGAU_PARAM_VERSION);
01377         }
01378         else if (strcmp(argname[i], "chksum0") == 0) {
01379             chksum_present = 1; /* Ignore the associated value */
01380         }
01381     }
01382     bio_hdrarg_free(argname, argval);
01383     argname = argval = NULL;
01384 
01385     chksum = 0;
01386 
01387     /* #Codebooks */
01388     if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01389         E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
01390     if (n_mgau != 1) {
01391         E_ERROR("%s: #codebooks (%d) != 1\n", file_name, n_mgau);
01392         fclose(fp);
01393         return -1;
01394     }
01395 
01396     /* #Features/codebook */
01397     if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01398         E_FATAL("fread(%s) (#features) failed\n", file_name);
01399     if (s->n_feat == 0)
01400         s->n_feat = n_feat;
01401     else if (n_feat != s->n_feat)
01402         E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01403 
01404     /* #Gaussian densities/feature in each codebook */
01405     if (bio_fread(&n_density, sizeof(int32), 1, fp,
01406                   byteswap, &chksum) != 1)
01407         E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
01408     if (s->n_density == 0)
01409         s->n_density = n_density;
01410     else if (n_density != s->n_density)
01411         E_FATAL("%s: Number of densities per feature(%d) != %d\n",
01412                 file_name, n_mgau, s->n_density);
01413 
01414     /* Vector length of feature stream */
01415     if (s->veclen == NULL)
01416         s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
01417     veclen = ckd_calloc(s->n_feat, sizeof(int32));
01418     if (bio_fread(veclen, sizeof(int32), s->n_feat,
01419                   fp, byteswap, &chksum) != s->n_feat)
01420         E_FATAL("fread(%s) (feature vector-length) failed\n", file_name);
01421     for (i = 0, blk = 0; i < s->n_feat; ++i) {
01422         if (s->veclen[i] == 0)
01423             s->veclen[i] = veclen[i];
01424         else if (veclen[i] != s->veclen[i])
01425             E_FATAL("feature stream length %d is inconsistent (%d != %d)\n",
01426                     i, veclen[i], s->veclen[i]);
01427         blk += veclen[i];
01428     }
01429 
01430     /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
01431     if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01432         E_FATAL("fread(%s) (total #floats) failed\n", file_name);
01433     if (n != n_mgau * n_density * blk)
01434         E_FATAL
01435             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
01436              file_name, n, n_mgau, n_density, blk);
01437 
01438     *out_cb = ckd_calloc(s->n_feat, sizeof(float32 *));
01439     for (i = 0; i < s->n_feat; ++i) {
01440         (*out_cb)[i] =
01441             (float32 *) ckd_calloc(n_density * veclen[i],
01442                                    sizeof(float32));
01443         if (bio_fread
01444             ((*out_cb)[i], sizeof(float32),
01445              n_density * veclen[i], fp,
01446              byteswap, &chksum) != n_density * veclen[i])
01447             E_FATAL("fread(%s, %d) of feat %d failed\n", file_name,
01448                     n_density * veclen[i], i);
01449     }
01450     ckd_free(veclen);
01451 
01452     if (chksum_present)
01453         bio_verify_chksum(fp, byteswap, chksum);
01454 
01455     if (fread(&tmp, 1, 1, fp) == 1)
01456         E_FATAL("%s: More data than expected\n", file_name);
01457 
01458     fclose(fp);
01459 
01460     E_INFO("%d mixture Gaussians, %d components, %d feature streams, veclen %d\n", n_mgau,
01461            n_density, n_feat, blk);
01462 
01463     return n;
01464 }
01465 
01466 static int32
01467 s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
01468 {
01469     int feat;
01470 
01471     for (feat = 0; feat < s->n_feat; ++feat) {
01472         float32 *fmp;
01473         mfcc_t *mp;
01474         mfcc_t *vp, *dp;
01475         int32 vecLen, i;
01476 
01477         vecLen = s->veclen[feat];
01478         fmp = (float32 *) s->means[feat];
01479         mp = s->means[feat];
01480         vp = s->vars[feat];
01481         dp = s->dets[feat];
01482 
01483         for (i = 0; i < s->n_density; ++i) {
01484             mfcc_t d;
01485             int32 j;
01486 
01487             d = 0;
01488             for (j = 0; j < vecLen; ++j, ++vp, ++mp, ++fmp) {
01489                 float64 fvar;
01490 
01491                 *mp = FLOAT2MFCC(*fmp);
01492                 /* Always do these pre-calculations in floating point */
01493                 fvar = *(float32 *) vp;
01494                 if (fvar < vFloor)
01495                     fvar = vFloor;
01496                 d += (mfcc_t)logmath_log(lmath, 1 / sqrt(fvar * 2.0 * M_PI));
01497                 *vp = (mfcc_t)logmath_ln_to_log(lmath, 1.0 / (2.0 * fvar));
01498             }
01499             *dp++ = d;
01500         }
01501     }
01502     return 0;
01503 }
01504 
01505 int
01506 split_topn(char const *str, uint8 *out, int nfeat)
01507 {
01508     char *topn_list = ckd_salloc(str);
01509     char *c, *cc;
01510     int i, maxn;
01511 
01512     c = topn_list;
01513     i = 0;
01514     maxn = 0;
01515     while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
01516         *cc = '\0';
01517         out[i] = atoi(c);
01518         if (out[i] > maxn) maxn = out[i];
01519         c = cc + 1;
01520         ++i;
01521     }
01522     if (i < nfeat && *c != '\0') {
01523         out[i] = atoi(c);
01524         if (out[i] > maxn) maxn = out[i];
01525         ++i;
01526     }
01527     while (i < nfeat)
01528         out[i++] = maxn;
01529 
01530     ckd_free(topn_list);
01531     return maxn;
01532 }
01533 
01534 
01535 ps_mgau_t *
01536 s2_semi_mgau_init(acmod_t *acmod)
01537 {
01538     s2_semi_mgau_t *s;
01539     ps_mgau_t *ps;
01540     char const *sendump_path;
01541     float32 **fgau;
01542     int i;
01543 
01544     s = ckd_calloc(1, sizeof(*s));
01545     s->config = acmod->config;
01546 
01547     s->lmath = logmath_retain(acmod->lmath);
01548     /* Log-add table. */
01549     s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
01550     if (s->lmath_8b == NULL) {
01551         s2_semi_mgau_free(ps_mgau_base(s));
01552         return NULL;
01553     }
01554     /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
01555     if (logmath_get_width(s->lmath_8b) != 1) {
01556         E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
01557                 logmath_get_base(s->lmath_8b));
01558         s2_semi_mgau_free(ps_mgau_base(s));
01559         return NULL;
01560     }
01561 
01562     /* Inherit stream dimensions from acmod, will be checked below. */
01563     s->n_feat = feat_dimension1(acmod->fcb);
01564     s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
01565     for (i = 0; i < s->n_feat; ++i)
01566         s->veclen[i] = feat_dimension2(acmod->fcb, i);
01567 
01568     /* Read means and variances. */
01569     if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0) {
01570         s2_semi_mgau_free(ps_mgau_base(s));
01571         return NULL;
01572     }
01573     s->means = (mfcc_t **)fgau;
01574     if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0) {
01575         s2_semi_mgau_free(ps_mgau_base(s));
01576         return NULL;
01577     }
01578     s->vars = (mfcc_t **)fgau;
01579 
01580     /* Precompute (and fixed-point-ize) means, variances, and determinants. */
01581     s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
01582     s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));
01583 
01584     /* Read mixture weights */
01585     if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
01586         if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
01587             s2_semi_mgau_free(ps_mgau_base(s));
01588             return NULL;
01589         }
01590     }
01591     else {
01592         if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
01593                       cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
01594             s2_semi_mgau_free(ps_mgau_base(s));
01595             return NULL;
01596         }
01597     }
01598     s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
01599 
01600     /* Determine top-N for each feature */
01601     s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
01602     s->max_topn = cmd_ln_int32_r(s->config, "-topn");
01603     split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
01604     E_INFO("Maximum top-N: %d ", s->max_topn);
01605     E_INFOCONT("Top-N beams:");
01606     for (i = 0; i < s->n_feat; ++i) {
01607         E_INFOCONT(" %d", s->topn_beam[i]);
01608     }
01609     E_INFOCONT("\n");
01610 
01611     /* Top-N scores from recent frames */
01612     s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
01613     s->topn_hist = (vqFeature_t ***)
01614         ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
01615                       sizeof(***s->topn_hist));
01616     s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
01617                                    sizeof(**s->topn_hist_n));
01618     for (i = 0; i < s->n_topn_hist; ++i) {
01619         int j;
01620         for (j = 0; j < s->n_feat; ++j) {
01621             int k;
01622             for (k = 0; k < s->max_topn; ++k) {
01623                 s->topn_hist[i][j][k].score = WORST_DIST;
01624                 s->topn_hist[i][j][k].codeword = k;
01625             }
01626         }
01627     }
01628 
01629     ps = (ps_mgau_t *)s;
01630     ps->vt = &s2_semi_mgau_funcs;
01631     return ps;
01632 }
01633 
01634 int
01635 s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
01636                             ps_mllr_t *mllr)
01637 {
01638     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01639     int32 i, d, l, m;
01640     float32 **fmean, *mp, **fvar, *vp;
01641     float64 *temp;
01642 
01643     /* Reload means and variances. */
01644     if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fmean) < 0) {
01645         return -1;
01646     }
01647     if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fvar) < 0) {
01648         return -1;
01649     }
01650 
01651     /* Transform codebook for each stream s */
01652     for (i = 0; i < s->n_feat; i++) {
01653         temp = (float64 *) ckd_calloc(s->veclen[i], sizeof(float64));
01654         mp = fmean[i];
01655         vp = fvar[i];
01656 
01657         /* Transform each density d in selected codebook */
01658         for (d = 0; d < s->n_density; d++) {
01659             for (l = 0; l < s->veclen[i]; l++) {
01660                 temp[l] = 0.0;
01661                 for (m = 0; m < s->veclen[i]; m++) {
01662                     temp[l] += mllr->A[i][0][l][m] * mp[m];
01663                 }
01664                 temp[l] += mllr->b[i][0][l];
01665             }
01666 
01667             for (l = 0; l < s->veclen[i]; l++) {
01668                 mp[l] = (float32) temp[l];
01669                 vp[l] *= mllr->h[i][0][l];
01670             }
01671             mp += s->veclen[i];
01672             vp += s->veclen[i];
01673         }
01674 
01675         ckd_free(temp);
01676     }
01677 
01678     for (i = 0; i < s->n_feat; ++i) {
01679         if (s->means)
01680             ckd_free(s->means[i]);
01681         if (s->vars)
01682             ckd_free(s->vars[i]);
01683     }
01684     ckd_free(s->means);
01685     ckd_free(s->vars);
01686 
01687     s->means = (mfcc_t **)fmean;
01688     s->vars = (mfcc_t **)fvar;
01689     s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));
01690 
01691     return 0;
01692 }
01693 
01694 void
01695 s2_semi_mgau_free(ps_mgau_t *ps)
01696 {
01697     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01698     uint32 i;
01699 
01700     logmath_free(s->lmath);
01701     logmath_free(s->lmath_8b);
01702     if (s->sendump_mmap) {
01703         ckd_free_2d(s->mixw); 
01704         mmio_file_unmap(s->sendump_mmap);
01705     }
01706     else {
01707         ckd_free_3d(s->mixw);
01708     }
01709     for (i = 0; i < s->n_feat; ++i) {
01710         if (s->means)
01711             ckd_free(s->means[i]);
01712         if (s->vars)
01713             ckd_free(s->vars[i]);
01714     }
01715     for (i = 0; i < s->n_kdtrees; ++i)
01716         free_kd_tree(s->kdtrees[i]);
01717     ckd_free(s->kdtrees);
01718     ckd_free(s->veclen);
01719     ckd_free(s->means);
01720     ckd_free(s->vars);
01721     ckd_free(s->topn_beam);
01722     ckd_free_2d(s->topn_hist_n);
01723     ckd_free_3d((void **)s->topn_hist);
01724     ckd_free_2d((void **)s->dets);
01725     ckd_free(s);
01726 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7