00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <string.h>
00042 #include <assert.h>
00043 #include <limits.h>
00044 #include <math.h>
00045 #if defined(__ADSPBLACKFIN__)
00046 #elif !defined(_WIN32_WCE)
00047 #include <sys/types.h>
00048 #endif
00049
00050 #ifndef M_PI
00051 #define M_PI 3.14159265358979323846
00052 #endif
00053
00054
00055 #include <sphinx_config.h>
00056 #include <cmd_ln.h>
00057 #include <fixpoint.h>
00058 #include <ckd_alloc.h>
00059 #include <bio.h>
00060 #include <err.h>
00061 #include <prim_type.h>
00062
00063
00064 #include "s2_semi_mgau.h"
00065 #include "kdtree.h"
00066 #include "posixwin32.h"
00067
00068 static ps_mgaufuncs_t s2_semi_mgau_funcs = {
00069 "s2_semi",
00070 &s2_semi_mgau_frame_eval,
00071 &s2_semi_mgau_mllr_transform,
00072 &s2_semi_mgau_free
00073 };
00074
/* Version strings for the mixture-weight and parameter files. */
#define MGAU_MIXW_VERSION       "1.0"
#define MGAU_PARAM_VERSION      "1.0"

/* Sentinel values; expansions are fully parenthesized. */
#define NONE                    (-1)
#define WORST_DIST              ((int32)(0x80000000))
00079
00080 struct vqFeature_s {
00081 int32 score;
00082 int32 codeword;
00083 };
00084
/*
 * Accumulation helpers for density scores.
 *
 * In fixed-point builds the result saturates instead of wrapping.  The
 * range check is done BEFORE the arithmetic, so it never relies on signed
 * overflow (undefined behaviour that optimizers are free to fold away).
 * A negative second operand yields the saturation value, which is what the
 * previous wraparound-based test produced for that case; callers are
 * expected to pass b >= 0.
 *
 * Both macros evaluate their arguments more than once: do not pass
 * expressions with side effects.
 */
#ifdef FIXED_POINT
#define GMMSUB(a,b) \
    ((((b) < 0) || ((a) < INT_MIN + (b))) ? (INT_MIN) : ((a)-(b)))

#define GMMADD(a,b) \
    ((((b) < 0) || ((a) > INT_MAX - (b))) ? (INT_MAX) : ((a)+(b)))
#else
#define GMMSUB(a,b) ((a)-(b))
#define GMMADD(a,b) ((a)+(b))
#endif
00096
/* Smaller of two values (arguments are evaluated twice). */
#ifndef MIN
#define MIN(a,b)    (((a) < (b)) ? (a) : (b))
#endif
00100
00101
/*
 * Storage-class/inline specifier for small helpers defined in this file.
 *
 * A plain C99 `inline` definition provides no external definition, so a
 * call the compiler chooses not to inline fails at link time.  Any C99 or
 * later compiler (and GCC in any mode) therefore gets `static inline`.
 */
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) \
    || defined(__GNUC__)
#define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#define LOGMATH_INLINE __inline
#else
#define LOGMATH_INLINE static
#endif

/*
 * Upper bounds on quantized scores.  MAX_NEG_ASCR is the clamp applied to
 * normalized density scores in mgau_norm(); MAX_NEG_MIXW is presumably the
 * corresponding bound for mixture weights (TODO confirm where it is used).
 * Their sum is 255, the largest value a uint8 can hold.
 */
#define MAX_NEG_MIXW 159
#define MAX_NEG_ASCR 96
00135 LOGMATH_INLINE int
00136 fast_logmath_add(logmath_t *lmath, int mlx, int mly)
00137 {
00138 logadd_t *t = LOGMATH_TABLE(lmath);
00139 int d, r;
00140
00141
00142 if (mlx > mly) {
00143 d = (mlx - mly);
00144 r = mly;
00145 }
00146 else {
00147 d = (mly - mlx);
00148 r = mlx;
00149 }
00150
00151 return r - (((uint8 *)t->table)[d]);
00152 }
00153
00154 static void
00155 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00156 {
00157 int32 i, ceplen;
00158 vqFeature_t *topn;
00159
00160 topn = s->f[feat];
00161 ceplen = s->veclen[feat];
00162
00163 for (i = 0; i < s->max_topn; i++) {
00164 mfcc_t *mean, diff, sqdiff, compl;
00165 vqFeature_t vtmp;
00166 mfcc_t *var, d;
00167 mfcc_t *obs;
00168 int32 cw, j;
00169
00170 cw = topn[i].codeword;
00171 mean = s->means[feat] + cw * ceplen;
00172 var = s->vars[feat] + cw * ceplen;
00173 d = s->dets[feat][cw];
00174 obs = z;
00175 for (j = 0; j < ceplen; j++) {
00176 diff = *obs++ - *mean++;
00177 sqdiff = MFCCMUL(diff, diff);
00178 compl = MFCCMUL(sqdiff, *var);
00179 d = GMMSUB(d, compl);
00180 ++var;
00181 }
00182 topn[i].score = (int32)d;
00183 if (i == 0)
00184 continue;
00185 vtmp = topn[i];
00186 for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
00187 topn[j + 1] = topn[j];
00188 }
00189 topn[j + 1] = vtmp;
00190 }
00191 }
00192
00193 static void
00194 eval_cb_kdtree(s2_semi_mgau_t *s, int32 feat, mfcc_t *z,
00195 kd_tree_node_t *node, uint32 maxbbi)
00196 {
00197 vqFeature_t *worst, *best, *topn;
00198 int32 i, ceplen;
00199
00200 best = topn = s->f[feat];
00201 worst = topn + (s->max_topn - 1);
00202 ceplen = s->veclen[feat];
00203
00204 for (i = 0; i < maxbbi; ++i) {
00205 mfcc_t *mean, diff, sqdiff, compl;
00206 mfcc_t *var, d;
00207 mfcc_t *obs;
00208 vqFeature_t *cur;
00209 int32 cw, j, k;
00210
00211 cw = node->bbi[i];
00212 mean = s->means[feat] + cw * ceplen;
00213 var = s->vars[feat] + cw * ceplen;
00214 d = s->dets[feat][cw];
00215 obs = z;
00216 for (j = 0; (j < ceplen) && (d >= worst->score); j++) {
00217 diff = *obs++ - *mean++;
00218 sqdiff = MFCCMUL(diff, diff);
00219 compl = MFCCMUL(sqdiff, *var);
00220 d = GMMSUB(d, compl);
00221 ++var;
00222 }
00223 if (j < ceplen)
00224 continue;
00225 if ((int32)d < worst->score)
00226 continue;
00227 for (k = 0; k < s->max_topn; k++) {
00228
00229 if (topn[k].codeword == cw)
00230 break;
00231 }
00232 if (k < s->max_topn)
00233 continue;
00234
00235 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00236 memcpy(cur + 1, cur, sizeof(vqFeature_t));
00237 ++cur;
00238 cur->codeword = cw;
00239 cur->score = (int32)d;
00240 }
00241 }
00242
00243 static void
00244 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00245 {
00246 vqFeature_t *worst, *best, *topn;
00247 mfcc_t *mean;
00248 mfcc_t *var, *det, *detP, *detE;
00249 int32 i, ceplen;
00250
00251 best = topn = s->f[feat];
00252 worst = topn + (s->max_topn - 1);
00253 mean = s->means[feat];
00254 var = s->vars[feat];
00255 det = s->dets[feat];
00256 detE = det + s->n_density;
00257 ceplen = s->veclen[feat];
00258
00259 for (detP = det; detP < detE; ++detP) {
00260 mfcc_t diff, sqdiff, compl;
00261 mfcc_t d;
00262 mfcc_t *obs;
00263 vqFeature_t *cur;
00264 int32 cw, j;
00265
00266 d = *detP;
00267 obs = z;
00268 cw = detP - det;
00269 for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
00270 diff = *obs++ - *mean++;
00271 sqdiff = MFCCMUL(diff, diff);
00272 compl = MFCCMUL(sqdiff, *var);
00273 d = GMMSUB(d, compl);
00274 ++var;
00275 }
00276 if (j < ceplen) {
00277
00278 mean += (ceplen - j);
00279 var += (ceplen - j);
00280 continue;
00281 }
00282 if ((int32)d < worst->score)
00283 continue;
00284 for (i = 0; i < s->max_topn; i++) {
00285
00286 if (topn[i].codeword == cw)
00287 break;
00288 }
00289 if (i < s->max_topn)
00290 continue;
00291
00292 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00293 memcpy(cur + 1, cur, sizeof(vqFeature_t));
00294 ++cur;
00295 cur->codeword = cw;
00296 cur->score = (int32)d;
00297 }
00298 }
00299
00300 static void
00301 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
00302 {
00303 eval_topn(s, feat, z);
00304
00305
00306 if (frame % s->ds_ratio)
00307 return;
00308
00309
00310 if (s->kdtrees) {
00311 kd_tree_node_t *node;
00312 uint32 maxbbi;
00313
00314 node =
00315 eval_kd_tree(s->kdtrees[feat], z, s->kd_maxdepth);
00316 maxbbi = s->kd_maxbbi == -1 ? node->n_bbi : MIN(node->n_bbi,
00317 s->
00318 kd_maxbbi);
00319 eval_cb_kdtree(s, feat, z, node, maxbbi);
00320 }
00321 else {
00322 eval_cb(s, feat, z);
00323 }
00324 }
00325
00326 static int
00327 mgau_norm(s2_semi_mgau_t *s, int feat)
00328 {
00329 int32 norm;
00330 int j;
00331
00332
00333 norm = s->f[feat][0].score >> SENSCR_SHIFT;
00334
00335
00336 for (j = 0; j < s->max_topn; ++j) {
00337 s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
00338 if (s->f[feat][j].score > MAX_NEG_ASCR)
00339 s->f[feat][j].score = MAX_NEG_ASCR;
00340 if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
00341 break;
00342 }
00343 return j;
00344 }
00345
00346 static int32
00347 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
00348 int16 *senone_scores, uint8 *senone_active,
00349 int32 n_senone_active)
00350 {
00351 int32 j, l;
00352 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00353
00354 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00355 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00356 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00357 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00358 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00359 pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00360
00361 for (l = j = 0; j < n_senone_active; j++) {
00362 int sen = senone_active[j] + l;
00363 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00364
00365 tmp = fast_logmath_add(s->lmath_8b, tmp,
00366 pid_cw1[sen] + s->f[i][1].score);
00367 tmp = fast_logmath_add(s->lmath_8b, tmp,
00368 pid_cw2[sen] + s->f[i][2].score);
00369 tmp = fast_logmath_add(s->lmath_8b, tmp,
00370 pid_cw3[sen] + s->f[i][3].score);
00371 tmp = fast_logmath_add(s->lmath_8b, tmp,
00372 pid_cw4[sen] + s->f[i][4].score);
00373 tmp = fast_logmath_add(s->lmath_8b, tmp,
00374 pid_cw5[sen] + s->f[i][5].score);
00375
00376 senone_scores[sen] += tmp;
00377 l = sen;
00378 }
00379 return 0;
00380 }
00381
00382 static int32
00383 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
00384 int16 *senone_scores, uint8 *senone_active,
00385 int32 n_senone_active)
00386 {
00387 int32 j, l;
00388 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00389
00390 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00391 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00392 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00393 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00394 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00395
00396 for (l = j = 0; j < n_senone_active; j++) {
00397 int sen = senone_active[j] + l;
00398 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00399
00400 tmp = fast_logmath_add(s->lmath_8b, tmp,
00401 pid_cw1[sen] + s->f[i][1].score);
00402 tmp = fast_logmath_add(s->lmath_8b, tmp,
00403 pid_cw2[sen] + s->f[i][2].score);
00404 tmp = fast_logmath_add(s->lmath_8b, tmp,
00405 pid_cw3[sen] + s->f[i][3].score);
00406 tmp = fast_logmath_add(s->lmath_8b, tmp,
00407 pid_cw4[sen] + s->f[i][4].score);
00408
00409 senone_scores[sen] += tmp;
00410 l = sen;
00411 }
00412 return 0;
00413 }
00414
00415 static int32
00416 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
00417 int16 *senone_scores, uint8 *senone_active,
00418 int32 n_senone_active)
00419 {
00420 int32 j, l;
00421 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00422
00423 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00424 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00425 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00426 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00427
00428 for (l = j = 0; j < n_senone_active; j++) {
00429 int sen = senone_active[j] + l;
00430 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00431
00432 tmp = fast_logmath_add(s->lmath_8b, tmp,
00433 pid_cw1[sen] + s->f[i][1].score);
00434 tmp = fast_logmath_add(s->lmath_8b, tmp,
00435 pid_cw2[sen] + s->f[i][2].score);
00436 tmp = fast_logmath_add(s->lmath_8b, tmp,
00437 pid_cw3[sen] + s->f[i][3].score);
00438
00439 senone_scores[sen] += tmp;
00440 l = sen;
00441 }
00442 return 0;
00443 }
00444
00445 static int32
00446 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
00447 int16 *senone_scores, uint8 *senone_active,
00448 int32 n_senone_active)
00449 {
00450 int32 j, l;
00451 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00452
00453 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00454 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00455 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00456
00457 for (l = j = 0; j < n_senone_active; j++) {
00458 int sen = senone_active[j] + l;
00459 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00460
00461 tmp = fast_logmath_add(s->lmath_8b, tmp,
00462 pid_cw1[sen] + s->f[i][1].score);
00463 tmp = fast_logmath_add(s->lmath_8b, tmp,
00464 pid_cw2[sen] + s->f[i][2].score);
00465
00466 senone_scores[sen] += tmp;
00467 l = sen;
00468 }
00469 return 0;
00470 }
00471
00472 static int32
00473 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
00474 int16 *senone_scores, uint8 *senone_active,
00475 int32 n_senone_active)
00476 {
00477 int32 j, l;
00478 uint8 *pid_cw0, *pid_cw1;
00479
00480 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00481 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00482
00483 for (l = j = 0; j < n_senone_active; j++) {
00484 int sen = senone_active[j] + l;
00485 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00486
00487 tmp = fast_logmath_add(s->lmath_8b, tmp,
00488 pid_cw1[sen] + s->f[i][1].score);
00489
00490 senone_scores[sen] += tmp;
00491 l = sen;
00492 }
00493 return 0;
00494 }
00495
00496 static int32
00497 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
00498 int16 *senone_scores, uint8 *senone_active,
00499 int32 n_senone_active)
00500 {
00501 int32 j, l;
00502 uint8 *pid_cw0;
00503
00504 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00505 for (l = j = 0; j < n_senone_active; j++) {
00506 int sen = senone_active[j] + l;
00507 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00508 senone_scores[sen] += tmp;
00509 l = sen;
00510 }
00511 return 0;
00512 }
00513
00514 static int32
00515 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00516 int16 *senone_scores, uint8 *senone_active,
00517 int32 n_senone_active)
00518 {
00519 int32 j, k, l;
00520
00521 for (l = j = 0; j < n_senone_active; j++) {
00522 int sen = senone_active[j] + l;
00523 uint8 *pid_cw;
00524 int32 tmp;
00525 pid_cw = s->mixw[i][s->f[i][0].codeword];
00526 tmp = pid_cw[sen] + s->f[i][0].score;
00527 for (k = 1; k < topn; ++k) {
00528 pid_cw = s->mixw[i][s->f[i][k].codeword];
00529 tmp = fast_logmath_add(s->lmath_8b, tmp,
00530 pid_cw[sen] + s->f[i][k].score);
00531 }
00532 senone_scores[sen] += tmp;
00533 l = sen;
00534 }
00535 return 0;
00536 }
00537
00538 static int32
00539 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
00540 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00541 {
00542 switch (topn) {
00543 case 6:
00544 return get_scores_8b_feat_6(s, i, senone_scores,
00545 senone_active, n_senone_active);
00546 case 5:
00547 return get_scores_8b_feat_5(s, i, senone_scores,
00548 senone_active, n_senone_active);
00549 case 4:
00550 return get_scores_8b_feat_4(s, i, senone_scores,
00551 senone_active, n_senone_active);
00552 case 3:
00553 return get_scores_8b_feat_3(s, i, senone_scores,
00554 senone_active, n_senone_active);
00555 case 2:
00556 return get_scores_8b_feat_2(s, i, senone_scores,
00557 senone_active, n_senone_active);
00558 case 1:
00559 return get_scores_8b_feat_1(s, i, senone_scores,
00560 senone_active, n_senone_active);
00561 default:
00562 return get_scores_8b_feat_any(s, i, topn, senone_scores,
00563 senone_active, n_senone_active);
00564 }
00565 }
00566
00567 static int32
00568 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00569 {
00570 int32 j, k;
00571
00572 for (j = 0; j < s->n_sen; j++) {
00573 uint8 *pid_cw;
00574 int32 tmp;
00575 pid_cw = s->mixw[i][s->f[i][0].codeword];
00576 tmp = pid_cw[j] + s->f[i][0].score;
00577 for (k = 1; k < topn; ++k) {
00578 pid_cw = s->mixw[i][s->f[i][k].codeword];
00579 tmp = fast_logmath_add(s->lmath_8b, tmp,
00580 pid_cw[j] + s->f[i][k].score);
00581 }
00582 senone_scores[j] += tmp;
00583 }
00584 return 0;
00585 }
00586
00587 static int32
00588 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
00589 int16 *senone_scores, uint8 *senone_active,
00590 int32 n_senone_active)
00591 {
00592 int32 j, l;
00593 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00594 uint8 w_den[6][16];
00595
00596
00597 for (j = 0; j < 16; ++j) {
00598 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00599 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00600 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00601 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00602 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00603 w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
00604 }
00605
00606 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00607 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00608 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00609 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00610 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00611 pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00612
00613 for (l = j = 0; j < n_senone_active; j++) {
00614 int n = senone_active[j] + l;
00615 int tmp, cw;
00616
00617 if (n & 1) {
00618 cw = pid_cw0[n/2] >> 4;
00619 tmp = w_den[0][cw];
00620 cw = pid_cw1[n/2] >> 4;
00621 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00622 cw = pid_cw2[n/2] >> 4;
00623 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00624 cw = pid_cw3[n/2] >> 4;
00625 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00626 cw = pid_cw4[n/2] >> 4;
00627 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00628 cw = pid_cw5[n/2] >> 4;
00629 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00630 }
00631 else {
00632 cw = pid_cw0[n/2] & 0x0f;
00633 tmp = w_den[0][cw];
00634 cw = pid_cw1[n/2] & 0x0f;
00635 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00636 cw = pid_cw2[n/2] & 0x0f;
00637 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00638 cw = pid_cw3[n/2] & 0x0f;
00639 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00640 cw = pid_cw4[n/2] & 0x0f;
00641 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00642 cw = pid_cw5[n/2] & 0x0f;
00643 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00644 }
00645 senone_scores[n] += tmp;
00646 l = n;
00647 }
00648 return 0;
00649 }
00650
00651 static int32
00652 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
00653 int16 *senone_scores, uint8 *senone_active,
00654 int32 n_senone_active)
00655 {
00656 int32 j, l;
00657 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00658 uint8 w_den[5][16];
00659
00660
00661 for (j = 0; j < 16; ++j) {
00662 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00663 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00664 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00665 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00666 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00667 }
00668
00669 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00670 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00671 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00672 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00673 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00674
00675 for (l = j = 0; j < n_senone_active; j++) {
00676 int n = senone_active[j] + l;
00677 int tmp, cw;
00678
00679 if (n & 1) {
00680 cw = pid_cw0[n/2] >> 4;
00681 tmp = w_den[0][cw];
00682 cw = pid_cw1[n/2] >> 4;
00683 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00684 cw = pid_cw2[n/2] >> 4;
00685 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00686 cw = pid_cw3[n/2] >> 4;
00687 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00688 cw = pid_cw4[n/2] >> 4;
00689 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00690 }
00691 else {
00692 cw = pid_cw0[n/2] & 0x0f;
00693 tmp = w_den[0][cw];
00694 cw = pid_cw1[n/2] & 0x0f;
00695 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00696 cw = pid_cw2[n/2] & 0x0f;
00697 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00698 cw = pid_cw3[n/2] & 0x0f;
00699 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00700 cw = pid_cw4[n/2] & 0x0f;
00701 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00702 }
00703 senone_scores[n] += tmp;
00704 l = n;
00705 }
00706 return 0;
00707 }
00708
00709 static int32
00710 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
00711 int16 *senone_scores, uint8 *senone_active,
00712 int32 n_senone_active)
00713 {
00714 int32 j, l;
00715 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00716 uint8 w_den[4][16];
00717
00718
00719 for (j = 0; j < 16; ++j) {
00720 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00721 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00722 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00723 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00724 }
00725
00726 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00727 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00728 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00729 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00730
00731 for (l = j = 0; j < n_senone_active; j++) {
00732 int n = senone_active[j] + l;
00733 int tmp, cw;
00734
00735 if (n & 1) {
00736 cw = pid_cw0[n/2] >> 4;
00737 tmp = w_den[0][cw];
00738 cw = pid_cw1[n/2] >> 4;
00739 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00740 cw = pid_cw2[n/2] >> 4;
00741 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00742 cw = pid_cw3[n/2] >> 4;
00743 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00744 }
00745 else {
00746 cw = pid_cw0[n/2] & 0x0f;
00747 tmp = w_den[0][cw];
00748 cw = pid_cw1[n/2] & 0x0f;
00749 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00750 cw = pid_cw2[n/2] & 0x0f;
00751 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00752 cw = pid_cw3[n/2] & 0x0f;
00753 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00754 }
00755 senone_scores[n] += tmp;
00756 l = n;
00757 }
00758 return 0;
00759 }
00760
00761 static int32
00762 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
00763 int16 *senone_scores, uint8 *senone_active,
00764 int32 n_senone_active)
00765 {
00766 int32 j, l;
00767 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00768 uint8 w_den[3][16];
00769
00770
00771 for (j = 0; j < 16; ++j) {
00772 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00773 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00774 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00775 }
00776
00777 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00778 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00779 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00780
00781 for (l = j = 0; j < n_senone_active; j++) {
00782 int n = senone_active[j] + l;
00783 int tmp, cw;
00784
00785 if (n & 1) {
00786 cw = pid_cw0[n/2] >> 4;
00787 tmp = w_den[0][cw];
00788 cw = pid_cw1[n/2] >> 4;
00789 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00790 cw = pid_cw2[n/2] >> 4;
00791 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00792 }
00793 else {
00794 cw = pid_cw0[n/2] & 0x0f;
00795 tmp = w_den[0][cw];
00796 cw = pid_cw1[n/2] & 0x0f;
00797 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00798 cw = pid_cw2[n/2] & 0x0f;
00799 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00800 }
00801 senone_scores[n] += tmp;
00802 l = n;
00803 }
00804 return 0;
00805 }
00806
00807 static int32
00808 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
00809 int16 *senone_scores, uint8 *senone_active,
00810 int32 n_senone_active)
00811 {
00812 int32 j, l;
00813 uint8 *pid_cw0, *pid_cw1;
00814 uint8 w_den[2][16];
00815
00816
00817 for (j = 0; j < 16; ++j) {
00818 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00819 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00820 }
00821
00822 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00823 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00824
00825 for (l = j = 0; j < n_senone_active; j++) {
00826 int n = senone_active[j] + l;
00827 int tmp, cw;
00828
00829 if (n & 1) {
00830 cw = pid_cw0[n/2] >> 4;
00831 tmp = w_den[0][cw];
00832 cw = pid_cw1[n/2] >> 4;
00833 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00834 }
00835 else {
00836 cw = pid_cw0[n/2] & 0x0f;
00837 tmp = w_den[0][cw];
00838 cw = pid_cw1[n/2] & 0x0f;
00839 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00840 }
00841 senone_scores[n] += tmp;
00842 l = n;
00843 }
00844 return 0;
00845 }
00846
00847 static int32
00848 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
00849 int16 *senone_scores, uint8 *senone_active,
00850 int32 n_senone_active)
00851 {
00852 int32 j, l;
00853 uint8 *pid_cw0;
00854 uint8 w_den[16];
00855
00856
00857 for (j = 0; j < 16; ++j) {
00858 w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
00859 }
00860
00861 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00862
00863 for (l = j = 0; j < n_senone_active; j++) {
00864 int n = senone_active[j] + l;
00865 int tmp, cw;
00866
00867 if (n & 1) {
00868 cw = pid_cw0[n/2] >> 4;
00869 tmp = w_den[cw];
00870 }
00871 else {
00872 cw = pid_cw0[n/2] & 0x0f;
00873 tmp = w_den[cw];
00874 }
00875 senone_scores[n] += tmp;
00876 l = n;
00877 }
00878 return 0;
00879 }
00880
00881 static int32
00882 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00883 int16 *senone_scores, uint8 *senone_active,
00884 int32 n_senone_active)
00885 {
00886 int32 j, k, l;
00887
00888 for (l = j = 0; j < n_senone_active; j++) {
00889 int n = senone_active[j] + l;
00890 int tmp, cw;
00891 uint8 *pid_cw;
00892
00893 pid_cw = s->mixw[i][s->f[i][0].codeword];
00894 if (n & 1)
00895 cw = pid_cw[n/2] >> 4;
00896 else
00897 cw = pid_cw[n/2] & 0x0f;
00898 tmp = s->mixw_cb[cw] + s->f[i][0].score;
00899 for (k = 1; k < topn; ++k) {
00900 pid_cw = s->mixw[i][s->f[i][k].codeword];
00901 if (n & 1)
00902 cw = pid_cw[n/2] >> 4;
00903 else
00904 cw = pid_cw[n/2] & 0x0f;
00905 tmp = fast_logmath_add(s->lmath_8b, tmp,
00906 s->mixw_cb[cw] + s->f[i][k].score);
00907 }
00908 senone_scores[n] += tmp;
00909 l = n;
00910 }
00911 return 0;
00912 }
00913
00914 static int32
00915 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
00916 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00917 {
00918 switch (topn) {
00919 case 6:
00920 return get_scores_4b_feat_6(s, i, senone_scores,
00921 senone_active, n_senone_active);
00922 case 5:
00923 return get_scores_4b_feat_5(s, i, senone_scores,
00924 senone_active, n_senone_active);
00925 case 4:
00926 return get_scores_4b_feat_4(s, i, senone_scores,
00927 senone_active, n_senone_active);
00928 case 3:
00929 return get_scores_4b_feat_3(s, i, senone_scores,
00930 senone_active, n_senone_active);
00931 case 2:
00932 return get_scores_4b_feat_2(s, i, senone_scores,
00933 senone_active, n_senone_active);
00934 case 1:
00935 return get_scores_4b_feat_1(s, i, senone_scores,
00936 senone_active, n_senone_active);
00937 default:
00938 return get_scores_4b_feat_any(s, i, topn, senone_scores,
00939 senone_active, n_senone_active);
00940 }
00941 }
00942
00943 static int32
00944 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00945 {
00946 int32 j, k;
00947
00948 for (j = 0; j < s->n_sen; j++) {
00949 uint8 *pid_cw;
00950 int32 tmp;
00951 pid_cw = s->mixw[i][s->f[i][0].codeword];
00952 tmp = pid_cw[j] + s->f[i][0].score;
00953 for (k = 1; k < topn; ++k) {
00954 pid_cw = s->mixw[i][s->f[i][k].codeword];
00955 tmp = fast_logmath_add(s->lmath_8b, tmp,
00956 pid_cw[j] + s->f[i][k].score);
00957 }
00958 senone_scores[j] += tmp;
00959 }
00960 return 0;
00961 }
00962
00963
00964
00965
00966 int32
00967 s2_semi_mgau_frame_eval(ps_mgau_t *ps,
00968 int16 *senone_scores,
00969 uint8 *senone_active,
00970 int32 n_senone_active,
00971 mfcc_t ** featbuf, int32 frame,
00972 int32 compallsen)
00973 {
00974 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
00975 int i, topn_idx;
00976
00977 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00978
00979
00980
00981 topn_idx = frame % s->n_topn_hist;
00982 s->f = s->topn_hist[topn_idx];
00983 for (i = 0; i < s->n_feat; ++i) {
00984
00985 if (frame >= ps_mgau_base(ps)->frame_idx) {
00986 vqFeature_t **lastf;
00987 if (topn_idx == 0)
00988 lastf = s->topn_hist[s->n_topn_hist-1];
00989 else
00990 lastf = s->topn_hist[topn_idx-1];
00991 memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
00992 mgau_dist(s, frame, i, featbuf[i]);
00993 s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
00994 }
00995 if (s->mixw_cb) {
00996 if (compallsen)
00997 get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
00998 else
00999 get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
01000 senone_active, n_senone_active);
01001 }
01002 else {
01003 if (compallsen)
01004 get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
01005 else
01006 get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
01007 senone_active, n_senone_active);
01008 }
01009 }
01010
01011 return 0;
01012 }
01013
01014 int32
01015 s2_semi_mgau_load_kdtree(ps_mgau_t * ps, const char *kdtree_path,
01016 uint32 maxdepth, int32 maxbbi)
01017 {
01018 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01019 if (read_kd_trees(kdtree_path, &s->kdtrees, &s->n_kdtrees,
01020 maxdepth, maxbbi) == -1)
01021 E_FATAL("Failed to read kd-trees from %s\n", kdtree_path);
01022 if (s->n_kdtrees != s->n_feat)
01023 E_FATAL("Number of kd-trees != %d\n", s->n_feat);
01024
01025 s->kd_maxdepth = maxdepth;
01026 s->kd_maxbbi = maxbbi;
01027 return 0;
01028 }
01029
01030 static int32
01031 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
01032 {
01033 FILE *fp;
01034 char line[1000];
01035 int32 i, n, r, c;
01036 int32 do_swap, do_mmap;
01037 size_t filesize, offset;
01038 int n_clust = 0;
01039 int n_feat = s->n_feat;
01040 int n_density = s->n_density;
01041 int n_sen = bin_mdef_n_sen(mdef);
01042 int n_bits = 8;
01043
01044 s->n_sen = n_sen;
01045 do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
01046
01047 if ((fp = fopen(file, "rb")) == NULL)
01048 return -1;
01049
01050 E_INFO("Loading senones from dump file %s\n", file);
01051
01052 if (fread(&n, sizeof(int32), 1, fp) != 1) {
01053 E_ERROR_SYSTEM("Failed to read title size from %s", file);
01054 goto error_out;
01055 }
01056
01057 do_swap = 0;
01058 if (n < 1 || n > 999) {
01059 SWAP_INT32(&n);
01060 if (n < 1 || n > 999) {
01061 E_ERROR("Title length %x in dump file %s out of range\n", n, file);
01062 goto error_out;
01063 }
01064 do_swap = 1;
01065 }
01066 if (fread(line, sizeof(char), n, fp) != n) {
01067 E_ERROR_SYSTEM("Cannot read title");
01068 goto error_out;
01069 }
01070 if (line[n - 1] != '\0') {
01071 E_ERROR("Bad title in dump file\n");
01072 goto error_out;
01073 }
01074 E_INFO("%s\n", line);
01075
01076
01077 if (fread(&n, sizeof(n), 1, fp) != 1) {
01078 E_ERROR_SYSTEM("Failed to read header size from %s", file);
01079 goto error_out;
01080 }
01081 if (do_swap) SWAP_INT32(&n);
01082 if (fread(line, sizeof(char), n, fp) != n) {
01083 E_ERROR_SYSTEM("Cannot read header");
01084 goto error_out;
01085 }
01086 if (line[n - 1] != '\0') {
01087 E_ERROR("Bad header in dump file\n");
01088 goto error_out;
01089 }
01090
01091
01092 for (;;) {
01093 if (fread(&n, sizeof(n), 1, fp) != 1) {
01094 E_ERROR_SYSTEM("Failed to read header string size from %s", file);
01095 goto error_out;
01096 }
01097 if (do_swap) SWAP_INT32(&n);
01098 if (n == 0)
01099 break;
01100 if (fread(line, sizeof(char), n, fp) != n) {
01101 E_ERROR_SYSTEM("Cannot read header");
01102 goto error_out;
01103 }
01104
01105 if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
01106 n_feat = atoi(line + strlen("feature_count "));
01107 }
01108 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
01109 n_density = atoi(line + strlen("mixture_count "));
01110 }
01111 if (!strncmp(line, "model_count ", strlen("model_count "))) {
01112 n_sen = atoi(line + strlen("model_count "));
01113 }
01114 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
01115 n_clust = atoi(line + strlen("cluster_count "));
01116 }
01117 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
01118 n_bits = atoi(line + strlen("cluster_bits "));
01119 }
01120 }
01121
01122
01123 c = n_sen;
01124 r = n_density;
01125 if (n_clust == 0) {
01126
01127 if (fread(&r, sizeof(r), 1, fp) != 1) {
01128 E_ERROR_SYSTEM("Cannot read #rows");
01129 goto error_out;
01130 }
01131 if (do_swap) SWAP_INT32(&r);
01132 if (fread(&c, sizeof(c), 1, fp) != 1) {
01133 E_ERROR_SYSTEM("Cannot read #columns");
01134 goto error_out;
01135 }
01136 if (do_swap) SWAP_INT32(&c);
01137 E_INFO("Rows: %d, Columns: %d\n", r, c);
01138 }
01139
01140 if (n_feat != s->n_feat) {
01141 E_ERROR("Number of feature streams mismatch: %d != %d\n",
01142 n_feat, s->n_feat);
01143 goto error_out;
01144 }
01145 if (n_density != s->n_density) {
01146 E_ERROR("Number of densities mismatch: %d != %d\n",
01147 n_density, s->n_density);
01148 goto error_out;
01149 }
01150 if (n_sen != s->n_sen) {
01151 E_ERROR("Number of senones mismatch: %d != %d\n",
01152 n_sen, s->n_sen);
01153 goto error_out;
01154 }
01155
01156 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
01157 E_ERROR("Cluster count must be 0, 15, or 16\n");
01158 goto error_out;
01159 }
01160 if (n_clust == 15)
01161 ++n_clust;
01162
01163 if (!((n_bits == 8) || (n_bits == 4))) {
01164 E_ERROR("Cluster count must be 4 or 8\n");
01165 goto error_out;
01166 }
01167
01168 if (do_mmap) {
01169 E_INFO("Using memory-mapped I/O for senones\n");
01170 }
01171 offset = ftell(fp);
01172 fseek(fp, 0, SEEK_END);
01173 filesize = ftell(fp);
01174 fseek(fp, offset, SEEK_SET);
01175
01176
01177 if (do_mmap) {
01178 s->sendump_mmap = mmio_file_read(file);
01179
01180 if (n_clust) {
01181 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01182 offset += n_clust;
01183 }
01184 }
01185 else {
01186
01187 if (n_clust) {
01188 s->mixw_cb = ckd_calloc(1, n_clust);
01189 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
01190 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
01191 goto error_out;
01192 }
01193 }
01194 }
01195
01196
01197 if (s->sendump_mmap) {
01198 s->mixw = ckd_calloc_2d(s->n_feat, n_density, sizeof(*s->mixw));
01199 for (n = 0; n < n_feat; n++) {
01200 int step = c;
01201 if (n_bits == 4)
01202 step = (step + 1) / 2;
01203 for (i = 0; i < r; i++) {
01204 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01205 offset += step;
01206 }
01207 }
01208 }
01209 else {
01210 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
01211
01212 for (n = 0; n < n_feat; n++) {
01213 int step = c;
01214 if (n_bits == 4)
01215 step = (step + 1) / 2;
01216 for (i = 0; i < r; i++) {
01217 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
01218 != (size_t) step) {
01219 E_ERROR("Failed to read %d bytes from sendump\n", step);
01220 goto error_out;
01221 }
01222 }
01223 }
01224 }
01225
01226 fclose(fp);
01227 return 0;
01228 error_out:
01229 fclose(fp);
01230 return -1;
01231 }
01232
01233 static int32
01234 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
01235 {
01236 char **argname, **argval;
01237 char eofchk;
01238 FILE *fp;
01239 int32 byteswap, chksum_present;
01240 uint32 chksum;
01241 float32 *pdf;
01242 int32 i, f, c, n;
01243 int32 n_sen;
01244 int32 n_feat;
01245 int32 n_comp;
01246 int32 n_err;
01247
01248 E_INFO("Reading mixture weights file '%s'\n", file_name);
01249
01250 if ((fp = fopen(file_name, "rb")) == NULL)
01251 E_FATAL("fopen(%s,rb) failed\n", file_name);
01252
01253
01254 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
01255 E_FATAL("bio_readhdr(%s) failed\n", file_name);
01256
01257
01258 chksum_present = 0;
01259 for (i = 0; argname[i]; i++) {
01260 if (strcmp(argname[i], "version") == 0) {
01261 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
01262 E_WARN("Version mismatch(%s): %s, expecting %s\n",
01263 file_name, argval[i], MGAU_MIXW_VERSION);
01264 }
01265 else if (strcmp(argname[i], "chksum0") == 0) {
01266 chksum_present = 1;
01267 }
01268 }
01269 bio_hdrarg_free(argname, argval);
01270 argname = argval = NULL;
01271
01272 chksum = 0;
01273
01274
01275 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01276 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
01277 1)
01278 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
01279 1)
01280 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
01281 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
01282 }
01283 if (n_feat != s->n_feat)
01284 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01285 if (n != n_sen * n_feat * n_comp) {
01286 E_FATAL
01287 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
01288 file_name, i, n_sen, n_feat, n_comp);
01289 }
01290
01291
01292
01293
01294 s->n_sen = n_sen;
01295
01296
01297 s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));
01298
01299
01300 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
01301
01302
01303 n_err = 0;
01304 for (i = 0; i < n_sen; i++) {
01305 for (f = 0; f < n_feat; f++) {
01306 if (bio_fread((void *) pdf, sizeof(float32),
01307 n_comp, fp, byteswap, &chksum) != n_comp) {
01308 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
01309 }
01310
01311
01312 if (vector_sum_norm(pdf, n_comp) <= 0.0)
01313 n_err++;
01314 vector_floor(pdf, n_comp, SmoothMin);
01315 vector_sum_norm(pdf, n_comp);
01316
01317
01318 for (c = 0; c < n_comp; c++) {
01319 int32 qscr;
01320
01321 qscr = -logmath_log(s->lmath_8b, pdf[c]);
01322 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
01323 qscr = MAX_NEG_MIXW;
01324 s->mixw[f][c][i] = qscr;
01325 }
01326 }
01327 }
01328 if (n_err > 0)
01329 E_ERROR("Weight normalization failed for %d senones\n", n_err);
01330
01331 ckd_free(pdf);
01332
01333 if (chksum_present)
01334 bio_verify_chksum(fp, byteswap, chksum);
01335
01336 if (fread(&eofchk, 1, 1, fp) == 1)
01337 E_FATAL("More data than expected in %s\n", file_name);
01338
01339 fclose(fp);
01340
01341 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
01342 return n_sen;
01343 }
01344
01345
01346
01347 static int32
01348 s3_read_mgau(s2_semi_mgau_t *s, const char *file_name, float32 ***out_cb)
01349 {
01350 char tmp;
01351 FILE *fp;
01352 int32 i, blk, n;
01353 int32 n_mgau;
01354 int32 n_feat;
01355 int32 n_density;
01356 int32 *veclen;
01357 int32 byteswap, chksum_present;
01358 char **argname, **argval;
01359 uint32 chksum;
01360
01361 E_INFO("Reading S3 mixture gaussian file '%s'\n", file_name);
01362
01363 if ((fp = fopen(file_name, "rb")) == NULL)
01364 E_FATAL("fopen(%s,rb) failed\n", file_name);
01365
01366
01367 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
01368 E_FATAL("bio_readhdr(%s) failed\n", file_name);
01369
01370
01371 chksum_present = 0;
01372 for (i = 0; argname[i]; i++) {
01373 if (strcmp(argname[i], "version") == 0) {
01374 if (strcmp(argval[i], MGAU_PARAM_VERSION) != 0)
01375 E_WARN("Version mismatch(%s): %s, expecting %s\n",
01376 file_name, argval[i], MGAU_PARAM_VERSION);
01377 }
01378 else if (strcmp(argname[i], "chksum0") == 0) {
01379 chksum_present = 1;
01380 }
01381 }
01382 bio_hdrarg_free(argname, argval);
01383 argname = argval = NULL;
01384
01385 chksum = 0;
01386
01387
01388 if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01389 E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
01390 if (n_mgau != 1) {
01391 E_ERROR("%s: #codebooks (%d) != 1\n", file_name, n_mgau);
01392 fclose(fp);
01393 return -1;
01394 }
01395
01396
01397 if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01398 E_FATAL("fread(%s) (#features) failed\n", file_name);
01399 if (s->n_feat == 0)
01400 s->n_feat = n_feat;
01401 else if (n_feat != s->n_feat)
01402 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01403
01404
01405 if (bio_fread(&n_density, sizeof(int32), 1, fp,
01406 byteswap, &chksum) != 1)
01407 E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
01408 if (s->n_density == 0)
01409 s->n_density = n_density;
01410 else if (n_density != s->n_density)
01411 E_FATAL("%s: Number of densities per feature(%d) != %d\n",
01412 file_name, n_mgau, s->n_density);
01413
01414
01415 if (s->veclen == NULL)
01416 s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
01417 veclen = ckd_calloc(s->n_feat, sizeof(int32));
01418 if (bio_fread(veclen, sizeof(int32), s->n_feat,
01419 fp, byteswap, &chksum) != s->n_feat)
01420 E_FATAL("fread(%s) (feature vector-length) failed\n", file_name);
01421 for (i = 0, blk = 0; i < s->n_feat; ++i) {
01422 if (s->veclen[i] == 0)
01423 s->veclen[i] = veclen[i];
01424 else if (veclen[i] != s->veclen[i])
01425 E_FATAL("feature stream length %d is inconsistent (%d != %d)\n",
01426 i, veclen[i], s->veclen[i]);
01427 blk += veclen[i];
01428 }
01429
01430
01431 if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01432 E_FATAL("fread(%s) (total #floats) failed\n", file_name);
01433 if (n != n_mgau * n_density * blk)
01434 E_FATAL
01435 ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
01436 file_name, n, n_mgau, n_density, blk);
01437
01438 *out_cb = ckd_calloc(s->n_feat, sizeof(float32 *));
01439 for (i = 0; i < s->n_feat; ++i) {
01440 (*out_cb)[i] =
01441 (float32 *) ckd_calloc(n_density * veclen[i],
01442 sizeof(float32));
01443 if (bio_fread
01444 ((*out_cb)[i], sizeof(float32),
01445 n_density * veclen[i], fp,
01446 byteswap, &chksum) != n_density * veclen[i])
01447 E_FATAL("fread(%s, %d) of feat %d failed\n", file_name,
01448 n_density * veclen[i], i);
01449 }
01450 ckd_free(veclen);
01451
01452 if (chksum_present)
01453 bio_verify_chksum(fp, byteswap, chksum);
01454
01455 if (fread(&tmp, 1, 1, fp) == 1)
01456 E_FATAL("%s: More data than expected\n", file_name);
01457
01458 fclose(fp);
01459
01460 E_INFO("%d mixture Gaussians, %d components, %d feature streams, veclen %d\n", n_mgau,
01461 n_density, n_feat, blk);
01462
01463 return n;
01464 }
01465
01466 static int32
01467 s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
01468 {
01469 int feat;
01470
01471 for (feat = 0; feat < s->n_feat; ++feat) {
01472 float32 *fmp;
01473 mfcc_t *mp;
01474 mfcc_t *vp, *dp;
01475 int32 vecLen, i;
01476
01477 vecLen = s->veclen[feat];
01478 fmp = (float32 *) s->means[feat];
01479 mp = s->means[feat];
01480 vp = s->vars[feat];
01481 dp = s->dets[feat];
01482
01483 for (i = 0; i < s->n_density; ++i) {
01484 mfcc_t d;
01485 int32 j;
01486
01487 d = 0;
01488 for (j = 0; j < vecLen; ++j, ++vp, ++mp, ++fmp) {
01489 float64 fvar;
01490
01491 *mp = FLOAT2MFCC(*fmp);
01492
01493 fvar = *(float32 *) vp;
01494 if (fvar < vFloor)
01495 fvar = vFloor;
01496 d += (mfcc_t)logmath_log(lmath, 1 / sqrt(fvar * 2.0 * M_PI));
01497 *vp = (mfcc_t)logmath_ln_to_log(lmath, 1.0 / (2.0 * fvar));
01498 }
01499 *dp++ = d;
01500 }
01501 }
01502 return 0;
01503 }
01504
01505 int
01506 split_topn(char const *str, uint8 *out, int nfeat)
01507 {
01508 char *topn_list = ckd_salloc(str);
01509 char *c, *cc;
01510 int i, maxn;
01511
01512 c = topn_list;
01513 i = 0;
01514 maxn = 0;
01515 while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
01516 *cc = '\0';
01517 out[i] = atoi(c);
01518 if (out[i] > maxn) maxn = out[i];
01519 c = cc + 1;
01520 ++i;
01521 }
01522 if (i < nfeat && *c != '\0') {
01523 out[i] = atoi(c);
01524 if (out[i] > maxn) maxn = out[i];
01525 ++i;
01526 }
01527 while (i < nfeat)
01528 out[i++] = maxn;
01529
01530 ckd_free(topn_list);
01531 return maxn;
01532 }
01533
01534
01535 ps_mgau_t *
01536 s2_semi_mgau_init(acmod_t *acmod)
01537 {
01538 s2_semi_mgau_t *s;
01539 ps_mgau_t *ps;
01540 char const *sendump_path;
01541 float32 **fgau;
01542 int i;
01543
01544 s = ckd_calloc(1, sizeof(*s));
01545 s->config = acmod->config;
01546
01547 s->lmath = logmath_retain(acmod->lmath);
01548
01549 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
01550 if (s->lmath_8b == NULL) {
01551 s2_semi_mgau_free(ps_mgau_base(s));
01552 return NULL;
01553 }
01554
01555 if (logmath_get_width(s->lmath_8b) != 1) {
01556 E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
01557 logmath_get_base(s->lmath_8b));
01558 s2_semi_mgau_free(ps_mgau_base(s));
01559 return NULL;
01560 }
01561
01562
01563 s->n_feat = feat_dimension1(acmod->fcb);
01564 s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
01565 for (i = 0; i < s->n_feat; ++i)
01566 s->veclen[i] = feat_dimension2(acmod->fcb, i);
01567
01568
01569 if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0) {
01570 s2_semi_mgau_free(ps_mgau_base(s));
01571 return NULL;
01572 }
01573 s->means = (mfcc_t **)fgau;
01574 if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0) {
01575 s2_semi_mgau_free(ps_mgau_base(s));
01576 return NULL;
01577 }
01578 s->vars = (mfcc_t **)fgau;
01579
01580
01581 s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
01582 s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));
01583
01584
01585 if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
01586 if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
01587 s2_semi_mgau_free(ps_mgau_base(s));
01588 return NULL;
01589 }
01590 }
01591 else {
01592 if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
01593 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
01594 s2_semi_mgau_free(ps_mgau_base(s));
01595 return NULL;
01596 }
01597 }
01598 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
01599
01600
01601 s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
01602 s->max_topn = cmd_ln_int32_r(s->config, "-topn");
01603 split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
01604 E_INFO("Maximum top-N: %d ", s->max_topn);
01605 E_INFOCONT("Top-N beams:");
01606 for (i = 0; i < s->n_feat; ++i) {
01607 E_INFOCONT(" %d", s->topn_beam[i]);
01608 }
01609 E_INFOCONT("\n");
01610
01611
01612 s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
01613 s->topn_hist = (vqFeature_t ***)
01614 ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
01615 sizeof(***s->topn_hist));
01616 s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
01617 sizeof(**s->topn_hist_n));
01618 for (i = 0; i < s->n_topn_hist; ++i) {
01619 int j;
01620 for (j = 0; j < s->n_feat; ++j) {
01621 int k;
01622 for (k = 0; k < s->max_topn; ++k) {
01623 s->topn_hist[i][j][k].score = WORST_DIST;
01624 s->topn_hist[i][j][k].codeword = k;
01625 }
01626 }
01627 }
01628
01629 ps = (ps_mgau_t *)s;
01630 ps->vt = &s2_semi_mgau_funcs;
01631 return ps;
01632 }
01633
01634 int
01635 s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
01636 ps_mllr_t *mllr)
01637 {
01638 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01639 int32 i, d, l, m;
01640 float32 **fmean, *mp, **fvar, *vp;
01641 float64 *temp;
01642
01643
01644 if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fmean) < 0) {
01645 return -1;
01646 }
01647 if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fvar) < 0) {
01648 return -1;
01649 }
01650
01651
01652 for (i = 0; i < s->n_feat; i++) {
01653 temp = (float64 *) ckd_calloc(s->veclen[i], sizeof(float64));
01654 mp = fmean[i];
01655 vp = fvar[i];
01656
01657
01658 for (d = 0; d < s->n_density; d++) {
01659 for (l = 0; l < s->veclen[i]; l++) {
01660 temp[l] = 0.0;
01661 for (m = 0; m < s->veclen[i]; m++) {
01662 temp[l] += mllr->A[i][0][l][m] * mp[m];
01663 }
01664 temp[l] += mllr->b[i][0][l];
01665 }
01666
01667 for (l = 0; l < s->veclen[i]; l++) {
01668 mp[l] = (float32) temp[l];
01669 vp[l] *= mllr->h[i][0][l];
01670 }
01671 mp += s->veclen[i];
01672 vp += s->veclen[i];
01673 }
01674
01675 ckd_free(temp);
01676 }
01677
01678 for (i = 0; i < s->n_feat; ++i) {
01679 if (s->means)
01680 ckd_free(s->means[i]);
01681 if (s->vars)
01682 ckd_free(s->vars[i]);
01683 }
01684 ckd_free(s->means);
01685 ckd_free(s->vars);
01686
01687 s->means = (mfcc_t **)fmean;
01688 s->vars = (mfcc_t **)fvar;
01689 s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));
01690
01691 return 0;
01692 }
01693
01694 void
01695 s2_semi_mgau_free(ps_mgau_t *ps)
01696 {
01697 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01698 uint32 i;
01699
01700 logmath_free(s->lmath);
01701 logmath_free(s->lmath_8b);
01702 if (s->sendump_mmap) {
01703 ckd_free_2d(s->mixw);
01704 mmio_file_unmap(s->sendump_mmap);
01705 }
01706 else {
01707 ckd_free_3d(s->mixw);
01708 }
01709 for (i = 0; i < s->n_feat; ++i) {
01710 if (s->means)
01711 ckd_free(s->means[i]);
01712 if (s->vars)
01713 ckd_free(s->vars[i]);
01714 }
01715 for (i = 0; i < s->n_kdtrees; ++i)
01716 free_kd_tree(s->kdtrees[i]);
01717 ckd_free(s->kdtrees);
01718 ckd_free(s->veclen);
01719 ckd_free(s->means);
01720 ckd_free(s->vars);
01721 ckd_free(s->topn_beam);
01722 ckd_free_2d(s->topn_hist_n);
01723 ckd_free_3d((void **)s->topn_hist);
01724 ckd_free_2d((void **)s->dets);
01725 ckd_free(s);
01726 }