00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093 #include <string.h>
00094
00095 #include "dict2pid.h"
00096 #include "hmm.h"
00097
00098
00112 static glist_t
00113 ldiph_comsseq(bin_mdef_t * mdef,
00114 int32 b,
00115 int32 r
00116 )
00117 {
00118 int32 l, p, ssid;
00119 glist_t g;
00120
00121 g = NULL;
00122 E_DEBUG(2,("%s(%s,?):",
00123 bin_mdef_ciphone_str(mdef, b),
00124 bin_mdef_ciphone_str(mdef, r)));
00125 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00126 p = bin_mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
00127 (s3cipid_t) r, WORD_POSN_BEGIN);
00128
00129 if (IS_S3PID(p)) {
00130 gnode_t *gn;
00131 ssid = bin_mdef_pid2ssid(mdef, p);
00132 for (gn = g; gn; gn = gnode_next(gn))
00133 if (gnode_int32(gn) == ssid)
00134 break;
00135 if (gn == NULL) {
00136 g = glist_add_int32(g, ssid);
00137 E_DEBUGCONT(2,(" %d", ssid));
00138 }
00139 }
00140 }
00141 if (g == NULL) {
00142 g = glist_add_int32(g, bin_mdef_pid2ssid(mdef, b));
00143 E_DEBUGCONT(2,(" %d", bin_mdef_pid2ssid(mdef, b)));
00144 }
00145 E_DEBUGCONT(2,("\n"));
00146
00147 return g;
00148 }
00149
00150
00156 static glist_t
00157 rdiph_comsseq(bin_mdef_t * mdef, int32 b, int32 l)
00158 {
00159 int32 r, p, ssid;
00160 glist_t g;
00161
00162 g = NULL;
00163 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00164 p = bin_mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
00165 (s3cipid_t) r, WORD_POSN_END);
00166
00167 if (IS_S3PID(p)) {
00168 gnode_t *gn;
00169 ssid = bin_mdef_pid2ssid(mdef, p);
00170 for (gn = g; gn; gn = gnode_next(gn))
00171 if (gnode_int32(gn) == ssid)
00172 break;
00173 if (gn == NULL)
00174 g = glist_add_int32(g, ssid);
00175 }
00176 }
00177 if (!g)
00178 g = glist_add_int32(g, bin_mdef_pid2ssid(mdef, b));
00179
00180 return g;
00181 }
00182
00183
00189 static glist_t
00190 single_comsseq(bin_mdef_t * mdef, int32 b)
00191 {
00192 int32 l, r, p, ssid;
00193 glist_t g;
00194
00195 g = NULL;
00196 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00197 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00198 p = bin_mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
00199 (s3cipid_t) r, WORD_POSN_SINGLE);
00200
00201 if (IS_S3PID(p)) {
00202 gnode_t *gn;
00203 ssid = bin_mdef_pid2ssid(mdef, p);
00204 for (gn = g; gn; gn = gnode_next(gn))
00205 if (gnode_int32(gn) == ssid)
00206 break;
00207 if (gn == NULL)
00208 g = glist_add_int32(g, ssid);
00209 }
00210 }
00211 }
00212 if (!g)
00213 g = glist_add_int32(g, bin_mdef_pid2ssid(mdef, b));
00214
00215 return g;
00216 }
00217
00218
00224 static glist_t
00225 single_lc_comsseq(bin_mdef_t * mdef, int32 b, int32 l)
00226 {
00227 int32 r, p, ssid;
00228 glist_t g;
00229
00230 g = NULL;
00231 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00232 p = bin_mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
00233 (s3cipid_t) r, WORD_POSN_SINGLE);
00234
00235 if (IS_S3PID(p)) {
00236 gnode_t *gn;
00237 ssid = bin_mdef_pid2ssid(mdef, p);
00238 for (gn = g; gn; gn = gnode_next(gn))
00239 if (gnode_int32(gn) == ssid)
00240 break;
00241 if (gn == NULL)
00242 g = glist_add_int32(g, ssid);
00243 }
00244 }
00245 if (!g)
00246 g = glist_add_int32(g, bin_mdef_pid2ssid(mdef, b));
00247
00248 return g;
00249 }
00250
#if 0

/*
 * Compiled out.  Counterpart of single_lc_comsseq() with the right context
 * fixed instead of the left one: gathers the distinct ssids of the
 * WORD_POSN_SINGLE triphones with base phone b and right context r over
 * every CI phone as left context, falling back to
 * bin_mdef_pid2ssid(mdef, b) when none exists.
 */
static glist_t
single_rc_comsseq(bin_mdef_t * mdef, int32 b, int32 r)
{
    glist_t ssid_list = NULL;
    int32 lc;

    for (lc = 0; lc < bin_mdef_n_ciphone(mdef); lc++) {
        gnode_t *node;
        int32 seq;
        int32 pid = bin_mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) lc,
                                      (s3cipid_t) r, WORD_POSN_SINGLE);

        if (!IS_S3PID(pid))
            continue;

        /* Skip ssids that are already on the list. */
        seq = bin_mdef_pid2ssid(mdef, pid);
        node = ssid_list;
        while (node != NULL && gnode_int32(node) != seq)
            node = gnode_next(node);

        if (node == NULL)
            ssid_list = glist_add_int32(ssid_list, seq);
    }

    if (ssid_list == NULL)
        ssid_list = glist_add_int32(ssid_list, bin_mdef_pid2ssid(mdef, b));

    return ssid_list;
}
#endif
00287
00288
00292 static s3ssid_t
00293 ssidlist2comsseq(glist_t g, bin_mdef_t * mdef, dict2pid_t * dict2pid,
00294 hash_table_t * hs,
00295 hash_table_t * hp)
00296 {
00297 int32 i, j, n, s, ssid;
00298 s3senid_t **sen;
00299 s3senid_t *comsenid;
00300 gnode_t *gn;
00301
00302 n = glist_count(g);
00303 if (n <= 0)
00304 E_FATAL("Panic: length(ssidlist)= %d\n", n);
00305
00306
00307 sen =
00308 (s3senid_t **) ckd_calloc(bin_mdef_n_emit_state(mdef),
00309 sizeof(s3senid_t *));
00310 for (i = 0; i < bin_mdef_n_emit_state(mdef); i++) {
00311 sen[i] = (s3senid_t *) ckd_calloc(n + 1, sizeof(s3senid_t));
00312 sen[i][0] = BAD_S3SENID;
00313 }
00314
00315 comsenid =
00316 (s3senid_t *) ckd_calloc(bin_mdef_n_emit_state(mdef),
00317 sizeof(s3senid_t));
00318
00319
00320
00321 for (gn = g; gn; gn = gnode_next(gn)) {
00322 ssid = gnode_int32(gn);
00323
00324
00325 for (i = 0; i < bin_mdef_n_emit_state(mdef); i++) {
00326 s = bin_mdef_sseq2sen(mdef, ssid, i);
00327
00328 for (j = 0; (IS_S3SENID(sen[i][j])) && (sen[i][j] != s); j++);
00329 if (NOT_S3SENID(sen[i][j])) {
00330 sen[i][j] = s;
00331 sen[i][j + 1] = BAD_S3SENID;
00332 }
00333 }
00334 }
00335
00336
00337 for (i = 0; i < bin_mdef_n_emit_state(mdef); i++) {
00338
00339 for (j = 0; IS_S3SENID(sen[i][j]); j++);
00340 assert(j > 0);
00341
00342
00343 j = (long)hash_table_enter_bkey(hs, (char *) (sen[i]), j * sizeof(s3senid_t),
00344 (void *)(long)dict2pid->n_comstate);
00345
00346 if (j == dict2pid->n_comstate)
00347 dict2pid->n_comstate++;
00348 else
00349 ckd_free((void *) sen[i]);
00350
00351
00352 comsenid[i] = j;
00353 }
00354 ckd_free(sen);
00355
00356
00357 j = (long) hash_table_enter_bkey(hp, (char *) comsenid,
00358 mdef->n_emit_state * sizeof(s3senid_t),
00359 (void *)(long)dict2pid->n_comsseq);
00360
00361 if (j == dict2pid->n_comsseq) {
00362
00363 dict2pid->n_comsseq++;
00364 if (dict2pid->n_comsseq >= MAX_S3SENID)
00365 E_FATAL
00366 ("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n",
00367 dict2pid->n_comsseq);
00368 }
00369 else
00370 ckd_free((void *) comsenid);
00371
00372 return ((s3ssid_t) j);
00373 }
00374
00375 void
00376 compress_table(s3ssid_t * uncomp_tab, s3ssid_t * com_tab,
00377 s3cipid_t * ci_map, int32 n_ci)
00378 {
00379 int32 found;
00380 int32 r;
00381 int32 tmp_r;
00382
00383 for (r = 0; r < n_ci; r++) {
00384 com_tab[r] = BAD_S3SSID;
00385 ci_map[r] = BAD_S3CIPID;
00386 }
00388 for (r = 0; r < n_ci; r++) {
00389
00390 found = 0;
00391 for (tmp_r = 0; tmp_r < r && com_tab[tmp_r] != BAD_S3SSID; tmp_r++) {
00392 if (uncomp_tab[r] == com_tab[tmp_r]) {
00393 found = 1;
00394 ci_map[r] = tmp_r;
00395 break;
00396 }
00397 }
00398
00399 if (found == 0) {
00400 com_tab[tmp_r] = uncomp_tab[r];
00401 ci_map[r] = tmp_r;
00402 }
00403 }
00404 }
00405
00406
00407 static void
00408 compress_right_context_tree(bin_mdef_t * mdef, dict2pid_t * d2p)
00409 {
00410 int32 n_ci;
00411 int32 b, l, r;
00412 s3ssid_t *rmap;
00413 s3ssid_t *tmpssid;
00414 s3cipid_t *tmpcimap;
00415
00416 n_ci = mdef->n_ciphone;
00417
00418 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
00419 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
00420
00421 assert(d2p->rdiph_rc);
00422 d2p->rssid =
00423 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
00424
00425 for (b = 0; b < n_ci; b++) {
00426
00427 d2p->rssid[b] =
00428 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
00429
00430 for (l = 0; l < n_ci; l++) {
00431
00432 rmap = d2p->rdiph_rc[b][l];
00433
00434 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
00435
00436 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
00437 r++);
00438
00439 if (tmpssid[0] != BAD_S3SSID) {
00440 d2p->rssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
00441 memcpy(d2p->rssid[b][l].ssid, tmpssid,
00442 r * sizeof(s3ssid_t));
00443 d2p->rssid[b][l].cimap =
00444 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
00445 memcpy(d2p->rssid[b][l].cimap, tmpcimap,
00446 (mdef->n_ciphone) * sizeof(s3cipid_t));
00447 d2p->rssid[b][l].n_ssid = r;
00448 }
00449 else {
00450 d2p->rssid[b][l].ssid = NULL;
00451 d2p->rssid[b][l].cimap = NULL;
00452 d2p->rssid[b][l].n_ssid = 0;
00453 }
00454
00455 }
00456 }
00457
00458
00459 ckd_free(tmpssid);
00460 ckd_free(tmpcimap);
00461
00462
00463 }
00464
00465 static void
00466 compress_left_right_context_tree(bin_mdef_t * mdef, dict2pid_t * d2p)
00467 {
00468 int32 n_ci;
00469 int32 b, l, r;
00470 s3ssid_t *rmap;
00471 s3ssid_t *tmpssid;
00472 s3cipid_t *tmpcimap;
00473
00474 n_ci = mdef->n_ciphone;
00475
00476 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
00477 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
00478
00479 assert(d2p->lrdiph_rc);
00480
00481 d2p->lrssid =
00482 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
00483
00484 for (b = 0; b < n_ci; b++) {
00485
00486 d2p->lrssid[b] =
00487 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
00488
00489 for (l = 0; l < n_ci; l++) {
00490 rmap = d2p->lrdiph_rc[b][l];
00491
00492 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
00493
00494 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
00495 r++);
00496
00497 if (tmpssid[0] != BAD_S3SSID) {
00498 d2p->lrssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
00499 memcpy(d2p->lrssid[b][l].ssid, tmpssid,
00500 r * sizeof(s3ssid_t));
00501 d2p->lrssid[b][l].cimap =
00502 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
00503 memcpy(d2p->lrssid[b][l].cimap, tmpcimap,
00504 (mdef->n_ciphone) * sizeof(s3cipid_t));
00505 d2p->lrssid[b][l].n_ssid = r;
00506 }
00507 else {
00508 d2p->lrssid[b][l].ssid = NULL;
00509 d2p->lrssid[b][l].cimap = NULL;
00510 d2p->lrssid[b][l].n_ssid = 0;
00511 }
00512 }
00513 }
00514
00515
00516 ckd_free(tmpssid);
00517 ckd_free(tmpcimap);
00518
00519
00520 }
00521
00526 int32
00527 get_rc_nssid(dict2pid_t * d2p, s3wid_t w, s3dict_t * dict)
00528 {
00529 int32 pronlen;
00530 s3cipid_t b, lc;
00531
00532 pronlen = dict->word[w].pronlen;
00533 b = dict->word[w].ciphone[pronlen - 1];
00534
00535 if (pronlen == 1) {
00536
00537
00538
00539
00540 return (d2p->lrssid[b][0].n_ssid);
00541 }
00542 else {
00543
00544 lc = dict->word[w].ciphone[pronlen - 2];
00545 return (d2p->rssid[b][lc].n_ssid);
00546 }
00547
00548 }
00549
00550 s3cipid_t *
00551 dict2pid_get_rcmap(dict2pid_t * d2p, s3wid_t w, s3dict_t * dict)
00552 {
00553 int32 pronlen;
00554 s3cipid_t b, lc;
00555
00556 pronlen = dict->word[w].pronlen;
00557 b = dict->word[w].ciphone[pronlen - 1];
00558
00559 if (pronlen == 1) {
00560
00561
00562
00563
00564 return (d2p->lrssid[b][0].cimap);
00565 }
00566 else {
00567
00568 lc = dict->word[w].ciphone[pronlen - 2];
00569 return (d2p->rssid[b][lc].cimap);
00570 }
00571
00572 }
00573
00574
00575
00576
00577 static void
00578 free_compress_map(xwdssid_t ** tree, int32 n_ci)
00579 {
00580 int32 b, l;
00581 for (b = 0; b < n_ci; b++) {
00582 for (l = 0; l < n_ci; l++) {
00583 ckd_free(tree[b][l].ssid);
00584 ckd_free(tree[b][l].cimap);
00585 }
00586 ckd_free(tree[b]);
00587 }
00588 ckd_free(tree);
00589 }
00590
00591
00592
00593 dict2pid_t *
00594 dict2pid_build(bin_mdef_t * mdef, s3dict_t * dict, int32 is_composite, logmath_t *logmath)
00595 {
00596 dict2pid_t *dict2pid;
00597 s3ssid_t *internal, **ldiph, **rdiph, *single;
00598 int32 pronlen;
00599 hash_table_t *hs, *hp;
00600 glist_t g;
00601 gnode_t *gn;
00602 s3senid_t *sen;
00603 hash_entry_t *he;
00604 int32 *cslen;
00605 int32 i, j, b, l, r, w, n, p;
00606
00607 E_INFO("Building PID tables for dictionary\n");
00608 assert(mdef);
00609 assert(dict);
00610
00611
00612 dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
00613
00614 dict2pid->n_dictsize = s3dict_size(dict);
00615 dict2pid->internal =
00616 (s3ssid_t **) ckd_calloc(s3dict_size(dict), sizeof(s3ssid_t *));
00617 dict2pid->ldiph_lc =
00618 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
00619 mdef->n_ciphone, sizeof(s3ssid_t));
00620 dict2pid->rdiph_rc =
00621 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
00622 mdef->n_ciphone, sizeof(s3ssid_t));
00623 dict2pid->is_composite = is_composite;
00624
00625 dict2pid->n_ci = mdef->n_ciphone;
00626 if (dict2pid->is_composite) {
00627 dict2pid->single_lc = (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone,
00628 mdef->n_ciphone,
00629 sizeof
00630 (s3ssid_t));
00631 dict2pid->lrdiph_rc = NULL;
00632 dict2pid->rssid = NULL;
00633 dict2pid->lrssid = NULL;
00634
00635 }
00636 else {
00637
00638 dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone,
00639 mdef->n_ciphone,
00640 mdef->n_ciphone,
00641 sizeof
00642 (s3ssid_t));
00643 dict2pid->single_lc = NULL;
00644
00645
00646 }
00647
00648 dict2pid->comstate = NULL;
00649 dict2pid->comsseq = NULL;
00650 dict2pid->comwt = NULL;
00651
00652 dict2pid->n_comstate = 0;
00653 dict2pid->n_comsseq = 0;
00654 dict2pid->is_composite = is_composite;
00655
00656 hs = hash_table_new(mdef->n_ciphone * mdef->n_ciphone * mdef->n_emit_state,
00657 HASH_CASE_YES);
00658 hp = hash_table_new(mdef->n_ciphone * mdef->n_ciphone, HASH_CASE_YES);
00659
00660 for (w = 0, n = 0; w < s3dict_size(dict); w++) {
00661 pronlen = s3dict_pronlen(dict, w);
00662 if (pronlen < 0)
00663 E_FATAL("Pronunciation-length(%s)= %d\n",
00664 s3dict_wordstr(dict, w), pronlen);
00665 n += pronlen;
00666 }
00667
00668 internal = (s3ssid_t *) ckd_calloc(n, sizeof(s3ssid_t));
00669
00670
00671
00672 ldiph =
00673 (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone, mdef->n_ciphone,
00674 sizeof(s3ssid_t));
00675 rdiph =
00676 (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone, mdef->n_ciphone,
00677 sizeof(s3ssid_t));
00678 single = (s3ssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(s3ssid_t));
00679 for (b = 0; b < mdef->n_ciphone; b++) {
00680 for (l = 0; l < mdef->n_ciphone; l++) {
00681 for (r = 0; r < mdef->n_ciphone; r++) {
00682 dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
00683 dict2pid->rdiph_rc[b][l][r] = BAD_S3SSID;
00684 }
00685
00686 if (dict2pid->is_composite) {
00687 assert(dict2pid->single_lc);
00688 dict2pid->single_lc[b][l] = BAD_S3SSID;
00689 }
00690
00691 ldiph[b][l] = BAD_S3SSID;
00692 rdiph[b][l] = BAD_S3SSID;
00693 }
00694 single[b] = BAD_S3SSID;
00695 }
00696
00697 for (w = 0; w < s3dict_size(dict); w++) {
00698 dict2pid->internal[w] = internal;
00699 pronlen = s3dict_pronlen(dict, w);
00700
00701 if (pronlen >= 2) {
00702
00707
00708 b = s3dict_pron(dict, w, 0);
00709 r = s3dict_pron(dict, w, 1);
00710 if (NOT_S3SSID(ldiph[b][r])) {
00711 if (dict2pid->is_composite) {
00712
00713 g = ldiph_comsseq(mdef, b, r);
00714
00715 ldiph[b][r] =
00716 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
00717 glist_free(g);
00718 }
00719 else {
00720
00721 ldiph[b][r] = 0;
00722 }
00723
00724
00725 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00726 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00727 (s3cipid_t) l, (s3cipid_t) r,
00728 WORD_POSN_BEGIN);
00729 dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
00730 }
00731 }
00732
00733
00734 if (dict2pid->is_composite)
00735 internal[0] = ldiph[b][r];
00736 else
00737 internal[0] = BAD_S3SSID;
00738
00739
00740
00741 for (i = 1; i < pronlen - 1; i++) {
00742 l = b;
00743 b = r;
00744 r = s3dict_pron(dict, w, i + 1);
00745
00746 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00747 (s3cipid_t) l, (s3cipid_t) r,
00748 WORD_POSN_INTERNAL);
00749 internal[i] = bin_mdef_pid2ssid(mdef, p);
00750 }
00751
00757 l = b;
00758 b = r;
00759 if (NOT_S3SSID(rdiph[b][l])) {
00760 if (dict2pid->is_composite) {
00761 g = rdiph_comsseq(mdef, b, l);
00762 rdiph[b][l] =
00763 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
00764 glist_free(g);
00765 }
00766 else {
00767
00768 rdiph[b][l] = 0;
00769 }
00770
00771 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00772 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00773 (s3cipid_t) l, (s3cipid_t) r,
00774 WORD_POSN_END);
00775 dict2pid->rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
00776 }
00777 }
00778
00779 if (dict2pid->is_composite)
00780 internal[pronlen - 1] = rdiph[b][l];
00781 else
00782 internal[pronlen - 1] = BAD_S3SSID;
00783
00784 }
00785 else if (pronlen == 1) {
00786 b = s3dict_pron(dict, w, 0);
00787 E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
00788 s3dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
00789 if (dict2pid->is_composite) {
00790 assert(dict2pid->single_lc);
00791
00792
00793 if (NOT_S3SSID(single[b])) {
00794 g = single_comsseq(mdef, b);
00795 single[b] =
00796 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
00797 glist_free(g);
00798
00799
00800 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00801 g = single_lc_comsseq(mdef, b, l);
00802 dict2pid->single_lc[b][l] =
00803 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
00804 glist_free(g);
00805 }
00806 }
00807 internal[0] = single[b];
00808 }
00809 else {
00810
00811 if (NOT_S3SSID(single[b])) {
00812 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00813 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00814 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00815 (s3cipid_t) l,
00816 (s3cipid_t) r,
00817 WORD_POSN_SINGLE);
00818 dict2pid->lrdiph_rc[b][l][r]
00819 = bin_mdef_pid2ssid(mdef, p);
00820 if (r == bin_mdef_silphone(mdef))
00821 dict2pid->ldiph_lc[b][r][l]
00822 = bin_mdef_pid2ssid(mdef, p);
00823 if (l == bin_mdef_silphone(mdef))
00824 dict2pid->rdiph_rc[b][l][r]
00825 = bin_mdef_pid2ssid(mdef, p);
00826 assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p)));
00827 E_DEBUG(2,("%s(%s,%s) => %d / %d\n",
00828 bin_mdef_ciphone_str(mdef, b),
00829 bin_mdef_ciphone_str(mdef, l),
00830 bin_mdef_ciphone_str(mdef, r),
00831 p, bin_mdef_pid2ssid(mdef, p)));
00832 }
00833 }
00834 single[b] = dict2pid->lrdiph_rc[b]
00835 [bin_mdef_silphone(mdef)][bin_mdef_silphone(mdef)];
00836 assert(IS_S3SSID(single[b]));
00837 }
00838 internal[pronlen - 1] = BAD_S3SSID;
00839 }
00840
00841 }
00842 else {
00843 E_FATAL("panic: pronlen=0, what's going on?\n");
00844 }
00845
00846 if (!dict2pid->is_composite) {
00847
00848 assert(internal[0] == BAD_S3SSID
00849 && internal[pronlen - 1] == BAD_S3SSID);
00850 }
00851
00852 internal += pronlen;
00853 }
00854
00855 ckd_free_2d((void **) ldiph);
00856 ckd_free_2d((void **) rdiph);
00857 ckd_free((void *) single);
00858
00859 if (dict2pid->is_composite) {
00860
00861
00862
00863 cslen = (int32 *) ckd_calloc(dict2pid->n_comstate, sizeof(int32));
00864
00865 g = hash_table_tolist(hs, &n);
00866 assert(n == dict2pid->n_comstate);
00867 n = 0;
00868
00869 for (gn = g; gn; gn = gnode_next(gn)) {
00870 he = (hash_entry_t *) gnode_ptr(gn);
00871
00872 sen = (s3senid_t *) hash_entry_key(he);
00873 for (i = 0; IS_S3SENID(sen[i]); i++);
00874
00875
00876 cslen[(long)hash_entry_val(he)] = i + 1;
00877
00878 n += (i + 1);
00879 }
00880
00881 dict2pid->comstate =
00882 (s3senid_t **) ckd_calloc(dict2pid->n_comstate,
00883 sizeof(s3senid_t *));
00884 sen = (s3senid_t *) ckd_calloc(n, sizeof(s3senid_t));
00885 for (i = 0; i < dict2pid->n_comstate; i++) {
00886 dict2pid->comstate[i] = sen;
00887 sen += cslen[i];
00888 }
00889
00890
00891 for (gn = g; gn; gn = gnode_next(gn)) {
00892 he = (hash_entry_t *) gnode_ptr(gn);
00893 sen = (s3senid_t *) hash_entry_key(he);
00894 i = (long)hash_entry_val(he);
00895
00896 for (j = 0; j < cslen[i]; j++)
00897 dict2pid->comstate[i][j] = sen[j];
00898 assert(sen[j - 1] == BAD_S3SENID);
00899
00900 ckd_free((void *) sen);
00901 sen = NULL;
00902 }
00903 ckd_free(cslen);
00904 glist_free(g);
00905
00906
00907
00908 dict2pid->comsseq =
00909 (s3senid_t **) ckd_calloc(dict2pid->n_comsseq,
00910 sizeof(s3senid_t *));
00911
00912 for (i = 0; i < dict2pid->n_comsseq; i++) {
00913 dict2pid->comsseq[i] = NULL;
00914 }
00915
00916
00917 g = hash_table_tolist(hp, &n);
00918 assert(n == dict2pid->n_comsseq);
00919
00920
00921 for (gn = g; gn; gn = gnode_next(gn)) {
00922 he = (hash_entry_t *) gnode_ptr(gn);
00923
00924 i = (long)hash_entry_val(he);
00925
00926 dict2pid->comsseq[i] = (s3senid_t *) hash_entry_key(he);
00927 }
00928 glist_free(g);
00929
00930
00931
00932
00933
00934
00935
00936 dict2pid->comwt =
00937 (int16 *) ckd_calloc(dict2pid->n_comstate, sizeof(int16));
00938 for (i = 0; i < dict2pid->n_comstate; i++) {
00939 sen = dict2pid->comstate[i];
00940
00941 for (j = 0; IS_S3SENID(sen[j]); j++);
00942
00943
00944 dict2pid->comwt[i] = -(-logmath_log(logmath, (float64) j)) >> SENSCR_SHIFT;
00945 }
00946 }
00947
00948 if (!(dict2pid->is_composite)) {
00949 assert(dict2pid->comstate == NULL);
00950 assert(dict2pid->comsseq == NULL);
00951 assert(dict2pid->comwt == NULL);
00952 assert(dict2pid->single_lc == NULL);
00953 assert(dict2pid->n_comstate == 0);
00954 assert(dict2pid->n_comsseq == 0);
00955
00956
00957
00958
00959
00960 compress_right_context_tree(mdef, dict2pid);
00961 compress_left_right_context_tree(mdef, dict2pid);
00962
00963 }
00964 else {
00965 assert(dict2pid->rssid == NULL);
00966 assert(dict2pid->lrssid == NULL);
00967 }
00968
00969 hash_table_free(hs);
00970 hash_table_free(hp);
00971
00972 dict2pid_report(dict2pid);
00973 return dict2pid;
00974 }
00975
00976 dict2pid_t *
00977 dict2pid_retain(dict2pid_t *d2p)
00978 {
00979 ++d2p->refcount;
00980 return d2p;
00981 }
00982
00983 int
00984 dict2pid_free(dict2pid_t * d2p)
00985 {
00986 int32 i;
00987
00988 if (d2p == NULL)
00989 return 0;
00990 if (--d2p->refcount > 0)
00991 return d2p->refcount;
00992
00993 if (d2p->comwt)
00994 ckd_free((void *) d2p->comwt);
00995 if (d2p->comsseq) {
00996
00997 for (i = 0; i < d2p->n_comsseq; i++) {
00998 if (d2p->comsseq[i] != NULL) {
00999 ckd_free((void *) d2p->comsseq[i]);
01000 }
01001 }
01002 ckd_free((void *) d2p->comsseq);
01003 }
01004
01005 if (d2p->comstate) {
01006 ckd_free((void **) d2p->comstate[0]);
01007 ckd_free((void **) d2p->comstate);
01008 }
01009
01010 if (d2p->single_lc)
01011 ckd_free_2d((void *) d2p->single_lc);
01012
01013 if (d2p->ldiph_lc)
01014 ckd_free_3d((void ***) d2p->ldiph_lc);
01015
01016
01017 if (d2p->rdiph_rc)
01018 ckd_free_3d((void ***) d2p->rdiph_rc);
01019
01020 if (d2p->lrdiph_rc)
01021 ckd_free_3d((void ***) d2p->lrdiph_rc);
01022
01023 if (d2p->internal) {
01024 ckd_free((void *) d2p->internal[0]);
01025 ckd_free((void **) d2p->internal);
01026 }
01027
01028 if (d2p->rssid)
01029 free_compress_map(d2p->rssid, d2p->n_ci);
01030
01031 if (d2p->lrssid)
01032 free_compress_map(d2p->lrssid, d2p->n_ci);
01033
01034 ckd_free(d2p);
01035 return 0;
01036 }
01037
01038
01039
01040 void
01041 dict2pid_report(dict2pid_t * d2p)
01042 {
01043 E_INFO_NOFN("Initialization of dict2pid_t, report:\n");
01044 if (d2p->is_composite) {
01045 E_INFO_NOFN("Dict2pid is in composite triphone mode\n");
01046 E_INFO_NOFN("%d composite states; %d composite sseq\n",
01047 d2p->n_comstate, d2p->n_comsseq);
01048 }
01049 else {
01050 E_INFO_NOFN("Dict2pid is in normal triphone mode\n");
01051 }
01052 E_INFO_NOFN("\n");
01053
01054
01055 }
01056
01063 void
01064 dict2pid_comsenscr(dict2pid_t * d2p, int16 const * senscr, int16 * comsenscr)
01065 {
01066 int32 i, j;
01067 int32 best;
01068 s3senid_t *comstate, k;
01069
01070 for (i = 0; i < d2p->n_comstate; i++) {
01071 comstate = d2p->comstate[i];
01072
01073 best = senscr[comstate[0]];
01074 for (j = 1;; j++) {
01075 k = comstate[j];
01076 if (NOT_S3SENID(k))
01077 break;
01078 if (best > senscr[k])
01079 best = senscr[k];
01080 }
01081
01082 comsenscr[i] = best + d2p->comwt[i];
01083 }
01084 }
01085
01089 void
01090 dict2pid_comsseq2sen_active(dict2pid_t * d2p, bin_mdef_t * mdef,
01091 bitvec_t * comssid, bitvec_t * sen)
01092 {
01093 int32 ss, cs, i, j;
01094 s3senid_t *csp, *sp;
01095
01096 for (ss = 0; ss < d2p->n_comsseq; ss++) {
01097 if (bitvec_is_set(comssid,ss)) {
01098 csp = d2p->comsseq[ss];
01099 E_DEBUG(4,("comssid[%d] is active:",ss));
01100 for (i = 0; i < bin_mdef_n_emit_state(mdef); i++) {
01101 cs = csp[i];
01102 sp = d2p->comstate[cs];
01103 E_DEBUGCONT(4,(" %d",cs));
01104
01105 for (j = 0; IS_S3SENID(sp[j]); j++)
01106 bitvec_set(sen, sp[j]);
01107 }
01108 E_DEBUGCONT(4,("\n"));
01109 }
01110 }
01111 }
01112
01113
01114 void
01115 dict2pid_dump(FILE * fp, dict2pid_t * d2p, bin_mdef_t * mdef, s3dict_t * dict)
01116 {
01117 int32 w, p, pronlen;
01118 int32 i, j, b, l, r;
01119
01120 fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
01121 for (w = 0; w < s3dict_size(dict); w++) {
01122 fprintf(fp, "%30s ", s3dict_wordstr(dict, w));
01123
01124 pronlen = s3dict_pronlen(dict, w);
01125 for (p = 0; p < pronlen; p++)
01126 fprintf(fp, " %5d", d2p->internal[w][p]);
01127 fprintf(fp, "\n");
01128 }
01129 fprintf(fp, "#\n");
01130
01131 fprintf(fp, "# LDIPH_LC (b r l ssid)\n");
01132 for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) {
01133 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
01134 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
01135 if (IS_S3SSID(d2p->ldiph_lc[b][r][l]))
01136 fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]);
01137 }
01138 }
01139 }
01140 fprintf(fp, "#\n");
01141
01142 fprintf(fp, "# SINGLE_LC (b l comssid)\n");
01143 for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) {
01144 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
01145 if (IS_S3SSID(d2p->single_lc[b][l]))
01146 fprintf(fp, "%6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->single_lc[b][l]);
01147 }
01148 }
01149 fprintf(fp, "#\n");
01150
01151 fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
01152 for (i = 0; i < mdef->n_sseq; i++) {
01153 fprintf(fp, "%5d ", i);
01154 for (j = 0; j < bin_mdef_n_emit_state(mdef); j++)
01155 fprintf(fp, " %5d", mdef->sseq[i][j]);
01156 fprintf(fp, "\n");
01157 }
01158 fprintf(fp, "#\n");
01159
01160 fprintf(fp, "# COMSSEQ %d (comstate comstate ...)\n", d2p->n_comsseq);
01161 for (i = 0; i < d2p->n_comsseq; i++) {
01162 fprintf(fp, "%5d ", i);
01163 for (j = 0; j < bin_mdef_n_emit_state(mdef); j++)
01164 fprintf(fp, " %5d", d2p->comsseq[i][j]);
01165 fprintf(fp, "\n");
01166 }
01167 fprintf(fp, "#\n");
01168
01169 fprintf(fp, "# COMSTATE %d (senid senid ...)\n", d2p->n_comstate);
01170 for (i = 0; i < d2p->n_comstate; i++) {
01171 fprintf(fp, "%5d ", i);
01172 for (j = 0; IS_S3SENID(d2p->comstate[i][j]); j++)
01173 fprintf(fp, " %5d", d2p->comstate[i][j]);
01174 fprintf(fp, "\n");
01175 }
01176 fprintf(fp, "#\n");
01177 fprintf(fp, "# END\n");
01178
01179 fflush(fp);
01180 }
01181
01182
/*
 * Compiled-out debugging statements (not a function; they cannot be
 * enabled where they stand).  They print, one line each to stdout:
 * rmap[], tmpssid[] and tmpcimap[] over all CI phones, then the ssid[]
 * array of dict2pid->rssid[b][l] and, when that cell is non-empty, its
 * cimap[].
 * NOTE(review): the names rmap/tmpssid/tmpcimap/b/l match the locals of
 * compress_right_context_tree(), which names its table "d2p" rather than
 * "dict2pid" - presumably a leftover from that function; confirm before
 * reusing.
 */
01183 #if 0
01184 for (r = 0; r < mdef->n_ciphone; r++) {
01185 printf("%d ", rmap[r]);
01186 }
01187
01188 printf("\n");
01189 fflush(stdout);
01190
01191 for (r = 0; r < mdef->n_ciphone; r++) {
01192 printf("%d ", tmpssid[r]);
01193 }
01194
01195 printf("\n");
01196 fflush(stdout);
01197 for (r = 0; r < mdef->n_ciphone; r++) {
01198 printf("%d ", tmpcimap[r]);
01199 }
01200
01201 printf("\n");
01202 fflush(stdout);
01203
01204 for (r = 0; r < dict2pid->rssid[b][l].n_ssid; r++) {
01205 printf("%d ", dict2pid->rssid[b][l].ssid[r]);
01206 }
01207
01208 printf("\n");
01209 fflush(stdout);
01210
01211 if (dict2pid->rssid[b][l].n_ssid > 0) {
01212 for (r = 0; r < mdef->n_ciphone; r++) {
01213 printf("%d ", dict2pid->rssid[b][l].cimap[r]);
01214 }
01215 }
01216 printf("\n");
01217
01218 fflush(stdout);
01219 #endif