00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080 #include <stdio.h>
00081 #include <string.h>
00082 #include <assert.h>
00083
00084
00085 #include <ckd_alloc.h>
00086 #include <err.h>
00087
00088
00089 #include "fsg_lextree.h"
00090
00091 #define __FSG_DBG__ 0
00092
00099 static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
00100 fsg_model_t *fsg,
00101 int32 from_state,
00102 fsg_pnode_t **alloc_head);
00103
00108 static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
00109
00114 static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *alloc_head, FILE *fp);
00115
00119 static void
00120 fsg_lextree_lc_rc(fsg_lextree_t *lextree)
00121 {
00122 int32 s, d, i, j;
00123 int32 n_ci;
00124 gnode_t *gn;
00125 fsg_model_t *fsg;
00126 fsg_link_t *l;
00127 int32 silcipid;
00128 int32 len;
00129
00130 silcipid = bin_mdef_ciphone_id(lextree->mdef, "SIL");
00131 assert(silcipid >= 0);
00132 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00133
00134 fsg = lextree->fsg;
00135
00136
00137
00138
00139 lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
00140 lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
00141
00142 for (s = 0; s < fsg->n_state; s++) {
00143 for (d = 0; d < fsg->n_state; d++) {
00144 for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) {
00145 int32 dictwid;
00147 l = (fsg_link_t *) gnode_ptr(gn);
00148 assert(l->wid >= 0);
00149 dictwid = s3dict_wordid(lextree->dict,
00150 fsg_model_word_str(lextree->fsg, l->wid));
00151
00152
00153
00154
00155
00156
00157
00158
00159 if (fsg_model_is_filler(fsg, l->wid)) {
00160
00161 lextree->rc[s][silcipid] = 1;
00162 lextree->lc[d][silcipid] = 1;
00163 }
00164 else {
00165 len = s3dict_pronlen(lextree->dict, dictwid);
00166 lextree->rc[s][s3dict_pron(lextree->dict, dictwid, 0)] = 1;
00167 lextree->lc[d][s3dict_pron(lextree->dict, dictwid, len - 1)] = 1;
00168 }
00169 }
00170 }
00171
00172
00173
00174
00175
00176
00177
00178
00179 lextree->lc[s][silcipid] = 1;
00180 lextree->rc[s][silcipid] = 1;
00181 }
00182
00183
00184
00185
00186
00187
00188 for (s = 0; s < fsg->n_state; s++) {
00189 for (d = 0; d < fsg->n_state; d++) {
00190 l = fsg->null_trans[s][d];
00191 if (l) {
00192
00193
00194
00195
00196 for (i = 0; i < n_ci; i++)
00197 lextree->lc[d][i] |= lextree->lc[s][i];
00198
00199
00200
00201
00202
00203 for (i = 0; i < n_ci; i++)
00204 lextree->rc[s][i] |= lextree->rc[d][i];
00205 }
00206 }
00207 }
00208
00209
00210 for (s = 0; s < fsg->n_state; s++) {
00211 j = 0;
00212 for (i = 0; i < n_ci; i++) {
00213 if (lextree->lc[s][i]) {
00214 lextree->lc[s][j] = i;
00215 j++;
00216 }
00217 }
00218 lextree->lc[s][j] = -1;
00219
00220 j = 0;
00221 for (i = 0; i < n_ci; i++) {
00222 if (lextree->rc[s][i]) {
00223 lextree->rc[s][j] = i;
00224 j++;
00225 }
00226 }
00227 lextree->rc[s][j] = -1;
00228 }
00229 }
00230
00231
00232
00233
00234 fsg_lextree_t *
00235 fsg_lextree_init(fsg_model_t * fsg, s3dict_t *dict, dict2pid_t *d2p,
00236 bin_mdef_t *mdef, hmm_context_t *ctx,
00237 int32 wip, int32 pip)
00238 {
00239 int32 s;
00240 fsg_lextree_t *lextree;
00241 fsg_pnode_t *pn;
00242
00243 lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
00244 lextree->fsg = fsg;
00245 lextree->root = ckd_calloc(fsg_model_n_state(fsg),
00246 sizeof(fsg_pnode_t *));
00247 lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
00248 sizeof(fsg_pnode_t *));
00249 lextree->ctx = ctx;
00250 lextree->dict = dict;
00251 lextree->d2p = d2p;
00252 lextree->mdef = mdef;
00253 lextree->wip = wip;
00254 lextree->pip = pip;
00255
00256
00257 fsg_lextree_lc_rc(lextree);
00258
00259
00260 lextree->n_pnode = 0;
00261 for (s = 0; s < fsg_model_n_state(fsg); s++) {
00262 lextree->root[s] =
00263 fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
00264
00265 for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next)
00266 lextree->n_pnode++;
00267 }
00268 E_INFO("%d HMM nodes in lextree\n", lextree->n_pnode);
00269
00270 #if __FSG_DBG__
00271 fsg_lextree_dump(lextree, stdout);
00272 #endif
00273
00274 return lextree;
00275 }
00276
00277
00278 void
00279 fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
00280 {
00281 int32 s;
00282
00283 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
00284 fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
00285 fsg_psubtree_dump(lextree, lextree->alloc_head[s], fp);
00286 }
00287 fflush(fp);
00288 }
00289
00290
00291 void
00292 fsg_lextree_free(fsg_lextree_t * lextree)
00293 {
00294 int32 s;
00295
00296 if (lextree == NULL)
00297 return;
00298
00299 if (lextree->fsg)
00300 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
00301 fsg_psubtree_free(lextree->alloc_head[s]);
00302
00303 ckd_free_2d(lextree->lc);
00304 ckd_free_2d(lextree->rc);
00305 ckd_free(lextree->root);
00306 ckd_free(lextree->alloc_head);
00307 ckd_free(lextree);
00308 }
00309
00310 void
00311 fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t * ctxt)
00312 {
00313 int32 i;
00314
00315 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00316 ctxt->bv[i] = 0xffffffff;
00317 }
00318
00319
00320 uint32
00321 fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
00322 {
00323 int32 i;
00324 uint32 non_zero;
00325
00326 non_zero = 0;
00327
00328 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) {
00329 src->bv[i] = ~(sub->bv[i]) & src->bv[i];
00330 non_zero |= src->bv[i];
00331 }
00332
00333 return non_zero;
00334 }
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348 static fsg_pnode_t *
00349 psubtree_add_trans(fsg_lextree_t *lextree,
00350 fsg_pnode_t * root,
00351 fsg_link_t * fsglink,
00352 int16 *lclist, int16 *rclist,
00353 fsg_pnode_t ** alloc_head)
00354 {
00355 int32 silcipid;
00356 int32 pronlen;
00357 int32 wid;
00358 int32 dictwid;
00359 int32 ssid;
00360 gnode_t *gn;
00361 fsg_pnode_t *pnode, *pred, *head;
00362 int32 n_ci, p, lc, rc;
00363 glist_t lc_pnodelist;
00364 glist_t rc_pnodelist;
00365 int32 i, j;
00366
00367 silcipid = bin_mdef_silphone(lextree->mdef);
00368 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00369
00370 wid = fsg_link_wid(fsglink);
00371 assert(wid >= 0);
00372 dictwid = s3dict_wordid(lextree->dict,
00373 fsg_model_word_str(lextree->fsg, wid));
00374 pronlen = s3dict_pronlen(lextree->dict, dictwid);
00375 assert(pronlen >= 1);
00376
00377 assert(lclist[0] >= 0);
00378 assert(rclist[0] >= 0);
00379
00380 head = *alloc_head;
00381 pred = NULL;
00382
00383 if (pronlen == 1) {
00384 int ci = s3dict_first_phone(lextree->dict, dictwid);
00385
00386 if (s3dict_filler_word(lextree->dict, dictwid)) {
00387
00388
00389
00390
00391 lc_pnodelist = NULL;
00392
00393 for (i = 0; lclist[i] >= 0; i++) {
00394 lc = lclist[i];
00395 ssid = lextree->d2p->lrdiph_rc[ci][lc][silcipid];
00396
00397 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00398 pnode = (fsg_pnode_t *) gnode_ptr(gn);
00399
00400 if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
00401
00402 fsg_pnode_add_ctxt(pnode, lc);
00403 break;
00404 }
00405 }
00406
00407 if (!gn) {
00408 pnode =
00409 (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00410 pnode->ctx = lextree->ctx;
00411 pnode->next.fsglink = fsglink;
00412 pnode->logs2prob =
00413 fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00414 pnode->ci_ext = s3dict_first_phone(lextree->dict, dictwid);
00415 pnode->ppos = 0;
00416 pnode->leaf = TRUE;
00417 pnode->sibling = root;
00418 fsg_pnode_add_ctxt(pnode, lc);
00419 pnode->alloc_next = head;
00420 head = pnode;
00421 root = pnode;
00422
00423 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00424
00425 lc_pnodelist =
00426 glist_add_ptr(lc_pnodelist, (void *) pnode);
00427 }
00428 }
00429
00430 glist_free(lc_pnodelist);
00431 }
00432 else {
00433 ssid = bin_mdef_pid2ssid(lextree->mdef, ci);
00434
00435 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00436 pnode->ctx = lextree->ctx;
00437 pnode->next.fsglink = fsglink;
00438 pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00439 pnode->ci_ext = silcipid;
00440 pnode->ppos = 0;
00441 pnode->leaf = TRUE;
00442 pnode->sibling = root;
00443 fsg_pnode_add_all_ctxt(&(pnode->ctxt));
00444 pnode->alloc_next = head;
00445 head = pnode;
00446 root = pnode;
00447
00448 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00449 }
00450 }
00451 else {
00452 fsg_pnode_t **ssid_pnode_map;
00453 ssid_pnode_map =
00454 (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
00455 lc_pnodelist = NULL;
00456 rc_pnodelist = NULL;
00457
00458 for (p = 0; p < pronlen; p++) {
00459 int ci = s3dict_pron(lextree->dict, dictwid, p);
00460 if (p == 0) {
00461 rc = s3dict_pron(lextree->dict, dictwid, 1);
00462 for (i = 0; lclist[i] >= 0; i++) {
00463 lc = lclist[i];
00464 ssid = lextree->d2p->ldiph_lc[ci][rc][lc];
00465
00466
00467
00468 pnode = ssid_pnode_map[0];
00469 for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
00470 pnode = ssid_pnode_map[j];
00471 if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
00472 break;
00473 }
00474 assert(j < n_ci);
00475 if (!pnode) {
00476 pnode =
00477 (fsg_pnode_t *) ckd_calloc(1,
00478 sizeof
00479 (fsg_pnode_t));
00480 pnode->ctx = lextree->ctx;
00481 pnode->logs2prob =
00482 fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00483 pnode->ci_ext = s3dict_first_phone(lextree->dict, dictwid);
00484 pnode->ppos = 0;
00485 pnode->leaf = FALSE;
00486 pnode->sibling = root;
00487 pnode->alloc_next = head;
00488 head = pnode;
00489 root = pnode;
00490
00491 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00492
00493 lc_pnodelist =
00494 glist_add_ptr(lc_pnodelist, (void *) pnode);
00495 ssid_pnode_map[j] = pnode;
00496 }
00497 fsg_pnode_add_ctxt(pnode, lc);
00498 }
00499 }
00500 else if (p != pronlen - 1) {
00501 ssid = lextree->d2p->internal[dictwid][p];
00502 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00503 pnode->ctx = lextree->ctx;
00504 pnode->logs2prob = lextree->pip;
00505 pnode->ci_ext = s3dict_pron(lextree->dict, dictwid, p);
00506 pnode->ppos = p;
00507 pnode->leaf = FALSE;
00508 pnode->sibling = NULL;
00509 if (p == 1) {
00510 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00511 pred = (fsg_pnode_t *) gnode_ptr(gn);
00512 pred->next.succ = pnode;
00513 }
00514 }
00515 else {
00516 pred->next.succ = pnode;
00517 }
00518 pnode->alloc_next = head;
00519 head = pnode;
00520
00521 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00522
00523 pred = pnode;
00524 }
00525 else {
00526 xwdssid_t *rssid;
00527 memset((void *) ssid_pnode_map, 0,
00528 n_ci * sizeof(fsg_pnode_t *));
00529 lc = s3dict_pron(lextree->dict, dictwid, p-1);
00530 rssid = dict2pid_rssid(lextree->d2p, ci, lc);
00531
00532 for (i = 0; rclist[i] >= 0; i++) {
00533 rc = rclist[i];
00534
00535 j = rssid->cimap[rc];
00536 ssid = rssid->ssid[j];
00537 pnode = ssid_pnode_map[j];
00538
00539 if (!pnode) {
00540 pnode =
00541 (fsg_pnode_t *) ckd_calloc(1,
00542 sizeof
00543 (fsg_pnode_t));
00544 pnode->ctx = lextree->ctx;
00545 pnode->logs2prob = lextree->pip;
00546 pnode->ci_ext = s3dict_pron(lextree->dict, dictwid, p);
00547 pnode->ppos = p;
00548 pnode->leaf = TRUE;
00549 pnode->sibling = rc_pnodelist ?
00550 (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
00551 pnode->next.fsglink = fsglink;
00552 pnode->alloc_next = head;
00553 head = pnode;
00554
00555 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00556
00557 rc_pnodelist =
00558 glist_add_ptr(rc_pnodelist, (void *) pnode);
00559 ssid_pnode_map[j] = pnode;
00560 }
00561 else {
00562 assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
00563 }
00564 fsg_pnode_add_ctxt(pnode, rc);
00565 }
00566
00567 if (p == 1) {
00568 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00569 pred = (fsg_pnode_t *) gnode_ptr(gn);
00570 pred->next.succ =
00571 (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00572 }
00573 }
00574 else {
00575 pred->next.succ =
00576 (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00577 }
00578 }
00579 }
00580
00581 ckd_free((void *) ssid_pnode_map);
00582 glist_free(lc_pnodelist);
00583 glist_free(rc_pnodelist);
00584 }
00585
00586 *alloc_head = head;
00587
00588 return root;
00589 }
00590
00591
00592
00593
00594
00595 static fsg_pnode_t *
00596 fsg_psubtree_init(fsg_lextree_t *lextree,
00597 fsg_model_t * fsg, int32 from_state,
00598 fsg_pnode_t ** alloc_head)
00599 {
00600 int32 dst;
00601 gnode_t *gn;
00602 fsg_link_t *fsglink;
00603 fsg_pnode_t *root;
00604 int32 n_ci;
00605
00606 root = NULL;
00607 assert(*alloc_head == NULL);
00608
00609 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00610 if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
00611 E_FATAL
00612 ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
00613 FSG_PNODE_CTXT_BVSZ * 32);
00614 }
00615 for (dst = 0; dst < fsg_model_n_state(fsg); dst++) {
00616
00617 for (gn = fsg_model_trans(fsg, from_state, dst); gn;
00618 gn = gnode_next(gn)) {
00619
00620 fsglink = (fsg_link_t *) gnode_ptr(gn);
00621
00622 assert(fsg_link_wid(fsglink) >= 0);
00623
00624 root = psubtree_add_trans(lextree, root, fsglink,
00625 lextree->lc[from_state],
00626 lextree->rc[dst],
00627 alloc_head);
00628 }
00629 }
00630
00631 return root;
00632 }
00633
00634
00635 static void
00636 fsg_psubtree_free(fsg_pnode_t * head)
00637 {
00638 fsg_pnode_t *next;
00639
00640 while (head) {
00641 next = head->alloc_next;
00642 hmm_deinit(&head->hmm);
00643 ckd_free(head);
00644 head = next;
00645 }
00646 }
00647
00648 static void
00649 fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t * head, FILE * fp)
00650 {
00651 int32 i;
00652 fsg_link_t *tl;
00653
00654 for (; head; head = head->alloc_next) {
00655
00656 for (i = 0; i <= head->ppos; i++)
00657 fprintf(fp, " ");
00658
00659 fprintf(fp, "%p.@", head);
00660 fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&head->hmm));
00661 fprintf(fp, " %10d.LP", head->logs2prob);
00662 fprintf(fp, " %p.SIB", head->sibling);
00663 fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, head->ci_ext), head->ppos);
00664 if ((head->ppos == 0) || head->leaf) {
00665 fprintf(fp, " [");
00666 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00667 fprintf(fp, "%08x", head->ctxt.bv[i]);
00668 fprintf(fp, "]");
00669 }
00670 if (head->leaf) {
00671 tl = head->next.fsglink;
00672 fprintf(fp, " {%s[%d->%d](%d)}",
00673 fsg_model_word_str(tree->fsg, tl->wid),
00674 tl->from_state, tl->to_state, tl->logs2prob);
00675 }
00676 else {
00677 fprintf(fp, " %p.NXT", head->next.succ);
00678 }
00679 fprintf(fp, "\n");
00680 }
00681
00682 fflush(fp);
00683 }
00684
00685
00686 void
00687 fsg_psubtree_pnode_deactivate(fsg_pnode_t * pnode)
00688 {
00689 hmm_clear(&pnode->hmm);
00690 }