00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 #include <string.h>
00104
00105 #include "strfuncs.h"
00106 #include "s3dict.h"
00107
00108
00109 #define DELIM " \t\n"
00110 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1)
00111
00112 #if WIN32
00113 #define snprintf sprintf_s
00114 #endif
00115
00116 extern const char *const cmu6_lts_phone_table[];
00117
00118 static s3cipid_t
00119 s3dict_ciphone_id(s3dict_t * d, const char *str)
00120 {
00121 return bin_mdef_ciphone_id(d->mdef, str);
00122 }
00123
00124
00125 const char *
00126 s3dict_ciphone_str(s3dict_t * d, s3wid_t wid, int32 pos)
00127 {
00128 assert(d != NULL);
00129 assert((wid >= 0) && (wid < d->n_word));
00130 assert((pos >= 0) && (pos < d->word[wid].pronlen));
00131
00132 return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]);
00133 }
00134
00135
00136 s3wid_t
00137 s3dict_add_word(s3dict_t * d, char *word, s3cipid_t * p, int32 np)
00138 {
00139 int32 len;
00140 dictword_t *wordp;
00141 s3wid_t newwid;
00142
00143 if (d->n_word >= d->max_words) {
00144 E_INFO
00145 ("Dictionary max size (%d) exceeded; reallocate another entries %d \n",
00146 d->max_words, S3DICT_INC_SZ);
00147 d->word =
00148 (dictword_t *) ckd_realloc(d->word,
00149 (d->max_words +
00150 S3DICT_INC_SZ) * sizeof(dictword_t));
00151 d->max_words = d->max_words + S3DICT_INC_SZ;
00152
00153 return (BAD_S3WID);
00154 }
00155
00156 wordp = d->word + d->n_word;
00157 wordp->word = (char *) ckd_salloc(word);
00158
00159
00160 if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) {
00161 ckd_free(wordp->word);
00162 return (BAD_S3WID);
00163 }
00164
00165
00166 if (p && (np > 0)) {
00167 wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t));
00168 memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
00169 wordp->pronlen = np;
00170 }
00171 else {
00172 wordp->ciphone = NULL;
00173 wordp->pronlen = 0;
00174 }
00175 wordp->alt = BAD_S3WID;
00176 wordp->basewid = d->n_word;
00177
00178
00179 if ((len = s3dict_word2basestr(word)) > 0) {
00180 int32 w;
00181
00182
00183 if (hash_table_lookup_int32(d->ht, word, &w) < 0) {
00184 word[len] = '(';
00185 E_FATAL("Missing base word for: %s\n", word);
00186 }
00187 else
00188 word[len] = '(';
00189
00190
00191 wordp->basewid = w;
00192 wordp->alt = d->word[w].alt;
00193 d->word[w].alt = d->n_word;
00194 }
00195
00196 newwid = d->n_word++;
00197
00198 return newwid;
00199 }
00200
00201
00202 static int32
00203 s3dict_read(FILE * fp, s3dict_t * d)
00204 {
00205 char line[16384], **wptr;
00206 s3cipid_t p[4096];
00207 int32 lineno, nwd;
00208 s3wid_t w;
00209 int32 i, maxwd;
00210 s3cipid_t ci;
00211 int32 ph;
00212
00213 maxwd = 4092;
00214 wptr = (char **) ckd_calloc(maxwd, sizeof(char *));
00215
00216 lineno = 0;
00217 while (fgets(line, sizeof(line), fp) != NULL) {
00218 lineno++;
00219 if (line[0] == '#')
00220 continue;
00221
00222 if ((nwd = str2words(line, wptr, maxwd)) < 0)
00223 E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line,
00224 maxwd);
00225
00226 if (nwd == 0)
00227 continue;
00228
00229 if (nwd == 1) {
00230 E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
00231 lineno, wptr[0]);
00232 continue;
00233 }
00234
00235
00236 for (i = 1; i < nwd; i++) {
00237 p[i - 1] = s3dict_ciphone_id(d, wptr[i]);
00238 if (NOT_S3CIPID(p[i - 1])) {
00239 E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
00240 lineno, wptr[i], wptr[0]);
00241 break;
00242 }
00243 }
00244
00245 if (i == nwd) {
00246 w = s3dict_add_word(d, wptr[0], p, nwd - 1);
00247 if (NOT_S3WID(w))
00248 E_ERROR
00249 ("Line %d: s3dict_add_word (%s) failed (duplicate?); ignored\n",
00250 lineno, wptr[0]);
00251 }
00252 }
00253
00254
00255 if (d->lts_rules) {
00256
00257 #if 1
00258
00259
00260 for (ci = 0; ci < bin_mdef_n_ciphone(d->mdef); ci++) {
00261
00262 if (!bin_mdef_is_fillerphone(d->mdef, ci)) {
00263 for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) {
00264
00265
00266 if (!strcmp
00267 (cmu6_lts_phone_table[ph],
00268 bin_mdef_ciphone_str(d->mdef, ci)))
00269 break;
00270 }
00271 if (cmu6_lts_phone_table[ph] == NULL) {
00272 E_FATAL
00273 ("A phone in the model definition doesn't appear in the letter to sound ",
00274 "rules. \n This is case we don't recommend user to ",
00275 "use the built-in LTS. \n Please kindly turn off ",
00276 "-lts_mismatch\n");
00277 }
00278 }
00279 }
00280 #endif
00281 }
00282 ckd_free(wptr);
00283
00284 return 0;
00285 }
00286
00287 s3dict_t *
00288 s3dict_init(bin_mdef_t * mdef, const char *dictfile, const char *fillerfile,
00289 int useLTS, int breport)
00290 {
00291 FILE *fp, *fp2;
00292 int32 n;
00293 char line[1024];
00294 s3dict_t *d;
00295 s3cipid_t sil;
00296
00297 if (!dictfile)
00298 E_FATAL("No dictionary file\n");
00299
00300
00301
00302
00303
00304
00305 if ((fp = fopen(dictfile, "r")) == NULL)
00306 E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile);
00307 n = 0;
00308 while (fgets(line, sizeof(line), fp) != NULL) {
00309 if (line[0] != '#')
00310 n++;
00311 }
00312 rewind(fp);
00313
00314 fp2 = NULL;
00315 if (fillerfile) {
00316 if ((fp2 = fopen(fillerfile, "r")) == NULL)
00317 E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile);
00318
00319 while (fgets(line, sizeof(line), fp2) != NULL) {
00320 if (line[0] != '#')
00321 n++;
00322 }
00323 rewind(fp2);
00324 }
00325
00326
00327
00328
00329
00330 d = (s3dict_t *) ckd_calloc(1, sizeof(s3dict_t));
00331 d->refcnt = 1;
00332 d->max_words =
00333 (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
00334 if (n >= MAX_S3WID)
00335 E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n,
00336 MAX_S3WID);
00337
00338 d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t));
00339 d->n_word = 0;
00340 d->mdef = bin_mdef_retain(mdef);
00341
00342
00343 d->ht = hash_table_new(d->max_words, 1 );
00344
00345 d->lts_rules = NULL;
00346 if (useLTS)
00347 d->lts_rules = (lts_t *) & (cmu6_lts_rules);
00348
00349
00350
00351 E_INFO("Reading main dictionary: %s\n", dictfile);
00352 s3dict_read(fp, d);
00353 fclose(fp);
00354 E_INFO("%d words read\n", d->n_word);
00355
00356
00357 d->filler_start = d->n_word;
00358 if (fillerfile) {
00359 E_INFO("Reading filler dictionary: %s\n", fillerfile);
00360 s3dict_read(fp2, d);
00361 fclose(fp2);
00362 E_INFO("%d words read\n", d->n_word - d->filler_start);
00363 }
00364 sil = bin_mdef_silphone(mdef);
00365 if (s3dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
00366 s3dict_add_word(d, S3_START_WORD, &sil, 1);
00367 }
00368 if (s3dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
00369 s3dict_add_word(d, S3_FINISH_WORD, &sil, 1);
00370 }
00371 if (s3dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
00372 s3dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
00373 }
00374
00375 d->filler_end = d->n_word - 1;
00376
00377
00378 d->startwid = s3dict_wordid(d, S3_START_WORD);
00379 d->finishwid = s3dict_wordid(d, S3_FINISH_WORD);
00380 d->silwid = s3dict_wordid(d, S3_SILENCE_WORD);
00381
00382 if ((d->filler_start > d->filler_end)
00383 || (!s3dict_filler_word(d, d->silwid)))
00384 E_FATAL("%s must occur (only) in filler dictionary\n",
00385 S3_SILENCE_WORD);
00386
00387
00388
00389 return d;
00390 }
00391
00392
00393 s3wid_t
00394 s3dict_wordid(s3dict_t * d, const char *word)
00395 {
00396 int32 w;
00397
00398 assert(d);
00399 assert(word);
00400
00401 if (hash_table_lookup_int32(d->ht, word, &w) < 0)
00402 return (BAD_S3WID);
00403 return w;
00404 }
00405
00406
00407 s3wid_t
00408 _s3dict_basewid(s3dict_t * d, s3wid_t w)
00409 {
00410 assert(d);
00411 assert((w >= 0) && (w < d->n_word));
00412
00413 return (d->word[w].basewid);
00414 }
00415
00416
00417 char *
00418 _s3dict_wordstr(s3dict_t * d, s3wid_t wid)
00419 {
00420 assert(d);
00421 assert(IS_S3WID(wid) && (wid < d->n_word));
00422
00423 return (d->word[wid].word);
00424 }
00425
00426
00427 s3wid_t
00428 _s3dict_nextalt(s3dict_t * d, s3wid_t wid)
00429 {
00430 assert(d);
00431 assert(IS_S3WID(wid) && (wid < d->n_word));
00432
00433 return (d->word[wid].alt);
00434 }
00435
00436
00437 int
00438 s3dict_filler_word(s3dict_t * d, s3wid_t w)
00439 {
00440 assert(d);
00441 assert((w >= 0) && (w < d->n_word));
00442
00443 w = s3dict_basewid(d, w);
00444 if ((w == d->startwid) || (w == d->finishwid))
00445 return 0;
00446 if ((w >= d->filler_start) && (w <= d->filler_end))
00447 return 1;
00448 return 0;
00449 }
00450
00451 int
00452 s3dict_real_word(s3dict_t * d, s3wid_t w)
00453 {
00454 assert(d);
00455 assert((w >= 0) && (w < d->n_word));
00456
00457 w = s3dict_basewid(d, w);
00458 if ((w == d->startwid) || (w == d->finishwid))
00459 return 0;
00460 if ((w >= d->filler_start) && (w <= d->filler_end))
00461 return 0;
00462 return 1;
00463 }
00464
00465
/**
 * Truncate a word of the form "base(...)" to "base", in place.
 *
 * If word ends with ')' and contains a '(' at an index greater than 0
 * before it, the last such '(' is overwritten with a NUL terminator.
 * Otherwise word is left untouched.
 *
 * @param word NUL-terminated, writable word string
 * @return the index at which the string was truncated (i.e. the length
 *         of the base form), or -1 if word was not modified
 */
int32
s3dict_word2basestr(char *word)
{
    size_t len;
    int32 i;

    len = strlen(word);

    /* An empty string has no last character to inspect; checking it
     * would read word[-1]. */
    if (len == 0 || word[len - 1] != ')')
        return -1;

    /* Scan backwards for the matching '('; index 0 is excluded so that
     * the base form is never empty. */
    for (i = (int32) len - 2; (i > 0) && (word[i] != '('); --i)
        ;

    if (i <= 0)
        return -1;

    /* The word is of the form <baseword>(...); strip from the '(' */
    word[i] = '\0';
    return i;
}
00484
00485 s3dict_t *
00486 s3dict_retain(s3dict_t *d)
00487 {
00488 ++d->refcnt;
00489 return d;
00490 }
00491
00492 int
00493 s3dict_free(s3dict_t * d)
00494 {
00495 int i;
00496 dictword_t *word;
00497
00498 if (d == NULL)
00499 return 0;
00500 if (--d->refcnt > 0)
00501 return d->refcnt;
00502
00503
00504 for (i = 0; i < d->n_word; i++) {
00505 word = (dictword_t *) & (d->word[i]);
00506 if (word->word)
00507 ckd_free((void *) word->word);
00508 if (word->ciphone)
00509 ckd_free((void *) word->ciphone);
00510 }
00511
00512 if (d->word)
00513 ckd_free((void *) d->word);
00514 if (d->ht)
00515 hash_table_free(d->ht);
00516 bin_mdef_free(d->mdef);
00517 ckd_free((void *) d);
00518
00519 return 0;
00520 }
00521
00522 void
00523 s3dict_report(s3dict_t * d)
00524 {
00525 E_INFO_NOFN("Initialization of s3dict_t, report:\n");
00526 E_INFO_NOFN("Max word: %d\n", d->max_words);
00527 E_INFO_NOFN("No of word: %d\n", d->n_word);
00528 E_INFO_NOFN("\n");
00529 }