src/libpocketsphinx/s3dict.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * dict.c -- Pronunciation dictionary.
00039  *
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1997 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log: dict.c,v $
00049  * Revision 1.7  2006/02/28  02:06:46  egouvea
00050  * Updated MS Visual C++ 6.0 support files. Fixed things that didn't
00051  * compile in Visual C++ (declarations didn't match, etc). There are
00052  * still some warnings, so this is not final. Also, sorted files in
00053  * several Makefile.am.
00054  * 
00055  * Revision 1.6  2006/02/22 20:55:06  arthchan2003
00056  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH:
00057  *
00058  * 1, Added Letter-to-sound LTS rule, dict_init will only specify
00059  * d->lts_rules to be true if the useLTS is specified.  Only if
00060  * d->lts_rules is specified, the LTS logic will be used. The code safe
00061  * guarded the case when a phone in mdef doesn't appear in LTS, in that
00062  * case, the code will force exit.
00063  *
00064  * 2, The LTS logic is only used as a reserved measure.  By default, it
00065  * is not turned on.  See also the comment in kbcore.c and the default
00066  * parameters in revision 1.3 cmdln_macro.h . We added it because we have
00067  * this functionality in SphinxTrain.
00068  *
00069  * Revision 1.5.4.2  2006/01/16 19:53:17  arthchan2003
00070  * Changed the option name from -ltsoov to -lts_mismatch
00071  *
00072  * Revision 1.5.4.1  2005/09/25 19:12:09  arthchan2003
00073  * Added optional LTS support for the dictionary.
00074  *
00075  * Revision 1.5  2005/06/21 21:04:36  arthchan2003
00076  * 1, Introduced a reporting routine. 2, Fixed doxygen documentation, 3, Added  keyword.
00077  *
00078  * Revision 1.5  2005/06/19 03:58:16  archan
00079  * 1, Move checking of Silence wid, start wid, finish wid to dict_init. This unifies the checking and removes several segments of redundant code. 2, Remove all startwid, silwid and finishwid.  They are artefacts of 3.0/3.x merging. This is already implemented in dict.  (In align, startwid, endwid, finishwid occurred in several places.  Checking is also done multiple times.) 3, Making corresponding changes to all files which have variable startwid, silwid and finishwid.  Should make use of the macro more.
00080  *
00081  * Revision 1.4  2005/04/21 23:50:26  archan
00082  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
00083  *
00084  * Revision 1.3  2005/03/30 01:22:46  archan
00085  * Fixed mistakes in last updates. Add
00086  *
00087  * 19-Apr-01    Ricky Houghton, added code for freeing memory that is allocated internally.
00088  * 
00089  * 23-Apr-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00090  *              Made usage of mdef optional.  If no mdef is specified while loading
00091  *              a dictionary, it maintains the needed CI phone information internally.
00092  *              Added dict_ciphone_str().
00093  * 
00094  * 02-Jul-97    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00095  *              Added startwid, finishwid, silwid to dict_t.  Modified dict_filler_word
00096  *              to check for start and finishwid.
00097  * 
00098  * 07-Feb-97    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00099  *              Created from previous Sphinx-3 version.
00100  */
00101 
00102 
00103 #include <string.h>
00104 
00105 #include "strfuncs.h"
00106 #include "s3dict.h"
00107 
00108 
00109 #define DELIM   " \t\n"         /* Set of field separator characters */
00110 #define DEFAULT_NUM_PHONE       (MAX_S3CIPID+1)
00111 
00112 #if WIN32
00113 #define snprintf sprintf_s
00114 #endif 
00115 
00116 extern const char *const cmu6_lts_phone_table[];
00117 
00118 static s3cipid_t
00119 s3dict_ciphone_id(s3dict_t * d, const char *str)
00120 {
00121     return bin_mdef_ciphone_id(d->mdef, str);
00122 }
00123 
00124 
00125 const char *
00126 s3dict_ciphone_str(s3dict_t * d, s3wid_t wid, int32 pos)
00127 {
00128     assert(d != NULL);
00129     assert((wid >= 0) && (wid < d->n_word));
00130     assert((pos >= 0) && (pos < d->word[wid].pronlen));
00131 
00132     return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]);
00133 }
00134 
00135 
00136 s3wid_t
00137 s3dict_add_word(s3dict_t * d, char *word, s3cipid_t * p, int32 np)
00138 {
00139     int32 len;
00140     dictword_t *wordp;
00141     s3wid_t newwid;
00142 
00143     if (d->n_word >= d->max_words) {
00144         E_INFO
00145             ("Dictionary max size (%d) exceeded; reallocate another entries %d \n",
00146              d->max_words, S3DICT_INC_SZ);
00147         d->word =
00148             (dictword_t *) ckd_realloc(d->word,
00149                                        (d->max_words +
00150                                         S3DICT_INC_SZ) * sizeof(dictword_t));
00151         d->max_words = d->max_words + S3DICT_INC_SZ;
00152 
00153         return (BAD_S3WID);
00154     }
00155 
00156     wordp = d->word + d->n_word;
00157     wordp->word = (char *) ckd_salloc(word);    /* Freed in s3dict_free */
00158 
00159     /* Associate word string with d->n_word in hash table */
00160     if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) {
00161         ckd_free(wordp->word);
00162         return (BAD_S3WID);
00163     }
00164 
00165     /* Fill in word entry, and set defaults */
00166     if (p && (np > 0)) {
00167         wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t));      /* Freed in s3dict_free */
00168         memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
00169         wordp->pronlen = np;
00170     }
00171     else {
00172         wordp->ciphone = NULL;
00173         wordp->pronlen = 0;
00174     }
00175     wordp->alt = BAD_S3WID;
00176     wordp->basewid = d->n_word;
00177 
00178     /* Determine base/alt wids */
00179     if ((len = s3dict_word2basestr(word)) > 0) {
00180         int32 w;
00181 
00182         /* Truncated to a baseword string; find its ID */
00183         if (hash_table_lookup_int32(d->ht, word, &w) < 0) {
00184             word[len] = '(';    /* Get back the original word */
00185             E_FATAL("Missing base word for: %s\n", word);
00186         }
00187         else
00188             word[len] = '(';    /* Get back the original word */
00189 
00190         /* Link into alt list */
00191         wordp->basewid = w;
00192         wordp->alt = d->word[w].alt;
00193         d->word[w].alt = d->n_word;
00194     }
00195 
00196     newwid = d->n_word++;
00197 
00198     return newwid;
00199 }
00200 
00201 
00202 static int32
00203 s3dict_read(FILE * fp, s3dict_t * d)
00204 {
00205     char line[16384], **wptr;
00206     s3cipid_t p[4096];
00207     int32 lineno, nwd;
00208     s3wid_t w;
00209     int32 i, maxwd;
00210     s3cipid_t ci;
00211     int32 ph;
00212 
00213     maxwd = 4092;
00214     wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */
00215 
00216     lineno = 0;
00217     while (fgets(line, sizeof(line), fp) != NULL) {
00218         lineno++;
00219         if (line[0] == '#')     /* Comment line */
00220             continue;
00221 
00222         if ((nwd = str2words(line, wptr, maxwd)) < 0)
00223             E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line,
00224                     maxwd);
00225 
00226         if (nwd == 0)           /* Empty line */
00227             continue;
00228         /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
00229         if (nwd == 1) {
00230             E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
00231                     lineno, wptr[0]);
00232             continue;
00233         }
00234 
00235         /* Convert pronunciation string to CI-phone-ids */
00236         for (i = 1; i < nwd; i++) {
00237             p[i - 1] = s3dict_ciphone_id(d, wptr[i]);
00238             if (NOT_S3CIPID(p[i - 1])) {
00239                 E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
00240                         lineno, wptr[i], wptr[0]);
00241                 break;
00242             }
00243         }
00244 
00245         if (i == nwd) {         /* All CI-phones successfully converted to IDs */
00246             w = s3dict_add_word(d, wptr[0], p, nwd - 1);
00247             if (NOT_S3WID(w))
00248                 E_ERROR
00249                     ("Line %d: s3dict_add_word (%s) failed (duplicate?); ignored\n",
00250                      lineno, wptr[0]);
00251         }
00252     }
00253 
00254 
00255     if (d->lts_rules) {
00256 
00257 #if 1                           /* Until we allow user to put in a mapping of the phoneset from LTS to the phoneset from mdef, 
00258                                    The checking will intrusively stop the recognizer.  */
00259 
00260         for (ci = 0; ci < bin_mdef_n_ciphone(d->mdef); ci++) {
00261 
00262             if (!bin_mdef_is_fillerphone(d->mdef, ci)) {
00263                 for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) {
00264 
00265                     /*        E_INFO("%s %s\n",cmu6_lts_phone_table[ph],mdef_ciphone_str(d->mdef,ci)); */
00266                     if (!strcmp
00267                         (cmu6_lts_phone_table[ph],
00268                          bin_mdef_ciphone_str(d->mdef, ci)))
00269                         break;
00270                 }
00271                 if (cmu6_lts_phone_table[ph] == NULL) {
00272                     E_FATAL
00273                         ("A phone in the model definition doesn't appear in the letter to sound ",
00274                          "rules. \n This is case we don't recommend user to ",
00275                          "use the built-in LTS. \n Please kindly turn off ",
00276                          "-lts_mismatch\n");
00277                 }
00278             }
00279         }
00280 #endif
00281     }
00282     ckd_free(wptr);
00283 
00284     return 0;
00285 }
00286 
00287 s3dict_t *
00288 s3dict_init(bin_mdef_t * mdef, const char *dictfile, const char *fillerfile,
00289             int useLTS, int breport)
00290 {
00291     FILE *fp, *fp2;
00292     int32 n;
00293     char line[1024];
00294     s3dict_t *d;
00295     s3cipid_t sil;
00296 
00297     if (!dictfile)
00298         E_FATAL("No dictionary file\n");
00299 
00300     /*
00301      * First obtain #words in dictionary (for hash table allocation).
00302      * Reason: The PC NT system doesn't like to grow memory gradually.  Better to allocate
00303      * all the required memory in one go.
00304      */
00305     if ((fp = fopen(dictfile, "r")) == NULL)
00306         E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile);
00307     n = 0;
00308     while (fgets(line, sizeof(line), fp) != NULL) {
00309         if (line[0] != '#')
00310             n++;
00311     }
00312     rewind(fp);
00313 
00314     fp2 = NULL;
00315     if (fillerfile) {
00316         if ((fp2 = fopen(fillerfile, "r")) == NULL)
00317             E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile);
00318 
00319         while (fgets(line, sizeof(line), fp2) != NULL) {
00320             if (line[0] != '#')
00321                 n++;
00322         }
00323         rewind(fp2);
00324     }
00325 
00326     /*
00327      * Allocate dict entries.  HACK!!  Allow some extra entries for words not in file.
00328      * Also check for type size restrictions.
00329      */
00330     d = (s3dict_t *) ckd_calloc(1, sizeof(s3dict_t));       /* freed in s3dict_free() */
00331     d->refcnt = 1;
00332     d->max_words =
00333         (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
00334     if (n >= MAX_S3WID)
00335         E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n,
00336                 MAX_S3WID);
00337 
00338     d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t));      /* freed in s3dict_free() */
00339     d->n_word = 0;
00340     d->mdef = bin_mdef_retain(mdef);
00341 
00342     /* Create new hash table for word strings; case-insensitive word strings */
00343     d->ht = hash_table_new(d->max_words, 1 /* no-case */ );
00344 
00345     d->lts_rules = NULL;
00346     if (useLTS)
00347         d->lts_rules = (lts_t *) & (cmu6_lts_rules);
00348 
00349 
00350     /* Digest main dictionary file */
00351     E_INFO("Reading main dictionary: %s\n", dictfile);
00352     s3dict_read(fp, d);
00353     fclose(fp);
00354     E_INFO("%d words read\n", d->n_word);
00355 
00356     /* Now the filler dictionary file, if it exists */
00357     d->filler_start = d->n_word;
00358     if (fillerfile) {
00359         E_INFO("Reading filler dictionary: %s\n", fillerfile);
00360         s3dict_read(fp2, d);
00361         fclose(fp2);
00362         E_INFO("%d words read\n", d->n_word - d->filler_start);
00363     }
00364     sil = bin_mdef_silphone(mdef);
00365     if (s3dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
00366         s3dict_add_word(d, S3_START_WORD, &sil, 1);
00367     }
00368     if (s3dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
00369         s3dict_add_word(d, S3_FINISH_WORD, &sil, 1);
00370     }
00371     if (s3dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
00372         s3dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
00373     }
00374 
00375     d->filler_end = d->n_word - 1;
00376 
00377     /* Initialize distinguished word-ids */
00378     d->startwid = s3dict_wordid(d, S3_START_WORD);
00379     d->finishwid = s3dict_wordid(d, S3_FINISH_WORD);
00380     d->silwid = s3dict_wordid(d, S3_SILENCE_WORD);
00381 
00382     if ((d->filler_start > d->filler_end)
00383         || (!s3dict_filler_word(d, d->silwid)))
00384         E_FATAL("%s must occur (only) in filler dictionary\n",
00385                 S3_SILENCE_WORD);
00386 
00387     /* No check that alternative pronunciations for filler words are in filler range!! */
00388 
00389     return d;
00390 }
00391 
00392 
00393 s3wid_t
00394 s3dict_wordid(s3dict_t * d, const char *word)
00395 {
00396     int32 w;
00397 
00398     assert(d);
00399     assert(word);
00400 
00401     if (hash_table_lookup_int32(d->ht, word, &w) < 0)
00402         return (BAD_S3WID);
00403     return w;
00404 }
00405 
00406 
00407 s3wid_t
00408 _s3dict_basewid(s3dict_t * d, s3wid_t w)
00409 {
00410     assert(d);
00411     assert((w >= 0) && (w < d->n_word));
00412 
00413     return (d->word[w].basewid);
00414 }
00415 
00416 
00417 char *
00418 _s3dict_wordstr(s3dict_t * d, s3wid_t wid)
00419 {
00420     assert(d);
00421     assert(IS_S3WID(wid) && (wid < d->n_word));
00422 
00423     return (d->word[wid].word);
00424 }
00425 
00426 
00427 s3wid_t
00428 _s3dict_nextalt(s3dict_t * d, s3wid_t wid)
00429 {
00430     assert(d);
00431     assert(IS_S3WID(wid) && (wid < d->n_word));
00432 
00433     return (d->word[wid].alt);
00434 }
00435 
00436 
00437 int
00438 s3dict_filler_word(s3dict_t * d, s3wid_t w)
00439 {
00440     assert(d);
00441     assert((w >= 0) && (w < d->n_word));
00442 
00443     w = s3dict_basewid(d, w);
00444     if ((w == d->startwid) || (w == d->finishwid))
00445         return 0;
00446     if ((w >= d->filler_start) && (w <= d->filler_end))
00447         return 1;
00448     return 0;
00449 }
00450 
00451 int
00452 s3dict_real_word(s3dict_t * d, s3wid_t w)
00453 {
00454     assert(d);
00455     assert((w >= 0) && (w < d->n_word));
00456 
00457     w = s3dict_basewid(d, w);
00458     if ((w == d->startwid) || (w == d->finishwid))
00459         return 0;
00460     if ((w >= d->filler_start) && (w <= d->filler_end))
00461         return 0;
00462     return 1;
00463 }
00464 
00465 
/**
 * Strip a trailing "(...)" alternative-pronunciation marker in place.
 *
 * If word has the form "<baseword>(...)" with a non-empty <baseword>,
 * the last '(' before the final ')' is overwritten with '\0', leaving
 * just the base word.  The caller can restore the original string by
 * writing '(' back at the returned index.
 *
 * @param word NUL-terminated, writable word string.
 * @return Index of the overwritten '(' (always > 0), or -1 if the word
 *         was not of that form and was left untouched.
 */
int32
s3dict_word2basestr(char *word)
{
    int32 i, len;

    len = (int32) strlen(word);

    /*
     * Need at least one base character plus ')'.  This also keeps an
     * empty string from reading word[-1].
     */
    if ((len < 2) || (word[len - 1] != ')'))
        return -1;

    /* Scan backwards for the matching '('; index 0 is excluded so the
     * base word is never empty. */
    for (i = len - 2; (i > 0) && (word[i] != '('); --i)
        ;

    if (i <= 0)
        return -1;

    /* The word is of the form <baseword>(...); strip from left-paren */
    word[i] = '\0';
    return i;
}
00484 
00485 s3dict_t *
00486 s3dict_retain(s3dict_t *d)
00487 {
00488     ++d->refcnt;
00489     return d;
00490 }
00491 
00492 int
00493 s3dict_free(s3dict_t * d)
00494 {
00495     int i;
00496     dictword_t *word;
00497 
00498     if (d == NULL)
00499         return 0;
00500     if (--d->refcnt > 0)
00501         return d->refcnt;
00502 
00503     /* First Step, free all memory allocated for each word */
00504     for (i = 0; i < d->n_word; i++) {
00505         word = (dictword_t *) & (d->word[i]);
00506         if (word->word)
00507             ckd_free((void *) word->word);
00508         if (word->ciphone)
00509             ckd_free((void *) word->ciphone);
00510     }
00511 
00512     if (d->word)
00513         ckd_free((void *) d->word);
00514     if (d->ht)
00515         hash_table_free(d->ht);
00516     bin_mdef_free(d->mdef);
00517     ckd_free((void *) d);
00518 
00519     return 0;
00520 }
00521 
00522 void
00523 s3dict_report(s3dict_t * d)
00524 {
00525     E_INFO_NOFN("Initialization of s3dict_t, report:\n");
00526     E_INFO_NOFN("Max word: %d\n", d->max_words);
00527     E_INFO_NOFN("No of word: %d\n", d->n_word);
00528     E_INFO_NOFN("\n");
00529 }

Generated on Mon Jan 24 21:50:16 2011 for PocketSphinx by  doxygen 1.4.7