src/libpocketsphinx/fillpen.c

00001 /* ====================================================================
00002  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  *
00035  */
00036 /*
00037  * fillpen.c -- Filler penalties (penalties for words that do not show up in
00038  * the main LM).
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.6  2006/02/23  04:11:13  arthchan2003
00050  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Added silprob and fillprob. Added fillpen_report.
00051  * 
00052  * Revision 1.5.4.1  2005/06/28 06:59:04  arthchan2003
00053  * Add silence probability and filler probability as members of fillpen_t, add reporting functions.
00054  *
00055  * Revision 1.5  2005/06/21 21:09:22  arthchan2003
00056  * 1, Fixed doxygen documentation. 2, Added  keyword.
00057  *
00058  * Revision 1.3  2005/03/30 01:22:46  archan
00059  * Fixed mistakes in last updates. Add
00060  *
00061  * 20-Apr-2001  Ricky Houghton (ricky.houghton@cs.cmu.edu or rhoughton@mediasite.com)
00062  *              Added fillpen_free to free memory allocated by fillpen_init
00063  * 
00064  * 30-Dec-2000  Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
00065  *              Removed language weight application to wip. To maintain
00066  *              comparability between s3decode and current decoder. Does
00067  *              not affect decoding performance.
00068  *
00069  * 24-Feb-2000  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00070  *              Bugfix: Applied language weight to word insertion penalty.
00071  * 
00072  * 11-Oct-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00073  *              Created.
00074  */
00075 
00076 
00077 #include "fillpen.h"
00078 
00079 fillpen_t *
00080 fillpen_init(s3dict_t * dict, const char *file, float64 silprob, float64 fillprob,
00081              float64 lw, float64 wip, logmath_t *logmath)
00082 {
00083     s3wid_t w, bw;
00084     float64 prob;
00085     FILE *fp;
00086     char line[1024], wd[1024];
00087     int32 k;
00088     fillpen_t *_fillpen;
00089 
00090     _fillpen = (fillpen_t *) ckd_calloc(1, sizeof(fillpen_t));
00091 
00092     _fillpen->dict = dict;
00093     _fillpen->lw = lw;
00094     _fillpen->wip = wip;
00095     _fillpen->silprob = silprob;
00096     _fillpen->fillerprob = fillprob;
00097     if (dict->filler_end >= dict->filler_start)
00098         _fillpen->prob =
00099             (int32 *) ckd_calloc(dict->filler_end - dict->filler_start + 1,
00100                                  sizeof(int32));
00101     else
00102         _fillpen->prob = NULL;
00103 
00104     /* Initialize all words with filler penalty (HACK!! backward compatibility) */
00105     prob = fillprob;
00106     for (w = dict->filler_start; w <= dict->filler_end; w++)
00107     _fillpen->prob[w - dict->filler_start] =
00108         (int32) ((logmath_log(logmath, prob) * lw + logmath_log(logmath, wip)));
00109 
00110     /* Overwrite silence penalty (HACK!! backward compatibility) */
00111     w = s3dict_wordid(dict, S3_SILENCE_WORD);
00112     if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
00113         E_FATAL("%s not a filler word in the given dictionary\n",
00114                 S3_SILENCE_WORD);
00115     prob = silprob;
00116     _fillpen->prob[w - dict->filler_start] =
00117         (int32) ((logmath_log(logmath, prob) * lw + logmath_log(logmath, wip)));
00118 
00119     /* Overwrite with filler prob input file, if specified */
00120     if (!file)
00121         return _fillpen;
00122 
00123     E_INFO("Reading filler penalty file: %s\n", file);
00124     if ((fp = fopen(file, "r")) == NULL)
00125         E_FATAL("fopen(%s,r) failed\n", file);
00126     while (fgets(line, sizeof(line), fp) != NULL) {
00127         if (line[0] == '#')     /* Skip comment lines */
00128             continue;
00129 
00130         k = sscanf(line, "%s %lf", wd, &prob);
00131         if ((k != 0) && (k != 2))
00132             E_FATAL("Bad input line: %s\n", line);
00133         w = s3dict_wordid(dict, wd);
00134         if (NOT_S3WID(w) || (w < dict->filler_start)
00135             || (w > dict->filler_end))
00136             E_FATAL("%s not a filler word in the given dictionary\n",
00137                     S3_SILENCE_WORD);
00138 
00139         _fillpen->prob[w - dict->filler_start] =
00140             (int32) ((logmath_log(logmath, prob) * lw + logmath_log(logmath, wip)));
00141     }
00142     fclose(fp);
00143 
00144     /* Replicate fillpen values for alternative pronunciations */
00145     for (w = dict->filler_start; w <= dict->filler_end; w++) {
00146         bw = s3dict_basewid(dict, w);
00147         if (bw != w)
00148             _fillpen->prob[w - dict->filler_start] =
00149                 _fillpen->prob[bw - dict->filler_start];
00150     }
00151 
00152     return _fillpen;
00153 }
00154 
00155 void
00156 fillpen_report(fillpen_t * f)
00157 {
00158     E_INFO_NOFN("Initialization of fillpen_t, report:\n");
00159     E_INFO_NOFN("Language weight =%f \n", f->lw);
00160     E_INFO_NOFN("Word Insertion Penalty =%f \n", f->wip);
00161     E_INFO_NOFN("Silence probability =%f \n", f->silprob);
00162     E_INFO_NOFN("Filler probability =%f \n", f->fillerprob);
00163     E_INFO_NOFN("\n");
00164 
00165 }
00166 
00167 int32
00168 fillpen(fillpen_t * f, s3wid_t w)
00169 {
00170     assert((w >= f->dict->filler_start) && (w <= f->dict->filler_end));
00171     return (f->prob[w - f->dict->filler_start]);
00172 }
00173 
00174 
00175 /* RAH, free memory allocated above */
00176 void
00177 fillpen_free(fillpen_t * f)
00178 {
00179     if (f) {
00180         if (f->prob)
00181             ckd_free((void *) f->prob);
00182         ckd_free((void *) f);
00183     }
00184 }

Generated on Mon Jan 24 21:50:15 2011 for PocketSphinx by  doxygen 1.4.7