include/ngram_model.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00043 #ifndef __NGRAM_MODEL_H__
00044 #define __NGRAM_MODEL_H__
00045 
00046 /* Win32/WinCE DLL gunk */
00047 #include <sphinxbase_export.h>
00048 #include <prim_type.h>
00049 #include <cmd_ln.h>
00050 #include <logmath.h>
00051 #include <mmio.h>
00052 #include <stdarg.h>
00053 
/* When compiled as C++, open an extern "C" linkage block around the
 * declarations that follow. */
00054 #ifdef __cplusplus
00055 extern "C" {
00056 #endif
/* The #if 0 region below is never compiled. It only contains a closing brace
 * that textually pairs with the brace opened above, for the editor's benefit. */
00057 #if 0
00058 /* Fool Emacs. */
00059 }
00060 #endif
00061 
/** Handle type for an N-gram model. struct ngram_model_s is only
 *  forward-declared here; no members are visible in this header. */
00065 typedef struct ngram_model_s ngram_model_t;
00066 
/** Handle type for an N-gram class. struct ngram_class_s is only
 *  forward-declared here; no members are visible in this header. */
00070 typedef struct ngram_class_s ngram_class_t;
00071 
/**
 * File type selector; it is the type of the file_type argument of
 * ngram_model_read() and of the format argument of ngram_model_write().
 * No explicit values are given, so the enumerators are 0..3 in order.
 * NOTE(review): the per-enumerator meanings below are inferred from the
 * names only -- confirm against the implementation.
 */
00075 typedef enum ngram_file_type_e {
00076     NGRAM_AUTO,  /* presumably: determine the file type automatically */
00077     NGRAM_ARPA,  /* presumably: ARPA text format */
00078     NGRAM_DMP,   /* presumably: binary "DMP" dump format */
00079     NGRAM_DMP32  /* presumably: a 32-bit variant of the DMP format */
00080 } ngram_file_type_t;
00081 
/**
 * Sentinel word ID, expanding to -1.
 * NOTE(review): presumably the value word-ID lookups yield for a word that
 * is not found -- confirm against the implementation.
 * NOTE(review): the expansion is a bare -1 with no enclosing parentheses.
 */
00082 #define NGRAM_INVALID_WID -1 
/**
 * Prototype only; the definition is not in this header.
 *
 * @param config    command-line/configuration object (cmd_ln_t).
 * @param file_name name of the model file.
 * @param file_type file type selector (see ngram_file_type_t).
 * @param lmath     log-math object (logmath_t).
 * @return pointer to an ngram_model_t.
 *
 * NOTE(review): presumably loads the model stored in file_name and returns
 * NULL on failure; whether config may be NULL is not visible here -- confirm.
 */
00104 SPHINXBASE_EXPORT
00105 ngram_model_t *ngram_model_read(cmd_ln_t *config,
00106                                 const char *file_name,
00107                                 ngram_file_type_t file_type,
00108                                 logmath_t *lmath);
00109 
/**
 * Prototype only; the definition is not in this header.
 *
 * @param model     model handle.
 * @param file_name name of the output file.
 * @param format    file type selector (see ngram_file_type_t).
 * @return an int status.
 *
 * NOTE(review): presumably writes model to file_name in the given format;
 * the meaning of the return value (0 on success?) must be confirmed.
 */
00117 SPHINXBASE_EXPORT
00118 int ngram_model_write(ngram_model_t *model, const char *file_name,
00119                       ngram_file_type_t format);
00120 
/**
 * Prototype only. Takes a model handle and returns a model handle.
 * NOTE(review): by its name, presumably takes an additional reference on
 * model and returns the same pointer -- confirm against the implementation.
 */
00126 SPHINXBASE_EXPORT
00127 ngram_model_t *ngram_model_retain(ngram_model_t *model);
00128 
/**
 * Prototype only. Takes a model handle and returns an int.
 * NOTE(review): presumably the counterpart of ngram_model_retain(), dropping
 * one reference; what the int return value encodes is not visible here --
 * confirm before relying on it.
 */
00134 SPHINXBASE_EXPORT
00135 int ngram_model_free(ngram_model_t *model);
00136 
/**
 * Prototype only. Takes a model and two C strings, from and to; returns int.
 * NOTE(review): presumably re-encodes the model's word strings from the
 * character encoding named by from to the one named by to -- confirm.
 */
00153 SPHINXBASE_EXPORT
00154 int ngram_model_recode(ngram_model_t *model, const char *from, const char *to);
00155 
/**
 * Prototype only. Takes a model and three float32 values; returns int.
 * NOTE(review): lw, wip and uw are presumably a language weight, a word
 * insertion penalty and a unigram weight; whether they are in the linear or
 * the log domain is not visible here -- confirm.
 */
00167 SPHINXBASE_EXPORT
00168 int ngram_model_apply_weights(ngram_model_t *model,
00169                               float32 lw, float32 wip, float32 uw);
00170 
/**
 * Prototype only. Returns a float32 and takes two int32 pointers.
 * NOTE(review): presumably returns the language weight and stores the
 * word insertion penalty and unigram weight through out_log_wip and
 * out_log_uw (the names suggest log-domain integers); whether either
 * pointer may be NULL is not visible here -- confirm.
 */
00179 SPHINXBASE_EXPORT
00180 float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip,
00181                                 int32 *out_log_uw);
00182 
/**
 * Prototype only. Variadic: takes a model, a word string, and further
 * unnamed arguments; returns an int32.
 * NOTE(review): the variable arguments are presumably additional
 * const char * history words ended by a NULL terminator -- the expected
 * types and terminator are NOT visible here and must be confirmed, since
 * passing the wrong types through "..." is undefined behavior.
 */
00215 SPHINXBASE_EXPORT
00216 int32 ngram_score(ngram_model_t *model, const char *word, ...);
00217 
/**
 * Prototype only. Takes three int32 word IDs, declared in the order
 * w3, w2, w1, plus an int32 pointer n_used; returns an int32.
 * NOTE(review): presumably a trigram score with n_used receiving the order
 * of the N-gram actually used -- confirm, including which of w1/w3 is the
 * most recent word.
 */
00221 SPHINXBASE_EXPORT
00222 int32 ngram_tg_score(ngram_model_t *model,
00223                      int32 w3, int32 w2, int32 w1,
00224                      int32 *n_used);
00225 
/**
 * Prototype only. Two-word-ID counterpart of ngram_tg_score(): takes
 * w2, w1 and an int32 pointer n_used; returns an int32.
 * NOTE(review): presumably a bigram score -- confirm.
 */
00229 SPHINXBASE_EXPORT
00230 int32 ngram_bg_score(ngram_model_t *model,
00231                      int32 w2, int32 w1,
00232                      int32 *n_used);
00233 
/**
 * Prototype only. Takes a word ID, an int32 array pointer history with an
 * element count n_hist, and an int32 pointer n_used; returns an int32.
 * NOTE(review): presumably the general N-gram score for wid given n_hist
 * history word IDs; the ordering of history is not visible here -- confirm.
 */
00237 SPHINXBASE_EXPORT
00238 int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history,
00239                      int32 n_hist, int32 *n_used);
00240 
/**
 * Prototype only. Variadic, with the same parameter list as ngram_score().
 * NOTE(review): presumably returns a probability without the score
 * weighting applied; the types and terminator expected in "..." are not
 * visible here -- confirm.
 */
00251 SPHINXBASE_EXPORT
00252 int32 ngram_prob(ngram_model_t *model, const char *word, ...);
00253 
/**
 * Prototype only. Same parameter list as ngram_ng_score(): a word ID, a
 * history array with n_hist entries, and an int32 pointer n_used.
 * NOTE(review): presumably the word-ID form of ngram_prob() -- confirm.
 */
00260 SPHINXBASE_EXPORT
00261 int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history,
00262                     int32 n_hist, int32 *n_used);
00263 
/**
 * Prototype only. Maps an int32 score to an int32 for the given model.
 * NOTE(review): presumably converts a weighted score back to a plain
 * probability value -- confirm.
 */
00275 SPHINXBASE_EXPORT
00276 int32 ngram_score_to_prob(ngram_model_t *model, int32 score);
00277 
/**
 * Prototype only. Takes a word string and returns an int32.
 * NOTE(review): presumably the word's numeric ID in model; what is returned
 * for a word that is not present is not visible here -- confirm.
 */
00281 SPHINXBASE_EXPORT
00282 int32 ngram_wid(ngram_model_t *model, const char *word);
00283 
/**
 * Prototype only. Takes an int32 word ID and returns a pointer to const
 * char, so the caller must not modify the string through it.
 * NOTE(review): presumably the inverse of ngram_wid(); lifetime of the
 * returned string and the result for an invalid wid must be confirmed.
 */
00287 SPHINXBASE_EXPORT
00288 const char *ngram_word(ngram_model_t *model, int32 wid);
00289 
/**
 * Prototype only. Returns an int32 for the given model.
 * NOTE(review): presumably the ID of the model's "unknown word" entry, and
 * possibly NGRAM_INVALID_WID when there is none -- confirm.
 */
00303 SPHINXBASE_EXPORT
00304 int32 ngram_unknown_wid(ngram_model_t *model);
00305 
/**
 * Prototype only. Returns an int32 for the given model.
 * NOTE(review): presumably the score value that stands for zero
 * probability in this model -- confirm.
 */
00309 SPHINXBASE_EXPORT
00310 int32 ngram_zero(ngram_model_t *model);
00311 
/**
 * Prototype only. Returns an int32 for the given model.
 * NOTE(review): "size" presumably means the N-gram order (N), not a byte
 * or word count -- confirm.
 */
00315 SPHINXBASE_EXPORT
00316 int32 ngram_model_get_size(ngram_model_t *model);
00317 
/**
 * Prototype only. Returns a pointer to const int32, so the caller must not
 * modify the pointed-to values through it.
 * NOTE(review): presumably an array of per-order N-gram counts; its length
 * and ownership are not visible here -- confirm.
 */
00321 SPHINXBASE_EXPORT
00322 int32 const *ngram_model_get_counts(ngram_model_t *model);
00323 
/** Handle type for an N-gram iterator. struct ngram_iter_s is only
 *  forward-declared here; no members are visible in this header. */
00327 typedef struct ngram_iter_s ngram_iter_t;
00328 
/**
 * Prototype only. Takes a model and an int m; returns an iterator handle.
 * NOTE(review): presumably iterates over all N-grams of one order selected
 * by m; whether m is zero-based is not visible here -- confirm.
 */
00337 SPHINXBASE_EXPORT
00338 ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m);
00339 
/**
 * Prototype only. Variadic: a model, a word string and further unnamed
 * arguments; returns an iterator handle.
 * NOTE(review): the types and terminator expected in "..." are not visible
 * here (presumably NULL-terminated history words) -- confirm.
 */
00343 SPHINXBASE_EXPORT
00344 ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...);
00345 
/**
 * Prototype only. Word-ID form taking a history array of n_hist entries;
 * returns an iterator handle.
 * NOTE(review): presumably positions an iterator on one specific N-gram;
 * the result when it does not exist is not visible here -- confirm.
 */
00349 SPHINXBASE_EXPORT
00350 ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
00351 
/**
 * Prototype only. Takes an iterator and two int32 pointers; returns a
 * pointer to const int32.
 * NOTE(review): presumably returns the word IDs of the current N-gram and
 * stores its score and backoff weight through out_score and out_bowt;
 * whether those pointers may be NULL is not visible here -- confirm.
 */
00360 SPHINXBASE_EXPORT
00361 int32 const *ngram_iter_get(ngram_iter_t *itor,
00362                             int32 *out_score,
00363                             int32 *out_bowt);
00364 
/**
 * Prototype only. Takes an iterator and returns an iterator.
 * NOTE(review): presumably yields an iterator over the N-grams that extend
 * the current one; ownership of itor afterwards must be confirmed.
 */
00370 SPHINXBASE_EXPORT
00371 ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor);
00372 
/**
 * Prototype only. Takes an iterator and returns an iterator.
 * NOTE(review): presumably advances to the next entry and returns NULL at
 * the end; whether itor is released in that case must be confirmed.
 */
00376 SPHINXBASE_EXPORT
00377 ngram_iter_t *ngram_iter_next(ngram_iter_t *itor);
00378 
/**
 * Prototype only. Takes an iterator; returns nothing.
 * NOTE(review): presumably releases an iterator that was not run to its
 * end -- confirm.
 */
00382 SPHINXBASE_EXPORT
00383 void ngram_iter_free(ngram_iter_t *itor);
00384 
/**
 * Prototype only. Takes a model, a word string and a float32 weight;
 * returns an int32.
 * NOTE(review): presumably adds word to the model and returns its word ID;
 * the exact meaning of weight is not visible here -- confirm.
 */
00397 SPHINXBASE_EXPORT
00398 int32 ngram_model_add_word(ngram_model_t *model,
00399                            const char *word, float32 weight);
00400 
/**
 * Prototype only. Takes a model and a file name; returns an int32.
 * NOTE(review): presumably loads class definitions from file_name into
 * model; the return convention is not visible here -- confirm.
 */
00414 SPHINXBASE_EXPORT
00415 int32 ngram_model_read_classdef(ngram_model_t *model,
00416                                 const char *file_name);
00417 
/**
 * Prototype only. Takes a class name, a float32 class weight, an array of
 * word strings, a const float32 array of weights, and a count n_words;
 * returns an int32.
 * NOTE(review): words and weights are presumably parallel arrays of
 * n_words entries each. words is declared char ** (not const-qualified);
 * whether the strings are modified or retained is not visible -- confirm.
 */
00426 SPHINXBASE_EXPORT
00427 int32 ngram_model_add_class(ngram_model_t *model,
00428                             const char *classname,
00429                             float32 classweight,
00430                             char **words,
00431                             const float32 *weights,
00432                             int32 n_words);
00433 
/**
 * Prototype only. Takes a class name, a word string and a float32 weight;
 * returns an int32.
 * NOTE(review): presumably adds one word to an existing class and returns
 * its word ID -- confirm.
 */
00443 SPHINXBASE_EXPORT
00444 int32 ngram_model_add_class_word(ngram_model_t *model,
00445                                  const char *classname,
00446                                  const char *word,
00447                                  float32 weight);
00448 
/**
 * Prototype only. Takes a configuration object, an array of model handles,
 * an array of name strings, a const float32 array of weights and a count
 * n_models; returns a model handle.
 * NOTE(review): presumably builds a "set" model that combines n_models
 * existing models, with models/names (and weights, if given) as parallel
 * arrays; whether weights may be NULL and who owns the member models is not
 * visible here -- confirm.
 */
00473 SPHINXBASE_EXPORT
00474 ngram_model_t *ngram_model_set_init(cmd_ln_t *config,
00475                                     ngram_model_t **models,
00476                                     char **names,
00477                                     const float32 *weights,
00478                                     int32 n_models);
00479 
/**
 * Prototype only. Takes a configuration object, a file name lmctlfile and a
 * logmath_t; returns a model handle.
 * NOTE(review): presumably builds a model set from a language-model control
 * file; the file's format is not described in this header -- confirm.
 */
00510 SPHINXBASE_EXPORT
00511 ngram_model_t *ngram_model_set_read(cmd_ln_t *config,
00512                                     const char *lmctlfile,
00513                                     logmath_t *lmath);
00514 
/**
 * Prototype only. Takes a model handle named set; returns an int32.
 * NOTE(review): presumably the number of member models in the set --
 * confirm.
 */
00518 SPHINXBASE_EXPORT
00519 int32 ngram_model_set_count(ngram_model_t *set);
00520 
/** Handle type for an iterator over a model set. struct
 *  ngram_model_set_iter_s is only forward-declared here. */
00524 typedef struct ngram_model_set_iter_s ngram_model_set_iter_t;
00525 
/**
 * Prototype only. Takes a model handle named set; returns a set iterator.
 * NOTE(review): presumably starts iteration over the set's member models;
 * the result for an empty set is not visible here -- confirm.
 */
00531 SPHINXBASE_EXPORT
00532 ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set);
00533 
/**
 * Prototype only. Takes a set iterator and returns a set iterator.
 * NOTE(review): presumably advances to the next member and returns NULL at
 * the end; whether itor is released in that case must be confirmed.
 */
00539 SPHINXBASE_EXPORT
00540 ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor);
00541 
/**
 * Prototype only. Takes a set iterator; returns nothing.
 * NOTE(review): presumably releases an iterator that was not run to its
 * end -- confirm.
 */
00545 SPHINXBASE_EXPORT
00546 void ngram_model_set_iter_free(ngram_model_set_iter_t *itor);
00547 
/**
 * Prototype only. Takes a set iterator and a pointer to a pointer-to-const
 * char; returns a model handle.
 * NOTE(review): presumably returns the member model at the iterator's
 * position and stores its name through lmname; whether lmname may be NULL
 * is not visible here -- confirm.
 */
00555 SPHINXBASE_EXPORT
00556 ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00557                                           char const **lmname);
00558 
/**
 * Prototype only. Takes a set and a name string; returns a model handle.
 * NOTE(review): presumably makes the member called name the set's current
 * model and returns it; the result for an unknown name is not visible here
 * -- confirm.
 */
00565 SPHINXBASE_EXPORT
00566 ngram_model_t *ngram_model_set_select(ngram_model_t *set,
00567                                       const char *name);
00568 
/**
 * Prototype only. Same parameter list and return type as
 * ngram_model_set_select().
 * NOTE(review): presumably finds the member called name without changing
 * which model is current -- confirm.
 */
00575 SPHINXBASE_EXPORT
00576 ngram_model_t *ngram_model_set_lookup(ngram_model_t *set,
00577                                       const char *name);
00578 
/**
 * Prototype only. Takes a set; returns a pointer to const char.
 * NOTE(review): presumably the name of the currently selected member; the
 * lifetime of the returned string is not visible here -- confirm.
 */
00582 SPHINXBASE_EXPORT
00583 const char *ngram_model_set_current(ngram_model_t *set);
00584 
/**
 * Prototype only. Takes a set, an array of const name strings and a const
 * float32 array of weights; returns a model handle. No element count is
 * passed.
 * NOTE(review): presumably switches the set to interpolating its members
 * with the given weights; how the array lengths are determined and whether
 * either array may be NULL is not visible here -- confirm.
 */
00592 SPHINXBASE_EXPORT
00593 ngram_model_t *ngram_model_set_interp(ngram_model_t *set,
00594                                       const char **names,
00595                                       const float32 *weights);
00596 
/**
 * Prototype only. Takes a set, a model handle, a name string, a float32
 * weight and an int flag reuse_widmap; returns a model handle.
 * NOTE(review): presumably adds model to the set under name; reuse_widmap
 * presumably controls whether the existing word-ID mapping is kept instead
 * of being rebuilt. Ownership of model afterwards is not visible -- confirm.
 */
00609 SPHINXBASE_EXPORT
00610 ngram_model_t *ngram_model_set_add(ngram_model_t *set,
00611                                    ngram_model_t *model,
00612                                    const char *name,
00613                                    float32 weight,
00614                                    int reuse_widmap);
00615 
/**
 * Prototype only. Takes a set, a name string and an int flag reuse_widmap;
 * returns a model handle.
 * NOTE(review): presumably removes the member called name and returns it;
 * whether the caller must then free it is not visible here -- confirm.
 */
00624 SPHINXBASE_EXPORT
00625 ngram_model_t *ngram_model_set_remove(ngram_model_t *set,
00626                                       const char *name,
00627                                       int reuse_widmap);
00628 
/**
 * Prototype only. Takes a set, an array of const word strings and a count
 * n_words; returns nothing.
 * NOTE(review): presumably replaces the set's word-ID mapping so that IDs
 * follow the order of words -- confirm.
 */
00632 SPHINXBASE_EXPORT
00633 void ngram_model_set_map_words(ngram_model_t *set,
00634                                const char **words,
00635                                int32 n_words);
00636 
/**
 * Prototype only. Takes a set and an int32 set_wid; returns an int32.
 * NOTE(review): presumably translates a set-level word ID into the word ID
 * used by the currently selected member model; the result when the word is
 * absent from that member is not visible here -- confirm.
 */
00644 SPHINXBASE_EXPORT
00645 int32 ngram_model_set_current_wid(ngram_model_t *set,
00646                                   int32 set_wid);
00647 
/**
 * Prototype only. Takes a set and an int32 set_wid; returns an int32.
 * NOTE(review): presumably a boolean-style result telling whether set_wid
 * is known to the current model(s) -- confirm.
 */
00657 SPHINXBASE_EXPORT
00658 int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid);
00659 
/**
 * Prototype only. Takes a model handle named lm; returns nothing.
 * NOTE(review): presumably discards internally cached or temporary data
 * held by lm; what exactly is flushed is not visible here -- confirm.
 */
00667 SPHINXBASE_EXPORT
00668 void ngram_model_flush(ngram_model_t *lm);
00669 
00670 #ifdef __cplusplus
00671 }
00672 #endif
00673 
00674 
00675 #endif /* __NGRAM_MODEL_H__ */

Generated on Mon Jan 24 21:36:19 2011 for SphinxBase by doxygen 1.4.7