00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060 #include <stdio.h>
00061 #include <string.h>
00062 #include <ctype.h>
00063 #include "lts.h"
00064 #include "ckd_alloc.h"
00065 #include "bio.h"
00066
00067
00068 static cst_lts_phone apply_model(cst_lts_letter * vals,
00069 cst_lts_addr start,
00070 const cst_lts_model * model);
00071
00072 void
00073 lex_print(lex_entry_t * ent)
00074 {
00075 int i;
00076
00077 for (i = 0; i < ent->phone_cnt; ++i) {
00078 printf("%s ", ent->phone[i]);
00079 }
00080 printf("\n");
00081 fflush(stdout);
00082 }
00083
/*
 * Return a newly allocated, NUL-terminated copy of up to `length`
 * characters of `str` beginning at offset `start`.
 *
 * Returns NULL when `str` is NULL.  The result is obtained from
 * ckd_malloc(); the caller owns it.  No bounds checking is done on
 * `start`/`length` here, so callers must pass offsets inside `str`.
 */
static char *
cst_substr(const char *str, int start, int length)
{
    char *copy;

    if (!str)
        return NULL;

    copy = ckd_malloc(length + 1);
    /* strncpy stops at a NUL in the source and zero-pads the remainder. */
    strncpy(copy, str + start, length);
    copy[length] = '\0';

    return copy;
}
00096
00097 int
00098 lts_apply(const char *in_word, const char *feats,
00099 const cst_lts_rules * r, lex_entry_t * out_phones)
00100 {
00101 int pos, index, i, maxphones;
00102 cst_lts_letter *fval_buff;
00103 cst_lts_letter *full_buff;
00104 cst_lts_phone phone;
00105 char *left, *right, *p;
00106 char hash;
00107 char zeros[8];
00108 char *word;
00109
00110
00111 word = ckd_salloc((char *) in_word);
00112 if (!r->letter_table)
00113 for (i = 0; i < strlen(word); ++i)
00114 word[i] = tolower(word[i]);
00115
00116
00117 maxphones = strlen(word) + 10;
00118 out_phones->phone = ckd_malloc(maxphones * sizeof(char *));
00119 out_phones->ci_acmod_id = ckd_malloc(maxphones * sizeof(acmod_id_t));
00120 out_phones->phone_cnt = 0;
00121
00122
00123 fval_buff = ckd_calloc((r->context_window_size * 2) +
00124 r->context_extra_feats, sizeof(cst_lts_letter));
00125
00126 full_buff = ckd_calloc((r->context_window_size * 2) +
00127 strlen(word) + 1, sizeof(cst_lts_letter));
00128 if (r->letter_table) {
00129 for (i = 0; i < 8; i++)
00130 zeros[i] = 2;
00131 sprintf((char *)full_buff, "%.*s%c%s%c%.*s",
00132 r->context_window_size - 1, zeros,
00133 1, word, 1, r->context_window_size - 1, zeros);
00134 hash = 1;
00135 }
00136 else {
00137
00138 sprintf((char *)full_buff, "%.*s#%s#%.*s",
00139 r->context_window_size - 1, "00000000",
00140 word, r->context_window_size - 1, "00000000");
00141 hash = '#';
00142 }
00143
00144
00145 for (pos = r->context_window_size; full_buff[pos] != hash; ++pos) {
00146
00147 sprintf((char *)fval_buff, "%.*s%.*s%s",
00148 r->context_window_size,
00149 full_buff + pos - r->context_window_size,
00150 r->context_window_size, full_buff + pos + 1, feats);
00151 if ((!r->letter_table
00152 && ((full_buff[pos] < 'a') || (full_buff[pos] > 'z')))) {
00153 #ifdef EXCESSIVELY_CHATTY
00154 E_WARN("lts:skipping unknown char \"%c\"\n", full_buff[pos]);
00155 #endif
00156 continue;
00157 }
00158 if (r->letter_table)
00159 index = full_buff[pos] - 3;
00160 else
00161 index = (full_buff[pos] - 'a') % 26;
00162 phone = apply_model(fval_buff, r->letter_index[index], r->models);
00163
00164 if (0 == strcmp("epsilon", r->phone_table[phone]))
00165 continue;
00166
00167 if (out_phones->phone_cnt + 2 > maxphones) {
00168 maxphones += 10;
00169 out_phones->phone = ckd_realloc(out_phones->phone,
00170 maxphones * sizeof(char *));
00171 out_phones->ci_acmod_id = ckd_realloc(out_phones->ci_acmod_id,
00172 maxphones *
00173 sizeof(acmod_id_t));
00174 }
00175 if ((p = strchr(r->phone_table[phone], '-')) != NULL) {
00176 left = cst_substr(r->phone_table[phone], 0,
00177 strlen(r->phone_table[phone]) - strlen(p));
00178 right = cst_substr(r->phone_table[phone],
00179 (strlen(r->phone_table[phone]) -
00180 strlen(p)) + 1, (strlen(p) - 1));
00181 out_phones->phone[out_phones->phone_cnt++] = left;
00182 out_phones->phone[out_phones->phone_cnt++] = right;
00183 }
00184 else
00185 out_phones->phone[out_phones->phone_cnt++] =
00186 ckd_salloc((char *) r->phone_table[phone]);
00187 }
00188
00189
00190 ckd_free(full_buff);
00191 ckd_free(fval_buff);
00192 ckd_free(word);
00193 return S3_SUCCESS;
00194 }
00195
00196 static cst_lts_phone
00197 apply_model(cst_lts_letter * vals, cst_lts_addr start,
00198 const cst_lts_model * model)
00199 {
00200
00201
00202
00203
00204
00205 cst_lts_rule state;
00206 unsigned short nstate;
00207 static const int sizeof_cst_lts_rule = sizeof(cst_lts_rule);
00208
00209 memmove(&state, &model[start * sizeof_cst_lts_rule],
00210 sizeof_cst_lts_rule);
00211 for (; state.feat != CST_LTS_EOR;) {
00212 if (vals[state.feat] == state.val)
00213 nstate = state.qtrue;
00214 else
00215 nstate = state.qfalse;
00216
00217 #if defined(WORDS_BIGENDIAN)
00218 SWAP_INT16(&nstate);
00219 #endif
00220
00221 memmove(&state, &model[nstate * sizeof_cst_lts_rule],
00222 sizeof_cst_lts_rule);
00223 }
00224
00225 return (cst_lts_phone) state.val;
00226 }
00227
00228 #ifdef UNIT_TEST
00229
00230
00231 int
00232 main(int argc, char *argv[])
00233 {
00234 lex_entry_t out;
00235 int i;
00236
00237 lts_apply("HELLO", "", &cmu6_lts_rules, &out);
00238 lex_print(&out);
00239 ckd_free(out.phone);
00240 ckd_free(out.ci_acmod_id);
00241
00242 lts_apply("EXCELLENT", "", &cmu6_lts_rules, &out);
00243 lex_print(&out);
00244 ckd_free(out.phone);
00245 ckd_free(out.ci_acmod_id);
00246
00247 lts_apply("TWELVE", "", &cmu6_lts_rules, &out);
00248 lex_print(&out);
00249 ckd_free(out.phone);
00250 ckd_free(out.ci_acmod_id);
00251
00252 return 0;
00253 }
00254 #endif