00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <string.h>
00039 #include <assert.h>
00040
00041 #include "ckd_alloc.h"
00042 #include "strfuncs.h"
00043 #include "hash_table.h"
00044 #include "err.h"
00045
00046 #include "jsgf_internal.h"
00047 #include "jsgf_parser.h"
00048 #include "jsgf_scanner.h"
00049
00050 int yyparse (yyscan_t yyscanner, jsgf_t *jsgf);
00051
00059 jsgf_atom_t *
00060 jsgf_atom_new(char *name, float weight)
00061 {
00062 jsgf_atom_t *atom;
00063
00064 atom = ckd_calloc(1, sizeof(*atom));
00065 atom->name = ckd_salloc(name);
00066 atom->weight = weight;
00067 return atom;
00068 }
00069
00070 int
00071 jsgf_atom_free(jsgf_atom_t *atom)
00072 {
00073 if (atom == NULL)
00074 return 0;
00075 ckd_free(atom->name);
00076 ckd_free(atom);
00077 return 0;
00078 }
00079
00080 jsgf_t *
00081 jsgf_grammar_new(jsgf_t *parent)
00082 {
00083 jsgf_t *grammar;
00084
00085 grammar = ckd_calloc(1, sizeof(*grammar));
00086
00087
00088 if (parent) {
00089 grammar->rules = parent->rules;
00090 grammar->imports = parent->imports;
00091 grammar->searchpath = parent->searchpath;
00092 grammar->parent = parent;
00093 }
00094 else {
00095 char *jsgf_path;
00096
00097 grammar->rules = hash_table_new(64, 0);
00098 grammar->imports = hash_table_new(16, 0);
00099
00100
00101 #if !defined(_WIN32_WCE)
00102 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
00103 char *word, *c;
00104
00105
00106
00107 word = jsgf_path = ckd_salloc(jsgf_path);
00108 while ((c = strchr(word, ':'))) {
00109 *c = '\0';
00110 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00111 word = c + 1;
00112 }
00113 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00114 grammar->searchpath = glist_reverse(grammar->searchpath);
00115 }
00116 else {
00117
00118 grammar->searchpath = glist_add_ptr(grammar->searchpath, ckd_salloc("."));
00119 }
00120 #endif
00121 }
00122
00123 return grammar;
00124 }
00125
00126 void
00127 jsgf_grammar_free(jsgf_t *jsgf)
00128 {
00129
00130 if (jsgf->parent == NULL) {
00131 hash_iter_t *itor;
00132 gnode_t *gn;
00133
00134 for (itor = hash_table_iter(jsgf->rules); itor;
00135 itor = hash_table_iter_next(itor)) {
00136 ckd_free((char *)itor->ent->key);
00137 jsgf_rule_free((jsgf_rule_t *)itor->ent->val);
00138 }
00139 hash_table_free(jsgf->rules);
00140 for (itor = hash_table_iter(jsgf->imports); itor;
00141 itor = hash_table_iter_next(itor)) {
00142 ckd_free((char *)itor->ent->key);
00143 jsgf_grammar_free((jsgf_t *)itor->ent->val);
00144 }
00145 hash_table_free(jsgf->imports);
00146 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
00147 ckd_free(gnode_ptr(gn));
00148 glist_free(jsgf->searchpath);
00149 for (gn = jsgf->links; gn; gn = gnode_next(gn))
00150 ckd_free(gnode_ptr(gn));
00151 glist_free(jsgf->links);
00152 }
00153 ckd_free(jsgf->name);
00154 ckd_free(jsgf->version);
00155 ckd_free(jsgf->charset);
00156 ckd_free(jsgf->locale);
00157 ckd_free(jsgf);
00158 }
00159
00160 static void
00161 jsgf_rhs_free(jsgf_rhs_t *rhs)
00162 {
00163 gnode_t *gn;
00164
00165 if (rhs == NULL)
00166 return;
00167
00168 jsgf_rhs_free(rhs->alt);
00169 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
00170 jsgf_atom_free(gnode_ptr(gn));
00171 glist_free(rhs->atoms);
00172 ckd_free(rhs);
00173 }
00174
00175 jsgf_atom_t *
00176 jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus)
00177 {
00178 jsgf_rule_t *rule;
00179 jsgf_atom_t *rule_atom;
00180 jsgf_rhs_t *rhs;
00181
00182
00183
00184 rhs = ckd_calloc(1, sizeof(*rhs));
00185 if (plus)
00186 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
00187 else
00188 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
00189 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
00190 rule_atom = jsgf_atom_new(rule->name, 1.0);
00191 rhs = ckd_calloc(1, sizeof(*rhs));
00192 rhs->atoms = glist_add_ptr(NULL, rule_atom);
00193 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
00194 rule->rhs->alt = rhs;
00195
00196 return jsgf_atom_new(rule->name, 1.0);
00197 }
00198
00199 jsgf_rule_t *
00200 jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp)
00201 {
00202 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
00203 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
00204 rhs->alt = exp;
00205 rhs->atoms = glist_add_ptr(NULL, atom);
00206 return jsgf_define_rule(jsgf, NULL, rhs, 0);
00207 }
00208
00209 void
00210 jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to)
00211 {
00212 jsgf_link_t *link;
00213
00214 link = ckd_calloc(1, sizeof(*link));
00215 link->from = from;
00216 link->to = to;
00217 link->atom = atom;
00218 grammar->links = glist_add_ptr(grammar->links, link);
00219 }
00220
00221 static char *
00222 extract_grammar_name(char *rule_name)
00223 {
00224 char* dot_pos;
00225 char* grammar_name = ckd_salloc(rule_name+1);
00226 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
00227 ckd_free(grammar_name);
00228 return NULL;
00229 }
00230 *dot_pos='\0';
00231 return grammar_name;
00232 }
00233
00234 static char *
00235 jsgf_fullname(jsgf_t *jsgf, const char *name)
00236 {
00237 char *fullname;
00238
00239
00240 if (strchr(name + 1, '.'))
00241 return ckd_salloc(name);
00242
00243
00244 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
00245 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
00246 return fullname;
00247 }
00248
00249 static char *
00250 jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name)
00251 {
00252 char *fullname, *grammar_name;
00253
00254
00255 if (strchr(name + 1, '.'))
00256 return ckd_salloc(name);
00257
00258
00259 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
00260 return ckd_salloc(name);
00261 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
00262 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
00263 ckd_free(grammar_name);
00264
00265 return fullname;
00266 }
00267
00268
00269
00270 static char *
00271 importname2rulename(char *importname)
00272 {
00273 char *rulename = ckd_salloc(importname);
00274 char *last_dotpos;
00275 char *secondlast_dotpos;
00276
00277 if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) {
00278 *last_dotpos='\0';
00279 if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) {
00280 *last_dotpos='.';
00281 *secondlast_dotpos='<';
00282 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
00283 ckd_free(rulename);
00284 return secondlast_dotpos;
00285 }
00286 else {
00287 *last_dotpos='.';
00288 return rulename;
00289 }
00290 }
00291 else {
00292 return rulename;
00293 }
00294 }
00295
00296 static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule);
00297 static int
00298 expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs)
00299 {
00300 gnode_t *gn;
00301 int lastnode;
00302
00303
00304 lastnode = rule->entry;
00305
00306
00307 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
00308 jsgf_atom_t *atom = gnode_ptr(gn);
00309 if (jsgf_atom_is_rule(atom)) {
00310 jsgf_rule_t *subrule;
00311 char *fullname;
00312 gnode_t *subnode;
00313 void *val;
00314
00315
00316 if (0 == strcmp(atom->name, "<NULL>")) {
00317
00318 jsgf_add_link(grammar, atom,
00319 lastnode, grammar->nstate);
00320 lastnode = grammar->nstate;
00321 ++grammar->nstate;
00322 continue;
00323 }
00324 else if (0 == strcmp(atom->name, "<VOID>")) {
00325
00326 return -1;
00327 }
00328
00329 fullname = jsgf_fullname_from_rule(rule, atom->name);
00330 if (hash_table_lookup(grammar->rules, fullname, &val) == -1) {
00331 E_ERROR("Undefined rule in RHS: %s\n", fullname);
00332 ckd_free(fullname);
00333 return -1;
00334 }
00335 ckd_free(fullname);
00336 subrule = val;
00337
00338 for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode))
00339 if (gnode_ptr(subnode) == (void *)subrule)
00340 break;
00341 if (subnode != NULL) {
00342
00343 if (gnode_next(gn) != NULL) {
00344 E_ERROR("Only right-recursion is permitted (in %s.%s)\n",
00345 grammar->name, rule->name);
00346 return -1;
00347 }
00348
00349 E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, subrule->entry);
00350 jsgf_add_link(grammar, atom, lastnode, subrule->entry);
00351 }
00352 else {
00353
00354 if (expand_rule(grammar, subrule) == -1)
00355 return -1;
00356
00357 jsgf_add_link(grammar, atom,
00358 lastnode, subrule->entry);
00359 lastnode = subrule->exit;
00360 }
00361 }
00362 else {
00363
00364 jsgf_add_link(grammar, atom,
00365 lastnode, grammar->nstate);
00366 lastnode = grammar->nstate;
00367 ++grammar->nstate;
00368 }
00369 }
00370
00371 return lastnode;
00372 }
00373
00374 static int
00375 expand_rule(jsgf_t *grammar, jsgf_rule_t *rule)
00376 {
00377 jsgf_rhs_t *rhs;
00378 float norm;
00379
00380
00381 grammar->rulestack = glist_add_ptr(grammar->rulestack, rule);
00382
00383
00384 norm = 0;
00385 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00386 if (rhs->atoms) {
00387 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00388 norm += atom->weight;
00389 }
00390 }
00391
00392 rule->entry = grammar->nstate++;
00393 rule->exit = grammar->nstate++;
00394 if (norm == 0) norm = 1;
00395 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00396 int lastnode;
00397
00398 if (rhs->atoms) {
00399 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00400 atom->weight /= norm;
00401 }
00402 lastnode = expand_rhs(grammar, rule, rhs);
00403 if (lastnode == -1) {
00404 return -1;
00405 }
00406 else {
00407 jsgf_add_link(grammar, NULL, lastnode, rule->exit);
00408 }
00409 }
00410
00411
00412 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
00413 return rule->exit;
00414 }
00415
00416 jsgf_rule_iter_t *
00417 jsgf_rule_iter(jsgf_t *grammar)
00418 {
00419 return hash_table_iter(grammar->rules);
00420 }
00421
00422 jsgf_rule_t *
00423 jsgf_get_rule(jsgf_t *grammar, char const *name)
00424 {
00425 void *val;
00426
00427 if (hash_table_lookup(grammar->rules, name, &val) < 0)
00428 return NULL;
00429 return (jsgf_rule_t *)val;
00430 }
00431
00432 char const *
00433 jsgf_rule_name(jsgf_rule_t *rule)
00434 {
00435 return rule->name;
00436 }
00437
00438 int
00439 jsgf_rule_public(jsgf_rule_t *rule)
00440 {
00441 return rule->public;
00442 }
00443
00444 static fsg_model_t *
00445 jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
00446 logmath_t *lmath, float32 lw, int do_closure)
00447 {
00448 fsg_model_t *fsg;
00449 glist_t nulls;
00450 gnode_t *gn;
00451
00452
00453 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00454 ckd_free(gnode_ptr(gn));
00455 }
00456 glist_free(grammar->links);
00457 grammar->links = NULL;
00458 rule->entry = rule->exit = 0;
00459 grammar->nstate = 0;
00460 expand_rule(grammar, rule);
00461
00462 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
00463 fsg->start_state = rule->entry;
00464 fsg->final_state = rule->exit;
00465 grammar->links = glist_reverse(grammar->links);
00466 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00467 jsgf_link_t *link = gnode_ptr(gn);
00468
00469 if (link->atom) {
00470 if (jsgf_atom_is_rule(link->atom)) {
00471 fsg_model_null_trans_add(fsg, link->from, link->to,
00472 logmath_log(lmath, link->atom->weight));
00473 }
00474 else {
00475 int wid = fsg_model_word_add(fsg, link->atom->name);
00476 fsg_model_trans_add(fsg, link->from, link->to,
00477 logmath_log(lmath, link->atom->weight), wid);
00478 }
00479 }
00480 else {
00481 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
00482 }
00483 }
00484 if (do_closure) {
00485 nulls = fsg_model_null_trans_closure(fsg, NULL);
00486 glist_free(nulls);
00487 }
00488
00489 return fsg;
00490 }
00491
00492 fsg_model_t *
00493 jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule,
00494 logmath_t *lmath, float32 lw)
00495 {
00496 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
00497 }
00498
00499 fsg_model_t *
00500 jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule,
00501 logmath_t *lmath, float32 lw)
00502 {
00503 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
00504 }
00505
00506 jsgf_rule_t *
00507 jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int public)
00508 {
00509 jsgf_rule_t *rule;
00510 void *val;
00511
00512 if (name == NULL) {
00513 name = ckd_malloc(strlen(jsgf->name) + 16);
00514 sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules));
00515 }
00516 else {
00517 char *newname;
00518
00519 newname = jsgf_fullname(jsgf, name);
00520 name = newname;
00521 }
00522
00523 rule = ckd_calloc(1, sizeof(*rule));
00524 rule->refcnt = 1;
00525 rule->name = ckd_salloc(name);
00526 rule->rhs = rhs;
00527 rule->public = public;
00528
00529 E_INFO("Defined rule: %s%s\n",
00530 rule->public ? "PUBLIC " : "",
00531 rule->name);
00532 val = hash_table_enter(jsgf->rules, name, rule);
00533 if (val != (void *)rule) {
00534 E_WARN("Multiply defined symbol: %s\n", name);
00535 }
00536 return rule;
00537 }
00538
00539 jsgf_rule_t *
00540 jsgf_rule_retain(jsgf_rule_t *rule)
00541 {
00542 ++rule->refcnt;
00543 return rule;
00544 }
00545
00546 int
00547 jsgf_rule_free(jsgf_rule_t *rule)
00548 {
00549 if (rule == NULL)
00550 return 0;
00551 if (--rule->refcnt > 0)
00552 return rule->refcnt;
00553 jsgf_rhs_free(rule->rhs);
00554 ckd_free(rule->name);
00555 ckd_free(rule);
00556 return 0;
00557 }
00558
00559
00560
00561 static char *
00562 path_list_search(glist_t paths, char *path)
00563 {
00564 gnode_t *gn;
00565
00566 for (gn = paths; gn; gn = gnode_next(gn)) {
00567 char *fullpath;
00568 FILE *tmp;
00569
00570 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
00571 tmp = fopen(fullpath, "r");
00572 if (tmp != NULL) {
00573 fclose(tmp);
00574 return fullpath;
00575 }
00576 else
00577 ckd_free(fullpath);
00578 }
00579 return NULL;
00580 }
00581
00582 jsgf_rule_t *
00583 jsgf_import_rule(jsgf_t *jsgf, char *name)
00584 {
00585 char *c, *path, *newpath;
00586 size_t namelen, packlen;
00587 void *val;
00588 jsgf_t *imp;
00589 int import_all;
00590
00591
00592 namelen = strlen(name);
00593 path = ckd_malloc(namelen - 2 + 6);
00594 strcpy(path, name + 1);
00595
00596 c = strrchr(path, '.');
00597 if (c == NULL) {
00598 E_ERROR("Imported rule is not qualified: %s\n", name);
00599 ckd_free(path);
00600 return NULL;
00601 }
00602 packlen = c - path;
00603 *c = '\0';
00604
00605
00606 import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>"));
00607
00608
00609 for (c = path; *c; ++c)
00610 if (*c == '.') *c = '/';
00611 strcat(path, ".gram");
00612 newpath = path_list_search(jsgf->searchpath, path);
00613 ckd_free(path);
00614 if (newpath == NULL)
00615 return NULL;
00616
00617 path = newpath;
00618 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
00619
00620
00621
00622
00623 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
00624 E_INFO("Already imported %s\n", path);
00625 imp = val;
00626 ckd_free(path);
00627 }
00628 else {
00629
00630 imp = jsgf_parse_file(path, jsgf);
00631 val = hash_table_enter(jsgf->imports, path, imp);
00632 if (val != (void *)imp) {
00633 E_WARN("Multiply imported file: %s\n", path);
00634 }
00635 }
00636 if (imp != NULL) {
00637 hash_iter_t *itor;
00638
00639 for (itor = hash_table_iter(imp->rules); itor;
00640 itor = hash_table_iter_next(itor)) {
00641 hash_entry_t *he = itor->ent;
00642 jsgf_rule_t *rule = hash_entry_val(he);
00643 int rule_matches;
00644 char *rule_name = importname2rulename(name);
00645
00646 if (import_all) {
00647
00648 rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
00649 }
00650 else {
00651
00652 rule_matches = !strcmp(rule_name, rule->name);
00653 }
00654 ckd_free(rule_name);
00655 if (rule->public && rule_matches) {
00656 void *val;
00657 char *newname;
00658
00659
00660 c = strrchr(rule->name, '.');
00661 assert(c != NULL);
00662 newname = jsgf_fullname(jsgf, c);
00663
00664 E_INFO("Imported %s\n", newname);
00665 val = hash_table_enter(jsgf->rules, newname,
00666 jsgf_rule_retain(rule));
00667 if (val != (void *)rule) {
00668 E_WARN("Multiply defined symbol: %s\n", newname);
00669 }
00670 if (!import_all) {
00671 hash_table_iter_free(itor);
00672 return rule;
00673 }
00674 }
00675 }
00676 }
00677
00678 return NULL;
00679 }
00680
00681 jsgf_t *
00682 jsgf_parse_file(const char *filename, jsgf_t *parent)
00683 {
00684 yyscan_t yyscanner;
00685 jsgf_t *jsgf;
00686 int yyrv;
00687 FILE *in = NULL;
00688
00689 yylex_init(&yyscanner);
00690 if (filename == NULL) {
00691 yyset_in(stdin, yyscanner);
00692 }
00693 else {
00694 in = fopen(filename, "r");
00695 if (in == NULL) {
00696 fprintf(stderr, "Failed to open %s for parsing: %s\n",
00697 filename, strerror(errno));
00698 return NULL;
00699 }
00700 yyset_in(in, yyscanner);
00701 }
00702
00703 jsgf = jsgf_grammar_new(parent);
00704 yyrv = yyparse(yyscanner, jsgf);
00705 if (yyrv != 0) {
00706 fprintf(stderr, "JSGF parse of %s failed\n",
00707 filename ? filename : "(stdin)");
00708 jsgf_grammar_free(jsgf);
00709 yylex_destroy(yyscanner);
00710 return NULL;
00711 }
00712 if (in)
00713 fclose(in);
00714 yylex_destroy(yyscanner);
00715
00716 return jsgf;
00717 }