00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include <time.h>
00041 #include <assert.h>
00042
00043 #ifdef _WIN32
00044 #pragma warning (disable: 4996 4018)
00045 #endif
00046
00047 #if defined(WIN32) && !defined(GNUWINCE) && !defined(_WIN32_WCE)
00048 #include <errno.h>
00049 #endif
00050
00051 #ifdef HAVE_CONFIG_H
00052 #include <config.h>
00053 #endif
00054
00055 #include "fe.h"
00056 #include "strfuncs.h"
00057 #include "cmd_ln.h"
00058 #include "err.h"
00059 #include "ckd_alloc.h"
00060 #include "byteorder.h"
00061
00062 #include "wave2feat.h"
00063 #include "cmd_ln_defn.h"
00064
00065 struct globals_s {
00066 cmd_ln_t *config;
00067 int32 nskip;
00068 int32 runlen;
00069 char const *wavfile;
00070 char const *cepfile;
00071 char const *ctlfile;
00072 char const *wavdir;
00073 char const *cepdir;
00074 char const *wavext;
00075 char const *cepext;
00076 int32 input_format;
00077 int32 is_batch;
00078 int32 is_single;
00079 int32 blocksize;
00080 int32 machine_endian;
00081 int32 input_endian;
00082 int32 output_endian;
00083 int32 nchans;
00084 int32 whichchan;
00085 int32 convert;
00086 int32 verbose;
00087 int32 logspec;
00088 };
00089 typedef struct globals_s globals_t;
00090
00091 globals_t *fe_parse_options(int argc, char **argv);
00092 int32 fe_convert_files(globals_t * P);
00093 int32 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00094 char **outfilename);
00095 int32 fe_openfiles(globals_t * P, fe_t * FE, char *infile, FILE **fh_in,
00096 int32 * nsamps, int32 * nframes, int32 * nblocks,
00097 char *outfile, FILE **fh_out);
00098 int32 fe_readblock_spch(globals_t * P, FILE *fh, int32 nsamps,
00099 int16 * buf);
00100 int32 fe_writeblock_feat(globals_t * P, fe_t * FE, FILE *fh, int32 nframes,
00101 mfcc_t ** feat);
00102 int32 fe_closefiles(globals_t *P, FILE *fh_in, FILE *fh_out);
00103 int32 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile);
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126 int32
00127 main(int32 argc, char **argv)
00128 {
00129 globals_t *P;
00130
00131 P = fe_parse_options(argc, argv);
00132 if (fe_convert_files(P) != FE_SUCCESS) {
00133 E_FATAL("error converting files...exiting\n");
00134 }
00135 free(P);
00136 return (0);
00137 }
00138
00139
00140 int32
00141 fe_convert_files(globals_t * P)
00142 {
00143
00144 fe_t *FE;
00145 char *infile, *outfile, fileroot[MAXCHARS];
00146 FILE *ctlfile;
00147 int16 *spdata = NULL;
00148 int32 splen =
00149 0, total_samps, frames_proc, nframes, nblocks, last_frame;
00150 int32 last_blocksize = 0, curr_block, total_frames;
00151 FILE *fh_in, *fh_out;
00152 mfcc_t **cep = NULL, **last_frame_cep;
00153 int32 return_value;
00154 int32 warn_zero_energy = 0;
00155 int32 process_utt_return_value;
00156
00157 if ((FE = fe_init_auto_r(P->config)) == NULL) {
00158 E_ERROR("memory alloc failed...exiting\n");
00159 return (FE_MEM_ALLOC_ERROR);
00160 }
00161
00162 if (P->is_batch) {
00163 int32 nskip = P->nskip;
00164 int32 runlen = P->runlen;
00165
00166 if ((ctlfile = fopen(P->ctlfile, "r")) == NULL) {
00167 E_ERROR("Unable to open control file %s\n", P->ctlfile);
00168 fe_free(FE);
00169 return (FE_CONTROL_FILE_ERROR);
00170 }
00171 while (fscanf(ctlfile, "%s", fileroot) != EOF) {
00172 if (nskip > 0) {
00173 --nskip;
00174 continue;
00175 }
00176 if (runlen > 0) {
00177 --runlen;
00178 }
00179 else if (runlen == 0) {
00180 break;
00181 }
00182
00183 fe_build_filenames(P, fileroot, &infile, &outfile);
00184
00185 if (P->verbose)
00186 E_INFO("%s\n", infile);
00187
00188 if (P->convert) {
00189
00190 return_value = fe_convert_with_dct(P, FE, infile, outfile);
00191 ckd_free(infile);
00192 ckd_free(outfile);
00193 infile = outfile = NULL;
00194 if (return_value != FE_SUCCESS) {
00195 fe_free(FE);
00196 return return_value;
00197 }
00198 continue;
00199 }
00200 return_value =
00201 fe_openfiles(P, FE, infile, &fh_in,
00202 &total_samps, &nframes, &nblocks,
00203 outfile, &fh_out);
00204 ckd_free(infile);
00205 ckd_free(outfile);
00206 infile = outfile = NULL;
00207 if (return_value != FE_SUCCESS) {
00208 fe_free(FE);
00209 return (return_value);
00210 }
00211
00212 warn_zero_energy = 0;
00213
00214 if (nblocks * P->blocksize >= total_samps)
00215 last_blocksize =
00216 total_samps - (nblocks - 1) * P->blocksize;
00217
00218 if (!fe_start_utt(FE)) {
00219 curr_block = 1;
00220 total_frames = frames_proc = 0;
00221
00222
00223 while (curr_block < nblocks) {
00224 splen = P->blocksize;
00225 if ((spdata =
00226 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00227 E_ERROR
00228 ("Unable to allocate memory block of %d shorts for input speech\n",
00229 splen);
00230 fe_free(FE);
00231 return (FE_MEM_ALLOC_ERROR);
00232 }
00233 if (fe_readblock_spch
00234 (P, fh_in, splen, spdata) != splen) {
00235 E_ERROR("error reading speech data\n");
00236 fe_free(FE);
00237 return (FE_INPUT_FILE_READ_ERROR);
00238 }
00239 process_utt_return_value =
00240 fe_process_utt(FE, spdata,
00241 splen, &cep, &frames_proc);
00242 if (process_utt_return_value != FE_SUCCESS) {
00243 if (FE_ZERO_ENERGY_ERROR ==
00244 process_utt_return_value) {
00245 warn_zero_energy = 1;
00246 }
00247 else {
00248 fe_free(FE);
00249 return (process_utt_return_value);
00250 }
00251 }
00252 if (frames_proc > 0)
00253 fe_writeblock_feat(P, FE,
00254 fh_out, frames_proc, cep);
00255 if (cep != NULL) {
00256 ckd_free_2d((void **) cep);
00257 cep = NULL;
00258 }
00259 curr_block++;
00260 total_frames += frames_proc;
00261 free(spdata);
00262 spdata = NULL;
00263 }
00264
00265 free(spdata);
00266 spdata = NULL;
00267 splen = last_blocksize;
00268
00269 if ((spdata =
00270 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00271 E_ERROR
00272 ("Unable to allocate memory block of %d shorts for input speech\n",
00273 splen);
00274 fe_free(FE);
00275 return (FE_MEM_ALLOC_ERROR);
00276 }
00277
00278 if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00279 E_ERROR("error reading speech data\n");
00280 fe_free(FE);
00281 return (FE_INPUT_FILE_READ_ERROR);
00282 }
00283
00284 process_utt_return_value =
00285 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00286 if (process_utt_return_value != FE_SUCCESS) {
00287 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00288 warn_zero_energy = 1;
00289 }
00290 else {
00291 fe_free(FE);
00292 return (process_utt_return_value);
00293 }
00294 }
00295 if (frames_proc > 0)
00296 fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00297 if (cep != NULL) {
00298 ckd_free_2d((void **) cep);
00299 cep = NULL;
00300 }
00301 curr_block++;
00302 last_frame_cep =
00303 (mfcc_t **) ckd_calloc_2d(1,
00304 fe_get_output_size(FE),
00305 sizeof(float32));
00306 process_utt_return_value =
00307 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00308 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00309 warn_zero_energy = 1;
00310 }
00311 else {
00312 assert(process_utt_return_value == FE_SUCCESS);
00313 }
00314 if (last_frame > 0) {
00315 fe_writeblock_feat(P, FE, fh_out,
00316 last_frame, last_frame_cep);
00317 frames_proc++;
00318 }
00319 total_frames += frames_proc;
00320
00321 fe_closefiles(P, fh_in, fh_out);
00322 free(spdata);
00323 spdata = NULL;
00324 if (last_frame_cep != NULL) {
00325 ckd_free_2d((void **)
00326 last_frame_cep);
00327 last_frame_cep = NULL;
00328 }
00329 if (warn_zero_energy) {
00330 E_WARN
00331 ("File %s has some frames with zero energy. Consider using dither\n",
00332 infile);
00333 }
00334 }
00335 else {
00336 E_ERROR("fe_start_utt() failed\n");
00337 return (FE_START_ERROR);
00338 }
00339 }
00340 }
00341 else if (P->is_single) {
00342
00343 fe_build_filenames(P, fileroot, &infile, &outfile);
00344 if (P->verbose)
00345 printf("%s\n", infile);
00346
00347
00348 if (P->convert != WAV2FEAT) {
00349 int rv;
00350
00351 rv = fe_convert_with_dct(P, FE, infile, outfile);
00352 ckd_free(infile);
00353 ckd_free(outfile);
00354 infile = outfile = NULL;
00355 fe_free(FE);
00356 return rv;
00357 }
00358
00359 return_value =
00360 fe_openfiles(P, FE, infile, &fh_in, &total_samps,
00361 &nframes, &nblocks, outfile, &fh_out);
00362 ckd_free(infile);
00363 ckd_free(outfile);
00364 infile = outfile = NULL;
00365 if (return_value != FE_SUCCESS) {
00366 fe_free(FE);
00367 return (return_value);
00368 }
00369
00370 warn_zero_energy = 0;
00371
00372 if (nblocks * P->blocksize >= total_samps)
00373 last_blocksize = total_samps - (nblocks - 1) * P->blocksize;
00374
00375 if (!fe_start_utt(FE)) {
00376 curr_block = 1;
00377 total_frames = frames_proc = 0;
00378
00379
00380 while (curr_block < nblocks) {
00381 splen = P->blocksize;
00382 if ((spdata =
00383 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00384 E_ERROR
00385 ("Unable to allocate memory block of %d shorts for input speech\n",
00386 splen);
00387 fe_free(FE);
00388 return (FE_MEM_ALLOC_ERROR);
00389 }
00390 if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00391 E_ERROR("Error reading speech data\n");
00392 fe_free(FE);
00393 return (FE_INPUT_FILE_READ_ERROR);
00394 }
00395 process_utt_return_value =
00396 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00397 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00398 warn_zero_energy = 1;
00399 }
00400 else {
00401 assert(process_utt_return_value == FE_SUCCESS);
00402 }
00403 if (frames_proc > 0)
00404 fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00405 if (cep != NULL) {
00406 ckd_free_2d((void **) cep);
00407 cep = NULL;
00408 }
00409 curr_block++;
00410 total_frames += frames_proc;
00411 if (spdata != NULL) {
00412 free(spdata);
00413 spdata = NULL;
00414 }
00415 }
00416
00417 if (spdata != NULL) {
00418 free(spdata);
00419 spdata = NULL;
00420 }
00421 splen = last_blocksize;
00422 if ((spdata = (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00423 E_ERROR
00424 ("Unable to allocate memory block of %d shorts for input speech\n",
00425 splen);
00426 fe_free(FE);
00427 return (FE_MEM_ALLOC_ERROR);
00428 }
00429 if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00430 E_ERROR("Error reading speech data\n");
00431 fe_free(FE);
00432 return (FE_INPUT_FILE_READ_ERROR);
00433 }
00434 process_utt_return_value =
00435 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00436 free(spdata);
00437 spdata = NULL;
00438 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00439 warn_zero_energy = 1;
00440 }
00441 else {
00442 assert(process_utt_return_value == FE_SUCCESS);
00443 }
00444 if (frames_proc > 0)
00445 fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00446 if (cep != NULL) {
00447 ckd_free_2d((void **) cep);
00448 cep = NULL;
00449 }
00450
00451 curr_block++;
00452 last_frame_cep =
00453 (mfcc_t **) ckd_calloc_2d(1,
00454 fe_get_output_size(FE),
00455 sizeof(float32));
00456 process_utt_return_value =
00457 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00458 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00459 warn_zero_energy = 1;
00460 }
00461 else {
00462 assert(process_utt_return_value == FE_SUCCESS);
00463 }
00464 if (last_frame > 0) {
00465 fe_writeblock_feat(P, FE, fh_out,
00466 last_frame, last_frame_cep);
00467 frames_proc++;
00468 }
00469 total_frames += frames_proc;
00470
00471 fe_closefiles(P, fh_in, fh_out);
00472 if (last_frame_cep != NULL) {
00473 ckd_free_2d((void **) last_frame_cep);
00474 last_frame_cep = NULL;
00475 }
00476 }
00477 else {
00478 E_ERROR("fe_start_utt() failed\n");
00479 fe_free(FE);
00480 return (FE_START_ERROR);
00481 }
00482
00483 if (warn_zero_energy) {
00484 E_WARN
00485 ("File %s has some frames with zero energy. Consider using dither\n",
00486 infile);
00487 }
00488 }
00489 else {
00490 E_ERROR("Unknown mode - single or batch?\n");
00491 fe_free(FE);
00492 return (FE_UNKNOWN_SINGLE_OR_BATCH);
00493 }
00494
00495 fe_free(FE);
00496 return (FE_SUCCESS);
00497 }
00498
00499 void
00500 fe_validate_parameters(globals_t * P)
00501 {
00502
00503 if ((P->is_batch) && (P->is_single)) {
00504 E_FATAL("You cannot define an input file and a control file\n");
00505 }
00506
00507 if (P->wavfile == NULL && P->wavdir == NULL) {
00508 E_FATAL("No input file or file directory given\n");
00509 }
00510
00511 if (P->cepfile == NULL && P->cepdir == NULL) {
00512 E_FATAL("No cepstra file or file directory given\n");
00513 }
00514
00515 if (P->ctlfile == NULL && P->cepfile == NULL && P->wavfile == NULL) {
00516 E_FATAL("No control file given\n");
00517 }
00518
00519 if (P->nchans > 1) {
00520 E_INFO("Files have %d channels of data\n", P->nchans);
00521 E_INFO("Will extract features for channel %d\n", P->whichchan);
00522 }
00523
00524 if (P->whichchan > P->nchans) {
00525 E_FATAL("You cannot select channel %d out of %d\n",
00526 P->whichchan, P->nchans);
00527 }
00528
00529 if ((cmd_ln_float32_r(P->config, "-upperf") * 2)
00530 > cmd_ln_float32_r(P->config, "-samprate")) {
00531 E_WARN("Upper frequency higher than Nyquist frequency\n");
00532 }
00533
00534 if (cmd_ln_boolean_r(P->config, "-doublebw")) {
00535 E_INFO("Will use double bandwidth filters\n");
00536 }
00537
00538 }
00539
00540
00541 globals_t *
00542 fe_parse_options(int32 argc, char **argv)
00543 {
00544 globals_t *P;
00545 int32 format;
00546 char const *endian;
00547
00548 P = ckd_calloc(1, sizeof(*P));
00549 P->config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE);
00550
00551
00552 if (cmd_ln_str_r(P->config, "-argfile")) {
00553 P->config = cmd_ln_parse_file_r(P->config, defn,
00554 cmd_ln_str_r(P->config, "-argfile"),
00555 FALSE);
00556 }
00557
00558 P->nskip = P->runlen = -1;
00559 P->wavfile = cmd_ln_str_r(P->config, "-i");
00560 if (P->wavfile != NULL) {
00561 P->is_single = 1;
00562 }
00563 P->cepfile = cmd_ln_str_r(P->config, "-o");
00564 P->ctlfile = cmd_ln_str_r(P->config, "-c");
00565 if (P->ctlfile != NULL) {
00566 char const *nskip;
00567 char const *runlen;
00568
00569 P->is_batch = 1;
00570
00571 nskip = cmd_ln_str_r(P->config, "-nskip");
00572 runlen = cmd_ln_str_r(P->config, "-runlen");
00573 if (nskip != NULL) {
00574 P->nskip = atoi(nskip);
00575 }
00576 if (runlen != NULL) {
00577 P->runlen = atoi(runlen);
00578 }
00579 }
00580 P->wavdir = cmd_ln_str_r(P->config, "-di");
00581 P->cepdir = cmd_ln_str_r(P->config, "-do");
00582 P->wavext = cmd_ln_str_r(P->config, "-ei");
00583 P->cepext = cmd_ln_str_r(P->config, "-eo");
00584 format = cmd_ln_int32_r(P->config, "-raw");
00585 if (format) {
00586 P->input_format = RAW;
00587 }
00588 format = cmd_ln_int32_r(P->config, "-nist");
00589 if (format) {
00590 P->input_format = NIST;
00591 }
00592 format = cmd_ln_int32_r(P->config, "-mswav");
00593 if (format) {
00594 P->input_format = MSWAV;
00595 }
00596
00597 P->nchans = cmd_ln_int32_r(P->config, "-nchans");
00598 P->whichchan = cmd_ln_int32_r(P->config, "-whichchan");
00599 P->output_endian = BIG;
00600 P->blocksize = cmd_ln_int32_r(P->config, "-blocksize");
00601 endian = cmd_ln_str_r(P->config, "-mach_endian");
00602 if (!strcmp("big", endian)) {
00603 P->machine_endian = BIG;
00604 }
00605 else {
00606 if (!strcmp("little", endian)) {
00607 P->machine_endian = LITTLE;
00608 }
00609 else {
00610 E_FATAL("Machine must be big or little Endian\n");
00611 }
00612 }
00613 endian = cmd_ln_str_r(P->config, "-input_endian");
00614 if (!strcmp("big", endian)) {
00615 P->input_endian = BIG;
00616 }
00617 else {
00618 if (!strcmp("little", endian)) {
00619 P->input_endian = LITTLE;
00620 }
00621 else {
00622 E_FATAL("Input must be big or little Endian\n");
00623 }
00624 }
00625
00626 if (cmd_ln_boolean_r(P->config, "-logspec")
00627 || cmd_ln_boolean_r(P->config, "-smoothspec"))
00628 P->logspec = TRUE;
00629 if (cmd_ln_boolean_r(P->config, "-spec2cep"))
00630 P->convert = SPEC2CEP;
00631 if (cmd_ln_boolean_r(P->config, "-cep2spec"))
00632 P->convert = CEP2SPEC;
00633
00634 fe_validate_parameters(P);
00635
00636 return (P);
00637
00638 }
00639
00640 int32
00641 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00642 char **outfilename)
00643 {
00644 char chanlabel[32];
00645
00646 if (P->nchans > 1)
00647 sprintf(chanlabel, ".ch%d", P->whichchan);
00648
00649 if (P->is_batch) {
00650 if (infilename != NULL) {
00651 *infilename = string_join(P->wavdir, "/",
00652 fileroot, ".",
00653 P->wavext, NULL);
00654 }
00655
00656 if (outfilename != NULL) {
00657 if (P->nchans > 1)
00658 *outfilename = string_join(P->cepdir, "/",
00659 fileroot, chanlabel,
00660 ".", P->cepext, NULL);
00661 else
00662 *outfilename = string_join(P->cepdir, "/",
00663 fileroot, ".",
00664 P->cepext, NULL);
00665 }
00666 }
00667 else if (P->is_single) {
00668 if (infilename != NULL) {
00669 *infilename = ckd_salloc(P->wavfile);
00670 }
00671 if (outfilename != NULL) {
00672 *outfilename = ckd_salloc(P->cepfile);
00673 }
00674 }
00675 else {
00676 E_FATAL("Unspecified Batch or Single Mode\n");
00677 }
00678
00679 return 0;
00680 }
00681
00682 int32
00683 fe_openfiles(globals_t * P, fe_t * FE, char *infile, FILE **fh_in,
00684 int32 * nsamps, int32 * nframes, int32 * nblocks,
00685 char *outfile, FILE **fh_out)
00686 {
00687 int len = 0, outlen, numframes, numblocks;
00688 FILE *fp2, *fh;
00689 char line[MAXCHARS];
00690 int got_it = 0;
00691
00692
00693
00694
00695 if (P->input_format == NIST) {
00696 if ((fp2 = fopen(infile, "rb")) == NULL) {
00697 E_ERROR_SYSTEM("Cannot read %s", infile);
00698 return (FE_INPUT_FILE_READ_ERROR);
00699 }
00700 *line = 0;
00701 got_it = 0;
00702 while (strcmp(line, "end_head") && !got_it) {
00703 fscanf(fp2, "%s", line);
00704 if (!strcmp(line, "sample_byte_format")) {
00705 fscanf(fp2, "%s", line);
00706 if (!strcmp(line, "-s2")) {
00707 fscanf(fp2, "%s", line);
00708 if (!strcmp(line, "01")) {
00709 P->input_endian = LITTLE;
00710 got_it = 1;
00711 }
00712 else if (!strcmp(line, "10")) {
00713 P->input_endian = BIG;
00714 got_it = 1;
00715 }
00716 else
00717 E_ERROR("Unknown/unsupported byte order\n");
00718 }
00719 else
00720 E_ERROR("Error determining byte format\n");
00721 }
00722 }
00723 if (!got_it) {
00724 E_WARN
00725 ("Can't find byte format in header, setting to machine's endian\n");
00726 P->input_endian = P->machine_endian;
00727 }
00728 fclose(fp2);
00729 }
00730 else if (P->input_format == RAW) {
00731
00732
00733
00734 }
00735 else if (P->input_format == MSWAV) {
00736 P->input_endian = LITTLE;
00737 }
00738
00739 if ((fh = fopen(infile, "rb")) == NULL) {
00740 fprintf(stderr, "Cannot open %s\n", infile);
00741 return (FE_INPUT_FILE_OPEN_ERROR);
00742 }
00743 else {
00744 long fsize;
00745
00746 fseek(fh, 0, SEEK_END);
00747 fsize = ftell(fh);
00748 fseek(fh, 0, SEEK_SET);
00749
00750 if (P->input_format == NIST) {
00751 short *hdr_buf;
00752
00753 len = (fsize - HEADER_BYTES) / sizeof(short);
00754
00755 hdr_buf =
00756 (short *) calloc(HEADER_BYTES / sizeof(short),
00757 sizeof(short));
00758 if (fread(hdr_buf, 1, HEADER_BYTES, fh) != HEADER_BYTES) {
00759 E_ERROR("Cannot read %s\n", infile);
00760 return (FE_INPUT_FILE_READ_ERROR);
00761 }
00762 free(hdr_buf);
00763 }
00764 else if (P->input_format == RAW) {
00765 len = fsize / sizeof(int16);
00766 }
00767 else if (P->input_format == MSWAV) {
00768
00769 MSWAV_hdr *hdr_buf = NULL;
00770
00771 const int hdr_len_to_read = ((char *) (&hdr_buf->datatag))
00772 - (char *) hdr_buf;
00773 int data_start;
00774
00775 if ((hdr_buf =
00776 (MSWAV_hdr *) calloc(1, sizeof(MSWAV_hdr))) == NULL) {
00777 E_ERROR("Cannot allocate for input file header\n");
00778 return (FE_INPUT_FILE_READ_ERROR);
00779 }
00780 if (fread(hdr_buf, 1, hdr_len_to_read, fh) != hdr_len_to_read) {
00781 E_ERROR("Cannot allocate for input file header\n");
00782 return (FE_INPUT_FILE_READ_ERROR);
00783 }
00784
00785 if (strncmp(hdr_buf->rifftag, "RIFF", 4) != 0 ||
00786 strncmp(hdr_buf->wavefmttag, "WAVEfmt", 7) != 0) {
00787 E_ERROR("Error in mswav file header\n");
00788 return (FE_INPUT_FILE_READ_ERROR);
00789 }
00790 {
00791
00792
00793
00794
00795
00796 int16 found = 0;
00797 char readChar;
00798 char *dataString = "data";
00799 int16 charPointer = 0;
00800 while (!found) {
00801 if (fread(&readChar, 1, 1, fh) != 1) {
00802 E_ERROR("Failed reading wav file.\n");
00803 return (FE_INPUT_FILE_READ_ERROR);
00804 }
00805 if (readChar == dataString[charPointer]) {
00806 charPointer++;
00807 }
00808 if (charPointer == (int) strlen(dataString)) {
00809 found = 1;
00810 strcpy(hdr_buf->datatag, dataString);
00811 if (fread(&(hdr_buf->datalength), sizeof(int32), 1, fh) != 1) {
00812 E_ERROR("Failed reading wav file.\n");
00813 return (FE_INPUT_FILE_READ_ERROR);
00814 }
00815 }
00816 }
00817 }
00818 data_start = ftell(fh);
00819 if (P->input_endian != P->machine_endian) {
00820 hdr_buf->datalength = SWAP_INT32(&(hdr_buf->datalength));
00821 hdr_buf->data_format = SWAP_INT16(&(hdr_buf->data_format));
00822 hdr_buf->numchannels = SWAP_INT16(&(hdr_buf->numchannels));
00823 hdr_buf->BitsPerSample =
00824 SWAP_INT16(&(hdr_buf->BitsPerSample));
00825 hdr_buf->SamplingFreq =
00826 SWAP_INT32(&(hdr_buf->SamplingFreq));
00827 hdr_buf->BytesPerSec = SWAP_INT32(&(hdr_buf->BytesPerSec));
00828 }
00829
00830 if (hdr_buf->data_format != 1 || hdr_buf->BitsPerSample != 16) {
00831 E_ERROR("MS WAV file not in 16-bit PCM format\n");
00832 return (FE_INPUT_FILE_READ_ERROR);
00833 }
00834
00835 len = hdr_buf->datalength / sizeof(short);
00836 if (len > (fsize - data_start) / sizeof(short))
00837 len = (fsize - data_start) / sizeof(short);
00838
00839 P->nchans = hdr_buf->numchannels;
00840
00841 if (P->verbose) {
00842 E_INFO("Reading MS Wav file %s:\n", infile);
00843 E_INFO
00844 ("\t16 bit PCM data, %d channels %d samples\n",
00845 P->nchans, len);
00846 E_INFO("\tSampled at %d\n", hdr_buf->SamplingFreq);
00847 }
00848 free(hdr_buf);
00849 }
00850 else {
00851 E_ERROR("Unknown input file format\n");
00852 return (FE_INPUT_FILE_OPEN_ERROR);
00853 }
00854 }
00855
00856
00857 len = len / P->nchans;
00858 *nsamps = len;
00859 *fh_in = fh;
00860
00861 numblocks = (int) ((float) len / (float) P->blocksize);
00862 if (numblocks * P->blocksize < len)
00863 numblocks++;
00864
00865 *nblocks = numblocks;
00866
00867 if ((fh = fopen(outfile, "wb")) < 0) {
00868 E_ERROR("Unable to open %s for writing features\n", outfile);
00869 return (FE_OUTPUT_FILE_OPEN_ERROR);
00870 }
00871 else {
00872 size_t nsamps = len;
00873 int frame_shift, frame_size;
00874
00875
00876 fe_process_frames(FE, NULL, &nsamps, NULL, &numframes);
00877
00878
00879 fe_get_input_size(FE, &frame_shift, &frame_size);
00880
00881 if (frame_size + (numframes - 1) * frame_shift <= len)
00882 ++numframes;
00883
00884
00885
00886 outlen = numframes * fe_get_output_size(FE);
00887 if (P->output_endian != P->machine_endian)
00888 SWAP_INT32(&outlen);
00889 if (fwrite(&outlen, 4, 1, fh) != 1) {
00890 E_ERROR("Data write error on %s\n", outfile);
00891 fclose(fh);
00892 return (FE_OUTPUT_FILE_WRITE_ERROR);
00893 }
00894 if (P->output_endian != P->machine_endian)
00895 SWAP_INT32(&outlen);
00896 }
00897
00898 *nframes = numframes;
00899 *fh_out = fh;
00900
00901 return 0;
00902 }
00903
00904 int32
00905 fe_readblock_spch(globals_t * P, FILE *fh, int32 nsamps, int16 * buf)
00906 {
00907 int32 nsamps_read, cum_samps_read, actsamps, offset, i,
00908 j, k;
00909 int16 *tmpbuf;
00910 int32 nchans, whichchan;
00911
00912 nchans = P->nchans;
00913 whichchan = P->whichchan;
00914
00915 if (nchans == 1) {
00916 if (P->input_format == RAW
00917 || P->input_format == NIST
00918 || P->input_format == MSWAV) {
00919 if ((nsamps_read = fread(buf, 2, nsamps, fh)) != nsamps) {
00920 E_ERROR_SYSTEM("error reading block: %ld != %d",
00921 nsamps_read, nsamps);
00922 return (0);
00923 }
00924 }
00925 else {
00926 E_ERROR("unknown input file format\n");
00927 return (0);
00928 }
00929 cum_samps_read = nsamps_read;
00930 }
00931 else if (nchans > 1) {
00932 if (nsamps < P->blocksize) {
00933 actsamps = nsamps * nchans;
00934 tmpbuf = (int16 *) calloc(nsamps * nchans, sizeof(int16));
00935 cum_samps_read = 0;
00936 if (P->input_format == RAW
00937 || P->input_format == MSWAV || P->input_format == NIST) {
00938
00939 k = 0;
00940 if ((nsamps_read =
00941 fread(tmpbuf, 2, actsamps, fh)) != actsamps) {
00942 E_ERROR
00943 ("error reading block (got %d not %d)\n",
00944 nsamps_read, actsamps);
00945 return (0);
00946 }
00947
00948 for (j = whichchan - 1; j < actsamps; j = j + nchans) {
00949 buf[k] = tmpbuf[j];
00950 k++;
00951 }
00952 cum_samps_read += nsamps_read / nchans;
00953 }
00954 else {
00955 E_ERROR("unknown input file format\n");
00956 return (0);
00957 }
00958 free(tmpbuf);
00959 }
00960 else {
00961 tmpbuf = (int16 *) calloc(nsamps, sizeof(int16));
00962 actsamps = nsamps / nchans;
00963 cum_samps_read = 0;
00964
00965 if (actsamps * nchans != nsamps) {
00966 E_WARN
00967 ("Blocksize %d is not an integer multiple of Number of channels %d\n",
00968 nsamps, nchans);
00969 }
00970
00971 if (P->input_format == RAW
00972 || P->input_format == MSWAV || P->input_format == NIST) {
00973 for (i = 0; i < nchans; i++) {
00974
00975 offset = i * actsamps;
00976 k = 0;
00977
00978 if ((nsamps_read =
00979 fread(tmpbuf, 2, actsamps, fh)) != actsamps) {
00980 E_ERROR
00981 ("error reading block (got %d not %d)\n",
00982 nsamps_read, actsamps);
00983 return (0);
00984 }
00985
00986 for (j = whichchan - 1; j < nsamps; j = j + nchans) {
00987 buf[offset + k] = tmpbuf[j];
00988 k++;
00989 }
00990 cum_samps_read += nsamps_read / nchans;
00991 }
00992 }
00993 else {
00994 E_ERROR("unknown input file format\n");
00995 return (0);
00996 }
00997 free(tmpbuf);
00998 }
00999 }
01000
01001 else {
01002 E_ERROR("unknown number of channels!\n");
01003 return (0);
01004 }
01005
01006 if (P->input_endian != P->machine_endian) {
01007 for (i = 0; i < nsamps; i++)
01008 SWAP_INT16(&buf[i]);
01009 }
01010
01011 return cum_samps_read;
01012
01013 }
01014
01015 int32
01016 fe_writeblock_feat(globals_t * P, fe_t * FE, FILE *fh, int32 nframes,
01017 mfcc_t ** feat)
01018 {
01019
01020 int32 i, length;
01021 float32 **ffeat;
01022
01023 length = nframes * fe_get_output_size(FE);
01024
01025 ffeat = (float32 **) feat;
01026 fe_mfcc_to_float(FE, feat, ffeat, nframes);
01027 if (P->output_endian != P->machine_endian) {
01028 for (i = 0; i < length; ++i)
01029 SWAP_FLOAT32(ffeat[0] + i);
01030 }
01031
01032 if (fwrite(ffeat[0], 4, length, fh) != length) {
01033 fclose(fh);
01034 E_FATAL("Error writing block of features\n");
01035 }
01036
01037 return (length);
01038 }
01039
01040
01041 int32
01042 fe_closefiles(globals_t *P, FILE *fh_in, FILE *fh_out)
01043 {
01044 int32 nfloats;
01045
01046 fclose(fh_in);
01047
01048 nfloats = ftell(fh_out) / 4 - 1;
01049 if (P->output_endian != P->machine_endian)
01050 SWAP_INT32(&nfloats);
01051 fseek(fh_out, 0, SEEK_SET);
01052 fwrite(&nfloats, 4, 1, fh_out);
01053 fclose(fh_out);
01054
01055 return 0;
01056 }
01057
01058 int32
01059 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile)
01060 {
01061 FILE *ifh, *ofh;
01062 int32 ifsize, nfloats, swap = 0;
01063 int32 input_ncoeffs, output_ncoeffs;
01064 float32 *logspec;
01065
01066 if ((ifh = fopen(infile, "rb")) == NULL) {
01067 E_ERROR_SYSTEM("Cannot read %s", infile);
01068 return (FE_INPUT_FILE_READ_ERROR);
01069 }
01070 if ((ofh = fopen(outfile, "wb")) == NULL) {
01071 E_ERROR_SYSTEM("Unable to open %s for writing features", outfile);
01072 return (FE_OUTPUT_FILE_OPEN_ERROR);
01073 }
01074
01075 fseek(ifh, 0, SEEK_END);
01076 ifsize = ftell(ifh);
01077 fseek(ifh, 0, SEEK_SET);
01078 fread(&nfloats, 4, 1, ifh);
01079 if (nfloats != ifsize / 4 - 1) {
01080 E_INFO("Will byteswap %s (%x != %x)\n",
01081 infile, nfloats, ifsize / 4 - 1);
01082 SWAP_INT32(&nfloats);
01083 swap = 1;
01084 }
01085 if (nfloats != ifsize / 4 - 1) {
01086 E_ERROR("Size of file doesn't match header: %d != %d\n",
01087 nfloats, ifsize / 4 - 1);
01088 return (FE_INPUT_FILE_READ_ERROR);
01089 }
01090 if (P->convert == CEP2SPEC) {
01091 input_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01092 output_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01093 }
01094 else {
01095 input_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01096 output_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01097 }
01098 nfloats = nfloats * output_ncoeffs / input_ncoeffs;
01099
01100 if (swap)
01101 SWAP_INT32(&nfloats);
01102 fwrite(&nfloats, 4, 1, ofh);
01103
01104 logspec = ckd_calloc(cmd_ln_int32_r(P->config, "-nfilt"),
01105 sizeof(*logspec));
01106
01107 while (fread(logspec, 4, input_ncoeffs, ifh) == input_ncoeffs) {
01108 int32 i;
01109 if (swap) {
01110 for (i = 0; i < input_ncoeffs; ++i) {
01111 SWAP_FLOAT32(logspec + i);
01112 }
01113 }
01114 fe_float_to_mfcc(FE, &logspec, (mfcc_t **)&logspec, 1);
01115 if (P->convert == CEP2SPEC) {
01116 fe_mfcc_dct3(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01117 }
01118 else {
01119 if (0 == strcmp(cmd_ln_str_r(P->config, "-transform"), "legacy"))
01120 fe_logspec_to_mfcc(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01121 else
01122 fe_logspec_dct2(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01123 }
01124 fe_mfcc_to_float(FE, (mfcc_t **)&logspec, &logspec, 1);
01125 if (swap) {
01126 for (i = 0; i < output_ncoeffs; ++i) {
01127 SWAP_FLOAT32(logspec + i);
01128 }
01129 }
01130 if (fwrite(logspec, 4, output_ncoeffs, ofh) < output_ncoeffs) {
01131 E_ERROR_SYSTEM("Failed to write %d coeffs to %s",
01132 output_ncoeffs, outfile);
01133 ckd_free(logspec);
01134 return (FE_OUTPUT_FILE_WRITE_ERROR);
01135 }
01136 }
01137 if (!feof(ifh)) {
01138 E_ERROR("Short read in input file %s\n", infile);
01139 ckd_free(logspec);
01140 return (FE_INPUT_FILE_READ_ERROR);
01141 }
01142 fclose(ifh);
01143 fclose(ofh);
01144 ckd_free(logspec);
01145
01146 return FE_SUCCESS;
01147 }
01148
01149
#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")

/**
 * Windows CE entry point: converts the wide-character argument vector to
 * multibyte strings and hands it to main().
 *
 * Each converted argument is NUL-terminated and the vector is terminated
 * with a NULL pointer. Returns -1 when an argument cannot be converted or
 * memory runs out; otherwise returns whatever main() returns.
 */
int wmain(int32 argc, wchar_t *wargv[]) {
    char **argv;
    size_t wlen;
    size_t len;
    int i;

    argv = malloc((argc + 1) * sizeof(char *));
    if (argv == NULL)
        return -1;

    for (i = 0; i < argc; i++) {
        wlen = lstrlenW(wargv[i]);
        /* With a NULL destination wcstombs reports the bytes required. */
        len = wcstombs(NULL, wargv[i], wlen);
        if (len == (size_t) -1)
            return -1;
        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return -1;
        /* Leave room for, and then guarantee, the terminating NUL. */
        wcstombs(argv[i], wargv[i], len + 1);
        argv[i][len] = '\0';
    }
    argv[argc] = NULL;

    /* The converted arguments are deliberately kept for the program's lifetime. */
    return main(argc, argv);
}
#endif
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228