src/sphinx_fe/wave2feat.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights 
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include <time.h>
00041 #include <assert.h>
00042 
00043 #ifdef _WIN32
00044 #pragma warning (disable: 4996 4018)
00045 #endif
00046 
00047 #if defined(WIN32) && !defined(GNUWINCE) && !defined(_WIN32_WCE)
00048 #include <errno.h>
00049 #endif
00050 
00051 #ifdef HAVE_CONFIG_H
00052 #include <config.h>
00053 #endif
00054 
00055 #include "fe.h"
00056 #include "strfuncs.h"
00057 #include "cmd_ln.h"
00058 #include "err.h"
00059 #include "ckd_alloc.h"
00060 #include "byteorder.h"
00061 
00062 #include "wave2feat.h"
00063 #include "cmd_ln_defn.h"
00064 
00065 struct globals_s {
00066     cmd_ln_t *config;
00067     int32 nskip;
00068     int32 runlen;
00069     char const *wavfile;
00070     char const *cepfile;
00071     char const *ctlfile;
00072     char const *wavdir;
00073     char const *cepdir;
00074     char const *wavext;
00075     char const *cepext;
00076     int32 input_format;
00077     int32 is_batch;
00078     int32 is_single;
00079     int32 blocksize;
00080     int32 machine_endian;
00081     int32 input_endian;
00082     int32 output_endian;
00083     int32 nchans;
00084     int32 whichchan;
00085     int32 convert;
00086     int32 verbose;
00087     int32 logspec;
00088 };
00089 typedef struct globals_s globals_t;
00090 
00091 globals_t *fe_parse_options(int argc, char **argv);
00092 int32 fe_convert_files(globals_t * P);
00093 int32 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00094                          char **outfilename);
00095 int32 fe_openfiles(globals_t * P, fe_t * FE, char *infile, FILE **fh_in,
00096                    int32 * nsamps, int32 * nframes, int32 * nblocks,
00097                    char *outfile, FILE **fh_out);
00098 int32 fe_readblock_spch(globals_t * P, FILE *fh, int32 nsamps,
00099                         int16 * buf);
00100 int32 fe_writeblock_feat(globals_t * P, fe_t * FE, FILE *fh, int32 nframes,
00101                          mfcc_t ** feat);
00102 int32 fe_closefiles(globals_t *P, FILE *fh_in, FILE *fh_out);
00103 int32 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile);
00104 
00105 /*       
00106          7-Feb-00 M. Seltzer - wrapper created for new front end -
00107          does blockstyle processing if necessary. If input stream is
00108          greater than DEFAULT_BLOCKSIZE samples (currently 200000)
00109          then it will read and write in DEFAULT_BLOCKSIZE chunks. 
00110          
00111          Had to change fe_process_utt(). Now the 2d feature array
00112          is allocated internally to that function rather than
00113          externally in the wrapper. 
00114          
00115          Added usage display with -help switch for help
00116 
00117          14-Feb-00 M. Seltzer - added NIST header parsing for 
00118          big endian/little endian parsing. kind of a hack.
00119 
00120          changed -wav switch to -nist to avoid future confusion with
00121          MS wav files
00122          
00123          added -mach_endian switch to specify machine's byte format
00124 */
00125 
00126 int32
00127 main(int32 argc, char **argv)
00128 {
00129     globals_t *P;
00130 
00131     P = fe_parse_options(argc, argv);
00132     if (fe_convert_files(P) != FE_SUCCESS) {
00133         E_FATAL("error converting files...exiting\n");
00134     }
00135     free(P);
00136     return (0);
00137 }
00138 
00139 
00140 int32
00141 fe_convert_files(globals_t * P)
00142 {
00143 
00144     fe_t *FE;
00145     char *infile, *outfile, fileroot[MAXCHARS];
00146     FILE *ctlfile;
00147     int16 *spdata = NULL;
00148     int32 splen =
00149         0, total_samps, frames_proc, nframes, nblocks, last_frame;
00150     int32 last_blocksize = 0, curr_block, total_frames;
00151     FILE *fh_in, *fh_out;
00152     mfcc_t **cep = NULL, **last_frame_cep;
00153     int32 return_value;
00154     int32 warn_zero_energy = 0;
00155     int32 process_utt_return_value;
00156 
00157     if ((FE = fe_init_auto_r(P->config)) == NULL) {
00158         E_ERROR("memory alloc failed...exiting\n");
00159         return (FE_MEM_ALLOC_ERROR);
00160     }
00161 
00162     if (P->is_batch) {
00163         int32 nskip = P->nskip;
00164         int32 runlen = P->runlen;
00165 
00166         if ((ctlfile = fopen(P->ctlfile, "r")) == NULL) {
00167             E_ERROR("Unable to open control file %s\n", P->ctlfile);
00168             fe_free(FE);
00169             return (FE_CONTROL_FILE_ERROR);
00170         }
00171         while (fscanf(ctlfile, "%s", fileroot) != EOF) {
00172             if (nskip > 0) {
00173                 --nskip;
00174                 continue;
00175             }
00176             if (runlen > 0) {
00177                 --runlen;
00178             }
00179             else if (runlen == 0) {
00180                 break;
00181             }
00182 
00183             fe_build_filenames(P, fileroot, &infile, &outfile);
00184 
00185             if (P->verbose)
00186                 E_INFO("%s\n", infile);
00187 
00188             if (P->convert) {
00189                 /* Special case for doing various DCTs */
00190                 return_value = fe_convert_with_dct(P, FE, infile, outfile);
00191                 ckd_free(infile);
00192                 ckd_free(outfile);
00193                 infile = outfile = NULL;
00194                 if (return_value != FE_SUCCESS) {
00195                     fe_free(FE);
00196                     return return_value;
00197                 }
00198                 continue;
00199             }
00200             return_value =
00201                 fe_openfiles(P, FE, infile, &fh_in,
00202                              &total_samps, &nframes, &nblocks,
00203                              outfile, &fh_out);
00204             ckd_free(infile);
00205             ckd_free(outfile);
00206             infile = outfile = NULL;
00207             if (return_value != FE_SUCCESS) {
00208                 fe_free(FE);
00209                 return (return_value);
00210             }
00211 
00212             warn_zero_energy = 0;
00213 
00214             if (nblocks * P->blocksize >= total_samps)
00215                 last_blocksize =
00216                     total_samps - (nblocks - 1) * P->blocksize;
00217 
00218             if (!fe_start_utt(FE)) {
00219                 curr_block = 1;
00220                 total_frames = frames_proc = 0;
00221                 /*execute this loop only if there is more than 1 block to
00222                    be processed */
00223                 while (curr_block < nblocks) {
00224                     splen = P->blocksize;
00225                     if ((spdata =
00226                          (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00227                         E_ERROR
00228                             ("Unable to allocate memory block of %d shorts for input speech\n",
00229                              splen);
00230                         fe_free(FE);
00231                         return (FE_MEM_ALLOC_ERROR);
00232                     }
00233                     if (fe_readblock_spch
00234                         (P, fh_in, splen, spdata) != splen) {
00235                         E_ERROR("error reading speech data\n");
00236                         fe_free(FE);
00237                         return (FE_INPUT_FILE_READ_ERROR);
00238                     }
00239                     process_utt_return_value =
00240                         fe_process_utt(FE, spdata,
00241                                        splen, &cep, &frames_proc);
00242                     if (process_utt_return_value != FE_SUCCESS) {
00243                         if (FE_ZERO_ENERGY_ERROR ==
00244                             process_utt_return_value) {
00245                             warn_zero_energy = 1;
00246                         }
00247                         else {
00248                             fe_free(FE);
00249                             return (process_utt_return_value);
00250                         }
00251                     }
00252                     if (frames_proc > 0)
00253                         fe_writeblock_feat(P, FE,
00254                                            fh_out, frames_proc, cep);
00255                     if (cep != NULL) {
00256                         ckd_free_2d((void **) cep);
00257                         cep = NULL;
00258                     }
00259                     curr_block++;
00260                     total_frames += frames_proc;
00261                     free(spdata);
00262                     spdata = NULL;
00263                 }
00264                 /* process last (or only) block */
00265                 free(spdata);
00266                 spdata = NULL;
00267                 splen = last_blocksize;
00268 
00269                 if ((spdata =
00270                      (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00271                     E_ERROR
00272                         ("Unable to allocate memory block of %d shorts for input speech\n",
00273                          splen);
00274                     fe_free(FE);
00275                     return (FE_MEM_ALLOC_ERROR);
00276                 }
00277 
00278                 if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00279                     E_ERROR("error reading speech data\n");
00280                     fe_free(FE);
00281                     return (FE_INPUT_FILE_READ_ERROR);
00282                 }
00283 
00284                 process_utt_return_value =
00285                     fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00286                 if (process_utt_return_value != FE_SUCCESS) {
00287                     if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00288                         warn_zero_energy = 1;
00289                     }
00290                     else {
00291                         fe_free(FE);
00292                         return (process_utt_return_value);
00293                     }
00294                 }
00295                 if (frames_proc > 0)
00296                     fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00297                 if (cep != NULL) {
00298                     ckd_free_2d((void **) cep);
00299                     cep = NULL;
00300                 }
00301                 curr_block++;
00302                 last_frame_cep =
00303                     (mfcc_t **) ckd_calloc_2d(1,
00304                                               fe_get_output_size(FE),
00305                                               sizeof(float32));
00306                 process_utt_return_value =
00307                     fe_end_utt(FE, last_frame_cep[0], &last_frame);
00308                 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00309                     warn_zero_energy = 1;
00310                 }
00311                 else {
00312                     assert(process_utt_return_value == FE_SUCCESS);
00313                 }
00314                 if (last_frame > 0) {
00315                     fe_writeblock_feat(P, FE, fh_out,
00316                                        last_frame, last_frame_cep);
00317                     frames_proc++;
00318                 }
00319                 total_frames += frames_proc;
00320 
00321                 fe_closefiles(P, fh_in, fh_out);
00322                 free(spdata);
00323                 spdata = NULL;
00324                 if (last_frame_cep != NULL) {
00325                     ckd_free_2d((void **)
00326                                 last_frame_cep);
00327                     last_frame_cep = NULL;
00328                 }
00329                 if (warn_zero_energy) {
00330                     E_WARN
00331                         ("File %s has some frames with zero energy. Consider using dither\n",
00332                          infile);
00333                 }
00334             }
00335             else {
00336                 E_ERROR("fe_start_utt() failed\n");
00337                 return (FE_START_ERROR);
00338             }
00339         }
00340     }
00341     else if (P->is_single) {
00342 
00343         fe_build_filenames(P, fileroot, &infile, &outfile);
00344         if (P->verbose)
00345             printf("%s\n", infile);
00346 
00347         /* Special case for doing various DCTs. */
00348         if (P->convert != WAV2FEAT) {
00349             int rv;
00350 
00351             rv = fe_convert_with_dct(P, FE, infile, outfile);
00352             ckd_free(infile);
00353             ckd_free(outfile);
00354             infile = outfile = NULL;
00355             fe_free(FE);
00356             return rv;
00357         }
00358 
00359         return_value =
00360             fe_openfiles(P, FE, infile, &fh_in, &total_samps,
00361                          &nframes, &nblocks, outfile, &fh_out);
00362         ckd_free(infile);
00363         ckd_free(outfile);
00364         infile = outfile = NULL;
00365         if (return_value != FE_SUCCESS) {
00366             fe_free(FE);
00367             return (return_value);
00368         }
00369 
00370         warn_zero_energy = 0;
00371 
00372         if (nblocks * P->blocksize >= total_samps)
00373             last_blocksize = total_samps - (nblocks - 1) * P->blocksize;
00374 
00375         if (!fe_start_utt(FE)) {
00376             curr_block = 1;
00377             total_frames = frames_proc = 0;
00378             /*execute this loop only if there are more than 1 block to
00379                be processed */
00380             while (curr_block < nblocks) {
00381                 splen = P->blocksize;
00382                 if ((spdata =
00383                      (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00384                     E_ERROR
00385                         ("Unable to allocate memory block of %d shorts for input speech\n",
00386                          splen);
00387                     fe_free(FE);
00388                     return (FE_MEM_ALLOC_ERROR);
00389                 }
00390                 if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00391                     E_ERROR("Error reading speech data\n");
00392                     fe_free(FE);
00393                     return (FE_INPUT_FILE_READ_ERROR);
00394                 }
00395                 process_utt_return_value =
00396                     fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00397                 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00398                     warn_zero_energy = 1;
00399                 }
00400                 else {
00401                     assert(process_utt_return_value == FE_SUCCESS);
00402                 }
00403                 if (frames_proc > 0)
00404                     fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00405                 if (cep != NULL) {
00406                     ckd_free_2d((void **) cep);
00407                     cep = NULL;
00408                 }
00409                 curr_block++;
00410                 total_frames += frames_proc;
00411                 if (spdata != NULL) {
00412                     free(spdata);
00413                     spdata = NULL;
00414                 }
00415             }
00416             /* process last (or only) block */
00417             if (spdata != NULL) {
00418                 free(spdata);
00419                 spdata = NULL;
00420             }
00421             splen = last_blocksize;
00422             if ((spdata = (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00423                 E_ERROR
00424                     ("Unable to allocate memory block of %d shorts for input speech\n",
00425                      splen);
00426                 fe_free(FE);
00427                 return (FE_MEM_ALLOC_ERROR);
00428             }
00429             if (fe_readblock_spch(P, fh_in, splen, spdata) != splen) {
00430                 E_ERROR("Error reading speech data\n");
00431                 fe_free(FE);
00432                 return (FE_INPUT_FILE_READ_ERROR);
00433             }
00434             process_utt_return_value =
00435                 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00436             free(spdata);
00437             spdata = NULL;
00438             if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00439                 warn_zero_energy = 1;
00440             }
00441             else {
00442                 assert(process_utt_return_value == FE_SUCCESS);
00443             }
00444             if (frames_proc > 0)
00445                 fe_writeblock_feat(P, FE, fh_out, frames_proc, cep);
00446             if (cep != NULL) {
00447                 ckd_free_2d((void **) cep);
00448                 cep = NULL;
00449             }
00450 
00451             curr_block++;
00452             last_frame_cep =
00453                 (mfcc_t **) ckd_calloc_2d(1,
00454                                           fe_get_output_size(FE),
00455                                           sizeof(float32));
00456             process_utt_return_value =
00457                 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00458             if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00459                 warn_zero_energy = 1;
00460             }
00461             else {
00462                 assert(process_utt_return_value == FE_SUCCESS);
00463             }
00464             if (last_frame > 0) {
00465                 fe_writeblock_feat(P, FE, fh_out,
00466                                    last_frame, last_frame_cep);
00467                 frames_proc++;
00468             }
00469             total_frames += frames_proc;
00470 
00471             fe_closefiles(P, fh_in, fh_out);
00472             if (last_frame_cep != NULL) {
00473                 ckd_free_2d((void **) last_frame_cep);
00474                 last_frame_cep = NULL;
00475             }
00476         }
00477         else {
00478             E_ERROR("fe_start_utt() failed\n");
00479             fe_free(FE);
00480             return (FE_START_ERROR);
00481         }
00482 
00483         if (warn_zero_energy) {
00484             E_WARN
00485                 ("File %s has some frames with zero energy. Consider using dither\n",
00486                  infile);
00487         }
00488     }
00489     else {
00490         E_ERROR("Unknown mode - single or batch?\n");
00491         fe_free(FE);
00492         return (FE_UNKNOWN_SINGLE_OR_BATCH);
00493     }
00494 
00495     fe_free(FE);
00496     return (FE_SUCCESS);
00497 }
00498 
00499 void
00500 fe_validate_parameters(globals_t * P)
00501 {
00502 
00503     if ((P->is_batch) && (P->is_single)) {
00504         E_FATAL("You cannot define an input file and a control file\n");
00505     }
00506 
00507     if (P->wavfile == NULL && P->wavdir == NULL) {
00508         E_FATAL("No input file or file directory given\n");
00509     }
00510 
00511     if (P->cepfile == NULL && P->cepdir == NULL) {
00512         E_FATAL("No cepstra file or file directory given\n");
00513     }
00514 
00515     if (P->ctlfile == NULL && P->cepfile == NULL && P->wavfile == NULL) {
00516         E_FATAL("No control file given\n");
00517     }
00518 
00519     if (P->nchans > 1) {
00520         E_INFO("Files have %d channels of data\n", P->nchans);
00521         E_INFO("Will extract features for channel %d\n", P->whichchan);
00522     }
00523 
00524     if (P->whichchan > P->nchans) {
00525         E_FATAL("You cannot select channel %d out of %d\n",
00526                 P->whichchan, P->nchans);
00527     }
00528 
00529     if ((cmd_ln_float32_r(P->config, "-upperf") * 2)
00530         > cmd_ln_float32_r(P->config, "-samprate")) {
00531         E_WARN("Upper frequency higher than Nyquist frequency\n");
00532     }
00533 
00534     if (cmd_ln_boolean_r(P->config, "-doublebw")) {
00535         E_INFO("Will use double bandwidth filters\n");
00536     }
00537 
00538 }
00539 
00540 
00541 globals_t *
00542 fe_parse_options(int32 argc, char **argv)
00543 {
00544     globals_t *P;
00545     int32 format;
00546     char const *endian;
00547 
00548     P = ckd_calloc(1, sizeof(*P));
00549     P->config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE);
00550 
00551     /* Load arguments from a feat.params file if requested. */
00552     if (cmd_ln_str_r(P->config, "-argfile")) {
00553         P->config = cmd_ln_parse_file_r(P->config, defn,
00554                                         cmd_ln_str_r(P->config, "-argfile"),
00555                                         FALSE);
00556     }
00557 
00558     P->nskip = P->runlen = -1;
00559     P->wavfile = cmd_ln_str_r(P->config, "-i");
00560     if (P->wavfile != NULL) {
00561         P->is_single = 1;
00562     }
00563     P->cepfile = cmd_ln_str_r(P->config, "-o");
00564     P->ctlfile = cmd_ln_str_r(P->config, "-c");
00565     if (P->ctlfile != NULL) {
00566         char const *nskip;
00567         char const *runlen;
00568 
00569         P->is_batch = 1;
00570 
00571         nskip = cmd_ln_str_r(P->config, "-nskip");
00572         runlen = cmd_ln_str_r(P->config, "-runlen");
00573         if (nskip != NULL) {
00574             P->nskip = atoi(nskip);
00575         }
00576         if (runlen != NULL) {
00577             P->runlen = atoi(runlen);
00578         }
00579     }
00580     P->wavdir = cmd_ln_str_r(P->config, "-di");
00581     P->cepdir = cmd_ln_str_r(P->config, "-do");
00582     P->wavext = cmd_ln_str_r(P->config, "-ei");
00583     P->cepext = cmd_ln_str_r(P->config, "-eo");
00584     format = cmd_ln_int32_r(P->config, "-raw");
00585     if (format) {
00586         P->input_format = RAW;
00587     }
00588     format = cmd_ln_int32_r(P->config, "-nist");
00589     if (format) {
00590         P->input_format = NIST;
00591     }
00592     format = cmd_ln_int32_r(P->config, "-mswav");
00593     if (format) {
00594         P->input_format = MSWAV;
00595     }
00596 
00597     P->nchans = cmd_ln_int32_r(P->config, "-nchans");
00598     P->whichchan = cmd_ln_int32_r(P->config, "-whichchan");
00599     P->output_endian = BIG;
00600     P->blocksize = cmd_ln_int32_r(P->config, "-blocksize");
00601     endian = cmd_ln_str_r(P->config, "-mach_endian");
00602     if (!strcmp("big", endian)) {
00603         P->machine_endian = BIG;
00604     }
00605     else {
00606         if (!strcmp("little", endian)) {
00607             P->machine_endian = LITTLE;
00608         }
00609         else {
00610             E_FATAL("Machine must be big or little Endian\n");
00611         }
00612     }
00613     endian = cmd_ln_str_r(P->config, "-input_endian");
00614     if (!strcmp("big", endian)) {
00615         P->input_endian = BIG;
00616     }
00617     else {
00618         if (!strcmp("little", endian)) {
00619             P->input_endian = LITTLE;
00620         }
00621         else {
00622             E_FATAL("Input must be big or little Endian\n");
00623         }
00624     }
00625 
00626     if (cmd_ln_boolean_r(P->config, "-logspec")
00627         || cmd_ln_boolean_r(P->config, "-smoothspec"))
00628         P->logspec = TRUE;
00629     if (cmd_ln_boolean_r(P->config, "-spec2cep"))
00630         P->convert = SPEC2CEP;
00631     if (cmd_ln_boolean_r(P->config, "-cep2spec"))
00632         P->convert = CEP2SPEC;
00633 
00634     fe_validate_parameters(P);
00635 
00636     return (P);
00637 
00638 }
00639 
00640 int32
00641 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00642                    char **outfilename)
00643 {
00644     char chanlabel[32];
00645 
00646     if (P->nchans > 1)
00647         sprintf(chanlabel, ".ch%d", P->whichchan);
00648 
00649     if (P->is_batch) {
00650         if (infilename != NULL) {
00651             *infilename = string_join(P->wavdir, "/",
00652                                       fileroot, ".",
00653                                       P->wavext, NULL);
00654         }
00655 
00656         if (outfilename != NULL) {
00657             if (P->nchans > 1)
00658                 *outfilename = string_join(P->cepdir, "/",
00659                                            fileroot, chanlabel,
00660                                            ".", P->cepext, NULL);
00661             else
00662                 *outfilename = string_join(P->cepdir, "/",
00663                                            fileroot, ".",
00664                                            P->cepext, NULL);
00665         }
00666     }
00667     else if (P->is_single) {
00668         if (infilename != NULL) {
00669             *infilename = ckd_salloc(P->wavfile);
00670         }
00671         if (outfilename != NULL) {
00672             *outfilename = ckd_salloc(P->cepfile);
00673         }
00674     }
00675     else {
00676         E_FATAL("Unspecified Batch or Single Mode\n");
00677     }
00678 
00679     return 0;
00680 }
00681 
00682 int32
00683 fe_openfiles(globals_t * P, fe_t * FE, char *infile, FILE **fh_in,
00684              int32 * nsamps, int32 * nframes, int32 * nblocks,
00685              char *outfile, FILE **fh_out)
00686 {
00687     int len = 0, outlen, numframes, numblocks;
00688     FILE *fp2, *fh;
00689     char line[MAXCHARS];
00690     int got_it = 0;
00691 
00692 
00693     /* Note: this is kind of a hack to read the byte format from the
00694        NIST header */
00695     if (P->input_format == NIST) {
00696         if ((fp2 = fopen(infile, "rb")) == NULL) {
00697             E_ERROR_SYSTEM("Cannot read %s", infile);
00698             return (FE_INPUT_FILE_READ_ERROR);
00699         }
00700         *line = 0;
00701         got_it = 0;
00702         while (strcmp(line, "end_head") && !got_it) {
00703             fscanf(fp2, "%s", line);
00704             if (!strcmp(line, "sample_byte_format")) {
00705                 fscanf(fp2, "%s", line);
00706                 if (!strcmp(line, "-s2")) {
00707                     fscanf(fp2, "%s", line);
00708                     if (!strcmp(line, "01")) {
00709                         P->input_endian = LITTLE;
00710                         got_it = 1;
00711                     }
00712                     else if (!strcmp(line, "10")) {
00713                         P->input_endian = BIG;
00714                         got_it = 1;
00715                     }
00716                     else
00717                         E_ERROR("Unknown/unsupported byte order\n");
00718                 }
00719                 else
00720                     E_ERROR("Error determining byte format\n");
00721             }
00722         }
00723         if (!got_it) {
00724             E_WARN
00725                 ("Can't find byte format in header, setting to machine's endian\n");
00726             P->input_endian = P->machine_endian;
00727         }
00728         fclose(fp2);
00729     }
00730     else if (P->input_format == RAW) {
00731         /*
00732            P->input_endian = P->machine_endian;
00733          */
00734     }
00735     else if (P->input_format == MSWAV) {
00736         P->input_endian = LITTLE;       // Default for MS WAV riff files
00737     }
00738 
00739     if ((fh = fopen(infile, "rb")) == NULL) {
00740         fprintf(stderr, "Cannot open %s\n", infile);
00741         return (FE_INPUT_FILE_OPEN_ERROR);
00742     }
00743     else {
00744         long fsize;
00745 
00746         fseek(fh, 0, SEEK_END);
00747         fsize = ftell(fh);
00748         fseek(fh, 0, SEEK_SET);
00749 
00750         if (P->input_format == NIST) {
00751             short *hdr_buf;
00752 
00753             len = (fsize - HEADER_BYTES) / sizeof(short);
00754             /* eat header */
00755             hdr_buf =
00756                 (short *) calloc(HEADER_BYTES / sizeof(short),
00757                                  sizeof(short));
00758             if (fread(hdr_buf, 1, HEADER_BYTES, fh) != HEADER_BYTES) {
00759                 E_ERROR("Cannot read %s\n", infile);
00760                 return (FE_INPUT_FILE_READ_ERROR);
00761             }
00762             free(hdr_buf);
00763         }
00764         else if (P->input_format == RAW) {
00765             len = fsize / sizeof(int16);
00766         }
00767         else if (P->input_format == MSWAV) {
00768             /* Read the header */
00769             MSWAV_hdr *hdr_buf = NULL;
00770             /* MC: read till just before datatag */
00771             const int hdr_len_to_read = ((char *) (&hdr_buf->datatag))
00772                 - (char *) hdr_buf;
00773             int data_start;
00774 
00775             if ((hdr_buf =
00776                  (MSWAV_hdr *) calloc(1, sizeof(MSWAV_hdr))) == NULL) {
00777                 E_ERROR("Cannot allocate for input file header\n");
00778                 return (FE_INPUT_FILE_READ_ERROR);
00779             }
00780             if (fread(hdr_buf, 1, hdr_len_to_read, fh) != hdr_len_to_read) {
00781                 E_ERROR("Cannot allocate for input file header\n");
00782                 return (FE_INPUT_FILE_READ_ERROR);
00783             }
00784             /* Check header */
00785             if (strncmp(hdr_buf->rifftag, "RIFF", 4) != 0 ||
00786                 strncmp(hdr_buf->wavefmttag, "WAVEfmt", 7) != 0) {
00787                 E_ERROR("Error in mswav file header\n");
00788                 return (FE_INPUT_FILE_READ_ERROR);
00789             }
00790             {
00791                 /* There may be other "chunks" before the data chunk,
00792                  * which we can ignore. We have to find the start of
00793                  * the data chunk, which begins with the string
00794                  * "data".
00795                  */
00796                 int16 found = 0;
00797                 char readChar;
00798                 char *dataString = "data";
00799                 int16 charPointer = 0;
00800                 while (!found) {
00801                     if (fread(&readChar, 1, 1, fh) != 1) {
00802                         E_ERROR("Failed reading wav file.\n");
00803                         return (FE_INPUT_FILE_READ_ERROR);
00804                     }
00805                     if (readChar == dataString[charPointer]) {
00806                         charPointer++;
00807                     }
00808                     if (charPointer == (int) strlen(dataString)) {
00809                         found = 1;
00810                         strcpy(hdr_buf->datatag, dataString);
00811                         if (fread(&(hdr_buf->datalength), sizeof(int32), 1, fh) != 1) {
00812                             E_ERROR("Failed reading wav file.\n");
00813                             return (FE_INPUT_FILE_READ_ERROR);
00814                         }
00815                     }
00816                 }
00817             }
00818             data_start = ftell(fh);
00819             if (P->input_endian != P->machine_endian) { // If machine is Big Endian
00820                 hdr_buf->datalength = SWAP_INT32(&(hdr_buf->datalength));
00821                 hdr_buf->data_format = SWAP_INT16(&(hdr_buf->data_format));
00822                 hdr_buf->numchannels = SWAP_INT16(&(hdr_buf->numchannels));
00823                 hdr_buf->BitsPerSample =
00824                     SWAP_INT16(&(hdr_buf->BitsPerSample));
00825                 hdr_buf->SamplingFreq =
00826                     SWAP_INT32(&(hdr_buf->SamplingFreq));
00827                 hdr_buf->BytesPerSec = SWAP_INT32(&(hdr_buf->BytesPerSec));
00828             }
00829             /* Check Format */
00830             if (hdr_buf->data_format != 1 || hdr_buf->BitsPerSample != 16) {
00831                 E_ERROR("MS WAV file not in 16-bit PCM format\n");
00832                 return (FE_INPUT_FILE_READ_ERROR);
00833             }
00834             /* This number may be bogus.  Check for a truncated file. */
00835             len = hdr_buf->datalength / sizeof(short);
00836             if (len > (fsize - data_start) / sizeof(short))
00837                 len = (fsize - data_start) / sizeof(short);
00838             
00839             P->nchans = hdr_buf->numchannels;
00840             /* DEBUG: Dump Info */
00841             if (P->verbose) {
00842                 E_INFO("Reading MS Wav file %s:\n", infile);
00843                 E_INFO
00844                     ("\t16 bit PCM data, %d channels %d samples\n",
00845                      P->nchans, len);
00846                 E_INFO("\tSampled at %d\n", hdr_buf->SamplingFreq);
00847             }
00848             free(hdr_buf);
00849         }
00850         else {
00851             E_ERROR("Unknown input file format\n");
00852             return (FE_INPUT_FILE_OPEN_ERROR);
00853         }
00854     }
00855 
00856 
00857     len = len / P->nchans;
00858     *nsamps = len;
00859     *fh_in = fh;
00860 
00861     numblocks = (int) ((float) len / (float) P->blocksize);
00862     if (numblocks * P->blocksize < len)
00863         numblocks++;
00864 
00865     *nblocks = numblocks;
00866 
00867     if ((fh = fopen(outfile, "wb")) < 0) {
00868         E_ERROR("Unable to open %s for writing features\n", outfile);
00869         return (FE_OUTPUT_FILE_OPEN_ERROR);
00870     }
00871     else {
00872         size_t nsamps = len;
00873         int frame_shift, frame_size;
00874 
00875         /* Compute number of frames and write cepfile header */
00876         fe_process_frames(FE, NULL, &nsamps, NULL, &numframes);
00877         /* This is sort of hacky... we need to figure out if there
00878            will be a trailing frame from fe_end_utt() or not.  */
00879         fe_get_input_size(FE, &frame_shift, &frame_size);
00880         /* Don't ask me why this has to be <= rather than <, it just does... */
00881         if (frame_size + (numframes - 1) * frame_shift <= len)
00882             ++numframes;
00883 
00884         /* This is potentially bogus and will be overwritten when
00885          * closing the file. */
00886         outlen = numframes * fe_get_output_size(FE);
00887         if (P->output_endian != P->machine_endian)
00888             SWAP_INT32(&outlen);
00889         if (fwrite(&outlen, 4, 1, fh) != 1) {
00890             E_ERROR("Data write error on %s\n", outfile);
00891             fclose(fh);
00892             return (FE_OUTPUT_FILE_WRITE_ERROR);
00893         }
00894         if (P->output_endian != P->machine_endian)
00895             SWAP_INT32(&outlen);
00896     }
00897 
00898     *nframes = numframes;
00899     *fh_out = fh;
00900 
00901     return 0;
00902 }
00903 
00904 int32
00905 fe_readblock_spch(globals_t * P, FILE *fh, int32 nsamps, int16 * buf)
00906 {
00907     int32 nsamps_read, cum_samps_read, actsamps, offset, i,
00908         j, k;
00909     int16 *tmpbuf;
00910     int32 nchans, whichchan;
00911 
00912     nchans = P->nchans;
00913     whichchan = P->whichchan;
00914 
00915     if (nchans == 1) {
00916         if (P->input_format == RAW
00917             || P->input_format == NIST
00918             || P->input_format == MSWAV) {
00919             if ((nsamps_read = fread(buf, 2, nsamps, fh)) != nsamps) {
00920                 E_ERROR_SYSTEM("error reading block: %ld != %d",
00921                                nsamps_read, nsamps);
00922                 return (0);
00923             }
00924         }
00925         else {
00926             E_ERROR("unknown input file format\n");
00927             return (0);
00928         }
00929         cum_samps_read = nsamps_read;
00930     }
00931     else if (nchans > 1) {
00932         if (nsamps < P->blocksize) {
00933             actsamps = nsamps * nchans;
00934             tmpbuf = (int16 *) calloc(nsamps * nchans, sizeof(int16));
00935             cum_samps_read = 0;
00936             if (P->input_format == RAW
00937                 || P->input_format == MSWAV || P->input_format == NIST) {
00938 
00939                 k = 0;
00940                 if ((nsamps_read =
00941                      fread(tmpbuf, 2, actsamps, fh)) != actsamps) {
00942                     E_ERROR
00943                         ("error reading block (got %d not %d)\n",
00944                          nsamps_read, actsamps);
00945                     return (0);
00946                 }
00947 
00948                 for (j = whichchan - 1; j < actsamps; j = j + nchans) {
00949                     buf[k] = tmpbuf[j];
00950                     k++;
00951                 }
00952                 cum_samps_read += nsamps_read / nchans;
00953             }
00954             else {
00955                 E_ERROR("unknown input file format\n");
00956                 return (0);
00957             }
00958             free(tmpbuf);
00959         }
00960         else {
00961             tmpbuf = (int16 *) calloc(nsamps, sizeof(int16));
00962             actsamps = nsamps / nchans;
00963             cum_samps_read = 0;
00964 
00965             if (actsamps * nchans != nsamps) {
00966                 E_WARN
00967                     ("Blocksize %d is not an integer multiple of Number of channels %d\n",
00968                      nsamps, nchans);
00969             }
00970 
00971             if (P->input_format == RAW
00972                 || P->input_format == MSWAV || P->input_format == NIST) {
00973                 for (i = 0; i < nchans; i++) {
00974 
00975                     offset = i * actsamps;
00976                     k = 0;
00977 
00978                     if ((nsamps_read =
00979                          fread(tmpbuf, 2, actsamps, fh)) != actsamps) {
00980                         E_ERROR
00981                             ("error reading block (got %d not %d)\n",
00982                              nsamps_read, actsamps);
00983                         return (0);
00984                     }
00985 
00986                     for (j = whichchan - 1; j < nsamps; j = j + nchans) {
00987                         buf[offset + k] = tmpbuf[j];
00988                         k++;
00989                     }
00990                     cum_samps_read += nsamps_read / nchans;
00991                 }
00992             }
00993             else {
00994                 E_ERROR("unknown input file format\n");
00995                 return (0);
00996             }
00997             free(tmpbuf);
00998         }
00999     }
01000 
01001     else {
01002         E_ERROR("unknown number of channels!\n");
01003         return (0);
01004     }
01005 
01006     if (P->input_endian != P->machine_endian) {
01007         for (i = 0; i < nsamps; i++)
01008             SWAP_INT16(&buf[i]);
01009     }
01010 
01011     return cum_samps_read;
01012 
01013 }
01014 
01015 int32
01016 fe_writeblock_feat(globals_t * P, fe_t * FE, FILE *fh, int32 nframes,
01017                    mfcc_t ** feat)
01018 {
01019 
01020     int32 i, length;
01021     float32 **ffeat;
01022 
01023     length = nframes * fe_get_output_size(FE);
01024 
01025     ffeat = (float32 **) feat;
01026     fe_mfcc_to_float(FE, feat, ffeat, nframes);
01027     if (P->output_endian != P->machine_endian) {
01028         for (i = 0; i < length; ++i)
01029             SWAP_FLOAT32(ffeat[0] + i);
01030     }
01031 
01032     if (fwrite(ffeat[0], 4, length, fh) != length) {
01033         fclose(fh);
01034         E_FATAL("Error writing block of features\n");
01035     }
01036 
01037     return (length);
01038 }
01039 
01040 
01041 int32
01042 fe_closefiles(globals_t *P, FILE *fh_in, FILE *fh_out)
01043 {
01044     int32 nfloats;
01045 
01046     fclose(fh_in);
01047 
01048     nfloats = ftell(fh_out) / 4 - 1;
01049     if (P->output_endian != P->machine_endian)
01050         SWAP_INT32(&nfloats);
01051     fseek(fh_out, 0, SEEK_SET);
01052     fwrite(&nfloats, 4, 1, fh_out);
01053     fclose(fh_out);
01054 
01055     return 0;
01056 }
01057 
01058 int32
01059 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile)
01060 {
01061     FILE *ifh, *ofh;
01062     int32 ifsize, nfloats, swap = 0;
01063     int32 input_ncoeffs, output_ncoeffs;
01064     float32 *logspec;
01065 
01066     if ((ifh = fopen(infile, "rb")) == NULL) {
01067         E_ERROR_SYSTEM("Cannot read %s", infile);
01068         return (FE_INPUT_FILE_READ_ERROR);
01069     }
01070     if ((ofh = fopen(outfile, "wb")) == NULL) {
01071         E_ERROR_SYSTEM("Unable to open %s for writing features", outfile);
01072         return (FE_OUTPUT_FILE_OPEN_ERROR);
01073     }
01074 
01075     fseek(ifh, 0, SEEK_END);
01076     ifsize = ftell(ifh);
01077     fseek(ifh, 0, SEEK_SET);
01078     fread(&nfloats, 4, 1, ifh);
01079     if (nfloats != ifsize / 4 - 1) {
01080         E_INFO("Will byteswap %s (%x != %x)\n",
01081                infile, nfloats, ifsize / 4 - 1);
01082         SWAP_INT32(&nfloats);
01083         swap = 1;
01084     }
01085     if (nfloats != ifsize / 4 - 1) {
01086         E_ERROR("Size of file doesn't match header: %d != %d\n",
01087                 nfloats, ifsize / 4 - 1);
01088         return (FE_INPUT_FILE_READ_ERROR);
01089     }
01090     if (P->convert == CEP2SPEC) {
01091         input_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01092         output_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01093     }
01094     else {
01095         input_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01096         output_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01097     }
01098     nfloats = nfloats * output_ncoeffs / input_ncoeffs;
01099 
01100     if (swap)
01101         SWAP_INT32(&nfloats);
01102     fwrite(&nfloats, 4, 1, ofh);
01103     /* Always use the largest size since it's done inplace */
01104     logspec = ckd_calloc(cmd_ln_int32_r(P->config, "-nfilt"),
01105                          sizeof(*logspec));
01106 
01107     while (fread(logspec, 4, input_ncoeffs, ifh) == input_ncoeffs) {
01108         int32 i;
01109         if (swap) {
01110             for (i = 0; i < input_ncoeffs; ++i) {
01111                 SWAP_FLOAT32(logspec + i);
01112             }
01113         }
01114         fe_float_to_mfcc(FE, &logspec, (mfcc_t **)&logspec, 1);
01115         if (P->convert == CEP2SPEC) {
01116             fe_mfcc_dct3(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01117         }
01118         else {
01119             if (0 == strcmp(cmd_ln_str_r(P->config, "-transform"), "legacy"))
01120                 fe_logspec_to_mfcc(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01121             else
01122                 fe_logspec_dct2(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01123         }
01124         fe_mfcc_to_float(FE, (mfcc_t **)&logspec, &logspec, 1);
01125         if (swap) {
01126             for (i = 0; i < output_ncoeffs; ++i) {
01127                 SWAP_FLOAT32(logspec + i);
01128             }
01129         }
01130         if (fwrite(logspec, 4, output_ncoeffs, ofh) < output_ncoeffs) {
01131             E_ERROR_SYSTEM("Failed to write %d coeffs to %s",
01132                            output_ncoeffs, outfile);
01133             ckd_free(logspec);
01134             return (FE_OUTPUT_FILE_WRITE_ERROR);
01135         }
01136     }
01137     if (!feof(ifh)) {
01138         E_ERROR("Short read in input file %s\n", infile);
01139         ckd_free(logspec);
01140         return (FE_INPUT_FILE_READ_ERROR);
01141     }
01142     fclose(ifh);
01143     fclose(ofh);
01144     ckd_free(logspec);
01145 
01146     return FE_SUCCESS;
01147 }
01148 
01149 
#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")

/*
 * Windows Mobile has the Unicode main only: convert the wide-character
 * arguments to multibyte strings and hand them to main().
 *
 * Each converted argument is NUL-terminated and the argument vector is
 * terminated with a NULL pointer.  Returns EXIT_FAILURE without calling
 * main() if memory cannot be allocated or an argument cannot be
 * converted; otherwise returns whatever main() returns.
 */
int wmain(int32 argc, wchar_t *wargv[]) {
    char **argv;
    size_t len;
    int i;

    argv = malloc((argc + 1) * sizeof(char *));
    if (argv == NULL)
        return EXIT_FAILURE;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs() only reports the number of
         * bytes needed, not counting the terminator. */
        len = wcstombs(NULL, wargv[i], 0);
        if (len == (size_t) -1)
            return EXIT_FAILURE;

        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return EXIT_FAILURE;

        /* The limit must include room for the terminator, otherwise
         * wcstombs() stops without writing it. */
        wcstombs(argv[i], wargv[i], len + 1);
    }
    argv[argc] = NULL;

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif
01173 
01174 /*
01175  * Log record.  Maintained by RCS.
01176  *
01177  * $Log: wave2feat.c,v $
01178  * Revision 1.35  2006/02/25 00:53:48  egouvea
01179  * Added the flag "-seed". If dither is being used and the seed is less
01180  * than zero, the random number generator is initialized with time(). If
01181  * it is at least zero, it's initialized with the provided seed. This way
01182  * we have the benefit of having dither, and the benefit of being
01183  * repeatable.
01184  *
01185  * This is consistent with what sphinx3 does. Well, almost. The random
01186  * number generator is still what the compiler provides.
01187  *
01188  * Also, moved fe_init_params to fe_interface.c, so one can initialize a
01189  * variable of type param_t with meaningful values.
01190  *
01191  * Revision 1.34  2006/02/20 23:55:51  egouvea
01192  * Moved fe_dither() to the "library" side rather than the app side, so
01193  * the function can be called when using the front end as a library.
01194  *
01195  * Revision 1.33  2006/02/17 00:31:34  egouvea
01196  * Removed switch -melwarp. Changed the default for window length to
01197  * 0.025625 from 0.256 (so that a window at 16kHz sampling rate has
01198  * exactly 410 samples). Cleaned up include's. Replaced some E_FATAL()
01199  * with E_WARN() and return.
01200  *
01201  * Revision 1.32  2006/02/16 20:11:20  egouvea
01202  * Fixed the code that prints a warning if any zero-energy frames are
01203  * found, and recommending the user to add dither. Previously, it would
01204  * only report the zero energy frames if they happened in the last
01205  * utterance. Now, it reports for each utterance.
01206  *
01207  * Revision 1.31  2006/02/16 00:18:26  egouvea
01208  * Implemented flexible warping function. The user can specify at run
01209  * time which of several shapes they want to use. Currently implemented
01210  * are an affine function (y = ax + b), an inverse linear (y = a/x) and a
01211  * piecewise linear (y = ax, up to a frequency F, and then it "breaks" so
01212  * Nyquist frequency matches in both scales).
01213  *
01214  * Added two switches, -warp_type and -warp_params. The first specifies
01215  * the type, with valid values:
01216  *
01217  * -inverse or inverse_linear
01218  * -linear or affine
01219  * -piecewise or piecewise_linear
01220  *
01221  * The inverse_linear is the same as implemented by EHT. The -mel_warp
01222  * switch was kept for compatibility (maybe remove it in the
01223  * future?). The code is compatible with EHT's changes: cepstra created
01224  * from code after his changes should be the same as now. Scripts that
01225  * worked with his changes should work now without changes. Tested a few
01226  * cases, same results.
01227  *
01228  */

Generated on Mon Jan 24 21:36:19 2011 for SphinxBase by  doxygen 1.4.7