src/libsphinxbase/util/pio.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 #include <config.h>
00039 
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <string.h>
00043 #ifdef HAVE_UNISTD_H
00044 #include <unistd.h>
00045 #endif
00046 #include <assert.h>
00047 
00048 #include "pio.h"
00049 #include "err.h"
00050 #include "strfuncs.h"
00051 #include "ckd_alloc.h"
00052 
00053 #ifndef EXEEXT
00054 #define EXEEXT ""
00055 #endif
00056 
/* Compression schemes that guess_comptype() can report for a file name. */
enum {
    COMP_NONE = 0,      /* no recognized compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" suffix */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" suffix */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" suffix */
};
00063 
00064 static void
00065 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
00066 {
00067     int k;
00068 
00069     k = strlen(file);
00070     *ispipe = 0;
00071     *isgz = COMP_NONE;
00072     if ((k > 2)
00073         && ((strcmp(file + k - 2, ".Z") == 0)
00074             || (strcmp(file + k - 2, ".z") == 0))) {
00075         *ispipe = 1;
00076         *isgz = COMP_COMPRESS;
00077     }
00078     else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
00079                         || (strcmp(file + k - 3, ".GZ") == 0))) {
00080         *ispipe = 1;
00081         *isgz = COMP_GZIP;
00082     }
00083     else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
00084                         || (strcmp(file + k - 4, ".BZ2") == 0))) {
00085         *ispipe = 1;
00086         *isgz = COMP_BZIP2;
00087     }
00088 }
00089 
00090 FILE *
00091 fopen_comp(const char *file, const char *mode, int32 * ispipe)
00092 {
00093     FILE *fp;
00094 
00095 #ifndef HAVE_POPEN
00096     *ispipe = 0; /* No popen() on WinCE */
00097 #else /* HAVE_POPEN */
00098     int32 isgz;
00099     guess_comptype(file, ispipe, &isgz);
00100 #endif /* HAVE_POPEN */
00101 
00102     if (*ispipe) {
00103 #ifndef HAVE_POPEN
00104         /* Shouldn't get here, anyway */
00105         E_FATAL("No popen() on WinCE\n");
00106 #else
00107         char *command = NULL;
00108 
00109         if (strcmp(mode, "r") == 0) {
00110             switch (isgz) {
00111             case COMP_GZIP:
00112                 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
00113                 break;
00114             case COMP_COMPRESS:
00115                 command = string_join("zcat" EXEEXT, " ", file, NULL);
00116                 break;
00117             case COMP_BZIP2:
00118                 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
00119                 break;
00120             default:
00121                 E_FATAL("Unknown  compression type %d\n", isgz);
00122             }
00123             if ((fp = popen(command, mode)) == NULL) {
00124                 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
00125                 ckd_free(command);
00126                 return NULL;
00127             }
00128         }
00129         else if (strcmp(mode, "w") == 0) {
00130             switch (isgz) {
00131             case COMP_GZIP:
00132                 command = string_join("gzip" EXEEXT, " > ", file, NULL);
00133                 break;
00134             case COMP_COMPRESS:
00135                 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
00136                 break;
00137             case COMP_BZIP2:
00138                 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
00139                 break;
00140             default:
00141                 E_FATAL("Unknown compression type %d\n", isgz);
00142             }
00143             if ((fp = popen(command, mode)) == NULL) {
00144                 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
00145                 ckd_free(command);
00146                 return NULL;
00147             }
00148             ckd_free(command);
00149         }
00150         else {
00151             E_ERROR("fopen_comp not implemented for mode = %s\n", mode);
00152             return NULL;
00153         }
00154 #endif /* HAVE_POPEN */
00155     }
00156     else {
00157         fp = fopen(file, mode);
00158     }
00159 
00160     return (fp);
00161 }
00162 
00163 
00164 void
00165 fclose_comp(FILE * fp, int32 ispipe)
00166 {
00167     if (ispipe) {
00168 #ifdef HAVE_POPEN
00169 #if defined(WIN32)
00170         _pclose(fp);
00171 #else
00172         pclose(fp);
00173 #endif
00174 #endif
00175     }
00176     else
00177         fclose(fp);
00178 }
00179 
00180 
00181 FILE *
00182 fopen_compchk(const char *file, int32 * ispipe)
00183 {
00184 #ifndef HAVE_POPEN
00185     *ispipe = 0; /* No popen() on WinCE */
00186     /* And therefore the rest of this function is useless. */
00187     return (fopen_comp(file, "r", ispipe));
00188 #else /* HAVE_POPEN */
00189     int32 isgz;
00190     FILE *fh;
00191 
00192     /* First just try to fopen_comp() it */
00193     if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
00194         return fh;
00195     else {
00196         char *tmpfile;
00197         int k;
00198 
00199         /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
00200         guess_comptype(file, ispipe, &isgz);
00201         k = strlen(file);
00202         tmpfile = ckd_calloc(k+5, 1);
00203         strcpy(tmpfile, file);
00204         switch (isgz) {
00205         case COMP_GZIP:
00206             tmpfile[k - 3] = '\0';
00207             break;
00208         case COMP_BZIP2:
00209             tmpfile[k - 4] = '\0';
00210             break;
00211         case COMP_COMPRESS:
00212             tmpfile[k - 2] = '\0';
00213             break;
00214         case COMP_NONE:
00215             strcpy(tmpfile + k, ".gz");
00216             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00217                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00218                 ckd_free(tmpfile);
00219                 return fh;
00220             }
00221             strcpy(tmpfile + k, ".bz2");
00222             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00223                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00224                 ckd_free(tmpfile);
00225                 return fh;
00226             }
00227             strcpy(tmpfile + k, ".Z");
00228             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00229                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00230                 ckd_free(tmpfile);
00231                 return fh;
00232             }
00233             ckd_free(tmpfile);
00234             return NULL;
00235         }
00236         E_WARN("Using %s instead of %s\n", tmpfile, file);
00237         fh = fopen_comp(tmpfile, "r", ispipe);
00238         ckd_free(tmpfile);
00239         return NULL;
00240     }
00241 #endif /* HAVE_POPEN */
00242 }
00243 
00244 lineiter_t *
00245 lineiter_start(FILE *fh)
00246 {
00247     lineiter_t *li;
00248 
00249     li = ckd_calloc(1, sizeof(*li));
00250     li->buf = ckd_malloc(128);
00251     li->buf[0] = '\0';
00252     li->bsiz = 128;
00253     li->len = 0;
00254     li->fh = fh;
00255 
00256     return lineiter_next(li);
00257 }
00258 
00259 lineiter_t *
00260 lineiter_next(lineiter_t *li)
00261 {
00262     /* Read a line and check for EOF. */
00263     if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
00264         lineiter_free(li);
00265         return NULL;
00266     }
00267     /* If we managed to read the whole thing, then we are done
00268      * (this will be by far the most common result). */
00269     li->len = strlen(li->buf);
00270     if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00271         return li;
00272 
00273     /* Otherwise we have to reallocate and keep going. */
00274     while (1) {
00275         li->bsiz *= 2;
00276         li->buf = ckd_realloc(li->buf, li->bsiz);
00277         /* If we get an EOF, we are obviously done. */
00278         if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
00279             li->len += strlen(li->buf + li->len);
00280             return li;
00281         }
00282         li->len += strlen(li->buf + li->len);
00283         /* If we managed to read the whole thing, then we are done. */
00284         if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00285             return li;
00286     }
00287 
00288     /* Shouldn't get here. */
00289     return li;
00290 }
00291 
00292 void
00293 lineiter_free(lineiter_t *li)
00294 {
00295     ckd_free(li->buf);
00296     ckd_free(li);
00297 }
00298 
/*
 * Read one line of arbitrary length from `stream`.
 *
 * stream  - stream to read from.
 * out_len - if non-NULL, receives the number of characters stored in the
 *           returned string (0 when nothing was read).
 *
 * Returns a newly allocated, NUL-terminated string holding the line
 * (including the trailing newline if one was read), or NULL if nothing
 * could be read.  The caller owns the result and releases it with
 * ckd_free().
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char *output = NULL;
    size_t total = 0;   /* characters accumulated so far, excluding NUL */
    char buf[128];

    while (fgets(buf, sizeof(buf), stream)) {
        size_t len = strlen(buf);

        /* Append this chunk (and its terminator) to the result. */
        if (output == NULL)
            output = ckd_malloc(len + 1);
        else
            output = ckd_realloc(output, total + len + 1);
        memcpy(output + total, buf, len + 1);
        total += len;

        /* Stop on a short read or end of line. */
        if (len < sizeof(buf)-1 || buf[len-1] == '\n')
            break;
    }
    /* The length is tracked as a count rather than derived from pointer
     * subtraction, so the empty (NULL) result needs no pointer arithmetic. */
    if (out_len) *out_len = total;
    return output;
}
00327 
00328 
00329 #define FREAD_RETRY_COUNT       60
00330 
00331 int32
00332 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
00333 {
00334     char *data;
00335     uint32 n_items_read;
00336     uint32 n_items_rem;
00337     uint32 n_retry_rem;
00338     int32 loc;
00339 
00340     n_retry_rem = FREAD_RETRY_COUNT;
00341 
00342     data = pointer;
00343     loc = 0;
00344     n_items_rem = num_items;
00345 
00346     do {
00347         n_items_read = fread(&data[loc], size, n_items_rem, stream);
00348 
00349         n_items_rem -= n_items_read;
00350 
00351         if (n_items_rem > 0) {
00352             /* an incomplete read occurred */
00353 
00354             if (n_retry_rem == 0)
00355                 return -1;
00356 
00357             if (n_retry_rem == FREAD_RETRY_COUNT) {
00358                 E_ERROR_SYSTEM("fread() failed; retrying...\n");
00359             }
00360 
00361             --n_retry_rem;
00362 
00363             loc += n_items_read * size;
00364 #ifdef HAVE_UNISTD_H
00365             sleep(1);
00366 #endif
00367         }
00368     } while (n_items_rem > 0);
00369 
00370     return num_items;
00371 }
00372 
00373 
00374 /* Silvio Moioli: updated to use Unicode */
00375 #ifdef _WIN32_WCE /* No stat() on WinCE */
00376 int32
00377 stat_retry(const char *file, struct stat * statbuf)
00378 {
00379     WIN32_FIND_DATAW file_data;
00380     HANDLE *h;
00381     wchar_t *wfile;
00382     size_t len;
00383 
00384     len = mbstowcs(NULL, file, 0) + 1;
00385     wfile = ckd_calloc(len, sizeof(*wfile));
00386     mbstowcs(wfile, file, len);
00387     if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
00388         ckd_free(wfile);
00389         return -1;
00390     }
00391     ckd_free(wfile);
00392     memset(statbuf, 0, sizeof(statbuf));
00393     statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
00394     statbuf->st_size = file_data.nFileSizeLow;
00395     FindClose(h);
00396 
00397     return 0;
00398 }
00399 
00400 
00401 int32
00402 stat_mtime(const char *file)
00403 {
00404     struct stat statbuf;
00405 
00406     if (stat_retry(file, &statbuf) != 0)
00407         return -1;
00408 
00409     return ((int32) statbuf.st_mtime);
00410 }
00411 #else
00412 #define STAT_RETRY_COUNT        10
00413 int32
00414 stat_retry(const char *file, struct stat * statbuf)
00415 {
00416     int32 i;
00417 
00418     
00419     
00420     for (i = 0; i < STAT_RETRY_COUNT; i++) {
00421 
00422 #ifndef HAVE_SYS_STAT_H
00423                 FILE *fp;
00424 
00425                 if ((fp=(FILE *)fopen(file, "r"))!= 0)
00426                 {
00427                     fseek( fp, 0, SEEK_END);
00428                     statbuf->st_size = ftell( fp );
00429                     fclose(fp);
00430                     return 0;
00431                 }
00432         
00433 #else /* HAVE_SYS_STAT_H */
00434         if (stat(file, statbuf) == 0)
00435             return 0;
00436 #endif
00437         if (i == 0) {
00438             E_ERROR_SYSTEM("stat(%s) failed; retrying...\n", file);
00439         }
00440 #ifdef HAVE_UNISTD_H
00441         sleep(1);
00442 #endif
00443     }
00444 
00445     return -1;
00446 }
00447 
00448 int32
00449 stat_mtime(const char *file)
00450 {
00451     struct stat statbuf;
00452 
00453 #ifdef HAVE_SYS_STAT_H
00454     if (stat(file, &statbuf) != 0)
00455         return -1;
00456 #else /* HAVE_SYS_STAT_H */
00457     if (stat_retry(file, &statbuf) != 0)
00458         return -1;
00459 #endif /* HAVE_SYS_STAT_H */
00460 
00461     return ((int32) statbuf.st_mtime);
00462 }
00463 #endif /* !_WIN32_WCE */
00464 
00465 struct bit_encode_s {
00466     FILE *fh;
00467     unsigned char buf, bbits;
00468     int16 refcount;
00469 };
00470 
00471 bit_encode_t *
00472 bit_encode_attach(FILE *outfh)
00473 {
00474     bit_encode_t *be;
00475 
00476     be = ckd_calloc(1, sizeof(*be));
00477     be->refcount = 1;
00478     be->fh = outfh;
00479     return be;
00480 }
00481 
00482 bit_encode_t *
00483 bit_encode_retain(bit_encode_t *be)
00484 {
00485     ++be->refcount;
00486     return be;
00487 }
00488 
00489 int
00490 bit_encode_free(bit_encode_t *be)
00491 {
00492     if (be == NULL)
00493         return 0;
00494     if (--be->refcount > 0)
00495         return be->refcount;
00496     ckd_free(be);
00497 
00498     return 0;
00499 }
00500 
00501 int
00502 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
00503 {
00504     int tbits;
00505 
00506     tbits = nbits + be->bbits;
00507     if (tbits < 8)  {
00508         /* Append to buffer. */
00509         be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
00510     }
00511     else {
00512         int i = 0;
00513         while (tbits >= 8) {
00514             /* Shift bits out of the buffer and splice with high-order bits */
00515             fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
00516             /* Put low-order bits back into buffer */
00517             be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
00518             tbits -= 8;
00519             ++i;
00520         }
00521     }
00522     /* tbits contains remaining number of  bits. */
00523     be->bbits = tbits;
00524 
00525     return nbits;
00526 }
00527 
00528 int
00529 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
00530 {
00531     unsigned char bits[4];
00532     codeword <<= (32 - nbits);
00533     bits[0] = (codeword >> 24) & 0xff;
00534     bits[1] = (codeword >> 16) & 0xff;
00535     bits[2] = (codeword >> 8) & 0xff;
00536     bits[3] = codeword & 0xff;
00537     return bit_encode_write(be, bits, nbits);
00538 }
00539 
00540 int
00541 bit_encode_flush(bit_encode_t *be)
00542 {
00543     if (be->bbits) {
00544         fputc(be->buf, be->fh);
00545         be->bbits = 0;
00546     }
00547     return 0;
00548 }
00549 

Generated on Mon Jan 24 21:36:19 2011 for SphinxBase by  doxygen 1.4.7