Julius 4.2
libsent/src/wav2mfcc/para.c
説明を見る。
00001 
00022 /*
00023  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00024  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00025  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00026  * All rights reserved
00027  */
00028 
00029 #include <sent/mfcc.h>
00030 #include <sent/speech.h>
00031 
00038 void
00039 undef_para(Value *para)
00040 {
00041   para->smp_period = -1;
00042   para->smp_freq   = -1;
00043   para->framesize  = -1;
00044   para->frameshift = -1;
00045   para->preEmph    = -1;
00046   para->mfcc_dim   = -1;
00047   para->lifter     = -1;
00048   para->fbank_num  = -1;
00049   para->delWin     = -1;
00050   para->accWin     = -1;
00051   para->silFloor   = -1;
00052   para->escale     = -1;
00053   para->enormal    = -1;
00054   para->hipass     = -2;        /* undef */
00055   para->lopass     = -2;        /* undef */
00056   para->cmn        = -1;
00057   para->cvn        = -1;
00058   para->raw_e      = -1;
00059   para->c0         = -1;
00060   //para->ss_alpha   = -1;
00061   //para->ss_floor   = -1;
00062   para->vtln_alpha = -1;
00063   para->vtln_upper = -1;
00064   para->vtln_lower = -1;
00065   para->zmeanframe = -1;
00066   para->usepower   = -1;
00067   para->delta      = -1;
00068   para->acc        = -1;
00069   para->energy     = -1;
00070   para->absesup    = -1;
00071   para->baselen    = -1;
00072   para->vecbuflen  = -1;
00073   para->veclen     = -1;
00074 
00075   para->loaded     = 0;
00076 }
00077 
00084 void
00085 make_default_para(Value *para)
00086 {
00087   para->smp_period = 625;       /* 16kHz = 625 100ns unit */
00088   para->smp_freq   = 16000;     /* 16kHz = 625 100ns unit */
00089   para->framesize  = DEF_FRAMESIZE;
00090   para->frameshift = DEF_FRAMESHIFT;
00091   para->preEmph    = DEF_PREENPH;
00092   para->fbank_num  = DEF_FBANK;
00093   para->lifter     = DEF_CEPLIF;
00094   para->delWin     = DEF_DELWIN;
00095   para->accWin     = DEF_ACCWIN;
00096   para->raw_e      = FALSE;
00097   para->enormal    = FALSE;
00098   para->escale     = DEF_ESCALE;
00099   para->silFloor   = DEF_SILFLOOR;
00100   para->cvn        = FALSE;
00101   para->hipass     = -1;        /* disabled */
00102   para->lopass     = -1;        /* disabled */
00103   //para->ss_alpha    = DEF_SSALPHA;
00104   //para->ss_floor    = DEF_SSFLOOR;
00105   para->vtln_alpha = 1.0;       /* disabled */
00106   para->zmeanframe = FALSE;
00107   para->usepower   = FALSE;
00108 }
00109 
00117 void
00118 make_default_para_htk(Value *para)
00119 {
00120   para->framesize  = 256000.0;  /* dummy! */
00121   para->preEmph    = 0.97;
00122   para->fbank_num  = 20;
00123   para->lifter     = 22;
00124   para->delWin     = 2;
00125   para->accWin     = 2;
00126   para->raw_e      = TRUE;
00127   para->enormal    = TRUE;
00128   para->escale     = 0.1;
00129   para->silFloor   = 50.0;
00130   para->hipass     = -1;        /* disabled */
00131   para->lopass     = -1;        /* disabled */
00132   para->vtln_alpha = 1.0;       /* disabled */
00133   para->zmeanframe = FALSE;
00134   para->usepower   = FALSE;
00135 }
00136 
00144 void
00145 apply_para(Value *dst, Value *src)
00146 {
00147   if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00148   if (dst->smp_freq   == -1) dst->smp_freq = src->smp_freq; 
00149   if (dst->framesize  == -1) dst->framesize = src->framesize; 
00150   if (dst->frameshift == -1) dst->frameshift = src->frameshift; 
00151   if (dst->preEmph    == -1) dst->preEmph = src->preEmph; 
00152   if (dst->mfcc_dim   == -1) dst->mfcc_dim = src->mfcc_dim; 
00153   if (dst->lifter     == -1) dst->lifter = src->lifter; 
00154   if (dst->fbank_num  == -1) dst->fbank_num = src->fbank_num; 
00155   if (dst->delWin     == -1) dst->delWin = src->delWin; 
00156   if (dst->accWin     == -1) dst->accWin = src->accWin; 
00157   if (dst->silFloor   == -1) dst->silFloor = src->silFloor; 
00158   if (dst->escale     == -1) dst->escale = src->escale; 
00159   if (dst->enormal    == -1) dst->enormal = src->enormal; 
00160   if (dst->hipass     == -2) dst->hipass = src->hipass;
00161   if (dst->lopass     == -2) dst->lopass = src->lopass;
00162   if (dst->cmn        == -1) dst->cmn = src->cmn; 
00163   if (dst->cvn        == -1) dst->cvn = src->cvn; 
00164   if (dst->raw_e      == -1) dst->raw_e = src->raw_e; 
00165   if (dst->c0         == -1) dst->c0 = src->c0; 
00166   //if (dst->ss_alpha   == -1) dst->ss_alpha = src->ss_alpha; 
00167   //if (dst->ss_floor   == -1) dst->ss_floor = src->ss_floor; 
00168   if (dst->vtln_alpha == -1) dst->vtln_alpha = src->vtln_alpha; 
00169   if (dst->vtln_upper == -1) dst->vtln_upper = src->vtln_upper; 
00170   if (dst->vtln_lower == -1) dst->vtln_lower = src->vtln_lower; 
00171   if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe; 
00172   if (dst->usepower   == -1) dst->usepower = src->usepower; 
00173   if (dst->delta      == -1) dst->delta = src->delta; 
00174   if (dst->acc        == -1) dst->acc = src->acc; 
00175   if (dst->energy     == -1) dst->energy = src->energy; 
00176   if (dst->absesup    == -1) dst->absesup = src->absesup; 
00177   if (dst->baselen    == -1) dst->baselen = src->baselen; 
00178   if (dst->vecbuflen  == -1) dst->vecbuflen = src->vecbuflen; 
00179   if (dst->veclen     == -1) dst->veclen = src->veclen; 
00180 }
00181 
/**
 * True when character A is one of the delimiters that separate tokens
 * on a config line: space, tab or newline.
 *
 * The argument is parenthesised so that an expression such as
 * `c & 0x7f` is compared as a whole.  It is evaluated up to three
 * times, so it must not have side effects.
 */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
00183 
00192 boolean
00193 htk_config_file_parse(char *HTKconffile, Value *para)
00194 {
00195   FILE *fp;
00196   char buf[512];
00197   char *p, *d, *a;
00198   float srate;
00199   boolean skipped;
00200 
00201   jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile);
00202   
00203   /* convert the content into argument list c_argv[1..c_argc-1] */
00204   /* c_argv[0] will be the original conffile name */
00205   if ((fp = fopen(HTKconffile, "r")) == NULL) {
00206     jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile);
00207     return FALSE;
00208   }
00209 
00210   srate = 0.0;
00211 
00212   while (getl_fp(buf, 512, fp) != NULL) {
00213     p = buf;
00214     if (*p == 35) { /* skip comment line */
00215       continue;
00216     }
00217 
00218     /* parse the input line to get directive and argument */
00219     while (*p != '\0' && ISTOKEN(*p)) p++;
00220     if (*p == '\0') continue;
00221     d = p;
00222     while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00223     if (*p == '\0') continue;
00224     *p = '\0'; p++;
00225     while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00226     if (*p == '\0') continue;
00227     a = p;
00228     while (*p != '\0' && (!ISTOKEN(*p))) p++;
00229     *p = '\0';
00230 
00231     /* process arguments */
00232     skipped = FALSE;
00233     if (strmatch(d, "SOURCERATE")) { /* -smpPeriod */
00234       srate = atof(a);
00235     } else if (strmatch(d, "TARGETRATE")) { /* -fshift */
00236       para->frameshift = atof(a);
00237     } else if (strmatch(d, "WINDOWSIZE")) { /* -fsize */
00238       para->framesize = atof(a);
00239     } else if (strmatch(d, "ZMEANSOURCE")) { /* -zmeansource */
00240       para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00241     } else if (strmatch(d, "USEPOWER")) { /* -usepower */
00242       para->usepower = (a[0] == 'T') ? TRUE : FALSE;
00243     } else if (strmatch(d, "PREEMCOEF")) { /* -preemph */
00244       para->preEmph = atof(a);
00245     } else if (strmatch(d, "USEHAMMING")) { /* (fixed to T) */
00246       if (a[0] != 'T') {
00247         jlog("Error: para: USEHAMMING should be T\n", HTKconffile);
00248         return FALSE;
00249       }
00250     } else if (strmatch(d, "NUMCHANS")) { /* -fbank */
00251       para->fbank_num = atoi(a);
00252     } else if (strmatch(d, "CEPLIFTER")) { /* -ceplif */
00253       para->lifter = atoi(a);
00254     } else if (strmatch(d, "DELTAWINDOW")) { /* -delwin */
00255       para->delWin = atoi(a);
00256     } else if (strmatch(d, "ACCWINDOW")) { /* -accwin */
00257       para->accWin = atoi(a);
00258     } else if (strmatch(d, "LOFREQ")) { /* -lofreq */
00259       para->lopass = atof(a);
00260     } else if (strmatch(d, "HIFREQ")) { /* -hifreq */
00261       para->hipass = atof(a);
00262     } else if (strmatch(d, "RAWENERGY")) { /* -rawe */
00263       para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00264     } else if (strmatch(d, "ENORMALISE")) { /* -enormal */
00265       para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00266     } else if (strmatch(d, "ESCALE")) { /* -escale */
00267       para->escale = atof(a);
00268     } else if (strmatch(d, "SILFLOOR")) { /* -silfloor */
00269       para->silFloor = atof(a);
00270     } else if (strmatch(d, "WARPFREQ")) { /* -vtln (1) */
00271       para->vtln_alpha = atof(a);
00272     } else if (strmatch(d, "WARPLCUTOFF")) { /* -vtln (2) */
00273       para->vtln_lower = atof(a);
00274     } else if (strmatch(d, "WARPUCUTOFF")) { /* -vtln (3) */
00275       para->vtln_upper = atof(a);
00276     } else if (strmatch(d, "TARGETKIND")) {
00277       jlog("Warning: para: TARGETKIND skipped (will be determined by AM header)\n");
00278       skipped = TRUE;
00279     } else if (strmatch(d, "NUMCEPS")) {
00280       jlog("Warning: para: NUMCEPS skipped (will be determined by AM header)\n");
00281       skipped = TRUE;
00282     } else {
00283       jlog("Warning: para: \"%s\" ignored (not supported, or irrelevant)\n", d);
00284       skipped = TRUE;
00285     }
00286     if (!skipped) {
00287       jlog("Stat: para: %s=%s\n", d, a);
00288     }
00289   }
00290 
00291   if (srate == 0.0) {
00292     jlog("Warning: no SOURCERATE found\n");
00293     jlog("Warning: assume source waveform sample rate to 625 (16kHz)\n");
00294     srate = 625;
00295   }
00296 
00297   para->smp_period = srate;
00298   para->smp_freq = period2freq(para->smp_period);
00299   para->frameshift /= srate;
00300   para->framesize /= srate;
00301 
00302   if (fclose(fp) == -1) {
00303     jlog("Error: para: failed to close file\n");
00304     return FALSE;
00305   }
00306 
00307   para->loaded = 1;
00308 
00309   return TRUE;
00310 }
00311 
00319 void
00320 calc_para_from_header(Value *para, short param_type, short vec_size)
00321 {
00322   int dim;
00323 
00324   /* decode required parameter extraction types */
00325   para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00326   para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00327   para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00328   para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00329   para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00330   para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00331   /* guess MFCC dimension from the vector size and parameter type in the
00332      acoustic HMM */
00333   dim = vec_size;
00334   if (para->absesup) dim++;
00335   dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00336   if (para->energy) dim--;
00337   if (para->c0) dim--;
00338   para->mfcc_dim = dim;
00339     
00340   /* determine base size */
00341   para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00342   /* set required size of parameter vector for MFCC computation */
00343   para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00344   /* set size of final parameter vector */
00345   para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00346 }
00347 
00355 void
00356 put_para(FILE *fp, Value *para)
00357 {
00358   fprintf(fp, " Acoustic analysis condition:\n");
00359   fprintf(fp, "\t       parameter = MFCC");
00360   if (para->c0) fprintf(fp, "_0");
00361   if (para->energy) fprintf(fp, "_E");
00362   if (para->delta) fprintf(fp, "_D");
00363   if (para->acc) fprintf(fp, "_A");
00364   if (para->absesup) fprintf(fp, "_N");
00365   if (para->cmn) fprintf(fp, "_Z");
00366   fprintf(fp, " (%d dim. from %d cepstrum", para->veclen, para->mfcc_dim);
00367   if (para->c0) fprintf(fp, " + c0");
00368   if (para->energy) fprintf(fp, " + energy");
00369   if (para->absesup) fprintf(fp, ", abs energy supressed");
00370   if (para->cmn) fprintf(fp, " with CMN");
00371   fprintf(fp, ")\n");
00372   fprintf(fp, "\tsample frequency = %5ld Hz\n", para->smp_freq);
00373   fprintf(fp, "\t   sample period = %4ld  (1 = 100ns)\n", para->smp_period);
00374   fprintf(fp, "\t     window size = %4d samples (%.1f ms)\n", para->framesize,
00375            (float)para->smp_period * (float)para->framesize / 10000.0);
00376   fprintf(fp, "\t     frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00377            (float)para->smp_period * (float)para->frameshift / 10000.0);
00378   fprintf(fp, "\t    pre-emphasis = %.2f\n", para->preEmph);
00379   fprintf(fp, "\t    # filterbank = %d\n", para->fbank_num);
00380   fprintf(fp, "\t   cepst. lifter = %d\n", para->lifter);
00381   fprintf(fp, "\t      raw energy = %s\n", para->raw_e ? "True" : "False");
00382   if (para->enormal) {
00383     fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00384   } else {
00385     fprintf(fp, "\tenergy normalize = False\n");
00386   }
00387   if (para->delta) {
00388     fprintf(fp, "\t    delta window = %d frames (%.1f ms) around\n", para->delWin,  (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00389   }
00390   if (para->acc) {
00391     fprintf(fp, "\t      acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00392   }
00393   fprintf(fp, "\t     hi freq cut = ");
00394   if (para->hipass < 0) fprintf(fp, "OFF\n"); 
00395   else fprintf(fp, "%5d Hz\n", para->hipass);
00396   fprintf(fp, "\t     lo freq cut = ");
00397   if (para->lopass < 0) fprintf(fp, "OFF\n"); 
00398   else fprintf(fp, "%5d Hz\n", para->lopass);
00399   fprintf(fp, "\t zero mean frame = ");
00400   if (para->zmeanframe) fprintf(fp, "ON\n");
00401   else fprintf(fp, "OFF\n");
00402   fprintf(fp, "\t       use power = ");
00403   if (para->usepower) fprintf(fp, "ON\n");
00404   else fprintf(fp, "OFF\n");
00405   fprintf(fp, "\t             CVN = ");
00406   switch (para->cvn) {
00407   case TRUE:
00408     fprintf(fp, "ON\n");
00409     break;
00410   case FALSE:
00411     fprintf(fp, "OFF\n");
00412     break;
00413   default:
00414     fprintf(fp, "UNKNOWN\n");
00415     break;
00416   }
00417   fprintf(fp, "\t            VTLN = ");
00418   if(para->vtln_alpha != 1.0) {
00419     fprintf(fp, "ON, alpha=%.3f, f_low=%.1f, f_high=%.1f\n", para->vtln_alpha, para->vtln_lower, para->vtln_upper);
00420   } else fprintf(fp, "OFF\n");
00421 }