Julius 4.2
|
00001 00022 /* 00023 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00024 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00025 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00026 * All rights reserved 00027 */ 00028 00029 #include <sent/mfcc.h> 00030 #include <sent/speech.h> 00031 00038 void 00039 undef_para(Value *para) 00040 { 00041 para->smp_period = -1; 00042 para->smp_freq = -1; 00043 para->framesize = -1; 00044 para->frameshift = -1; 00045 para->preEmph = -1; 00046 para->mfcc_dim = -1; 00047 para->lifter = -1; 00048 para->fbank_num = -1; 00049 para->delWin = -1; 00050 para->accWin = -1; 00051 para->silFloor = -1; 00052 para->escale = -1; 00053 para->enormal = -1; 00054 para->hipass = -2; /* undef */ 00055 para->lopass = -2; /* undef */ 00056 para->cmn = -1; 00057 para->cvn = -1; 00058 para->raw_e = -1; 00059 para->c0 = -1; 00060 //para->ss_alpha = -1; 00061 //para->ss_floor = -1; 00062 para->vtln_alpha = -1; 00063 para->vtln_upper = -1; 00064 para->vtln_lower = -1; 00065 para->zmeanframe = -1; 00066 para->usepower = -1; 00067 para->delta = -1; 00068 para->acc = -1; 00069 para->energy = -1; 00070 para->absesup = -1; 00071 para->baselen = -1; 00072 para->vecbuflen = -1; 00073 para->veclen = -1; 00074 00075 para->loaded = 0; 00076 } 00077 00084 void 00085 make_default_para(Value *para) 00086 { 00087 para->smp_period = 625; /* 16kHz = 625 100ns unit */ 00088 para->smp_freq = 16000; /* 16kHz = 625 100ns unit */ 00089 para->framesize = DEF_FRAMESIZE; 00090 para->frameshift = DEF_FRAMESHIFT; 00091 para->preEmph = DEF_PREENPH; 00092 para->fbank_num = DEF_FBANK; 00093 para->lifter = DEF_CEPLIF; 00094 para->delWin = DEF_DELWIN; 00095 para->accWin = DEF_ACCWIN; 00096 para->raw_e = FALSE; 00097 para->enormal = FALSE; 00098 para->escale = DEF_ESCALE; 00099 para->silFloor = DEF_SILFLOOR; 00100 para->cvn = FALSE; 00101 para->hipass = -1; /* disabled */ 00102 para->lopass = -1; /* disabled */ 00103 //para->ss_alpha = DEF_SSALPHA; 00104 //para->ss_floor = DEF_SSFLOOR; 00105 para->vtln_alpha = 1.0; /* disabled */ 00106 para->zmeanframe = FALSE; 00107 para->usepower = FALSE; 00108 } 00109 00117 void 00118 make_default_para_htk(Value *para) 00119 { 00120 para->framesize = 256000.0; /* dummy! */ 00121 para->preEmph = 0.97; 00122 para->fbank_num = 20; 00123 para->lifter = 22; 00124 para->delWin = 2; 00125 para->accWin = 2; 00126 para->raw_e = TRUE; 00127 para->enormal = TRUE; 00128 para->escale = 0.1; 00129 para->silFloor = 50.0; 00130 para->hipass = -1; /* disabled */ 00131 para->lopass = -1; /* disabled */ 00132 para->vtln_alpha = 1.0; /* disabled */ 00133 para->zmeanframe = FALSE; 00134 para->usepower = FALSE; 00135 } 00136 00144 void 00145 apply_para(Value *dst, Value *src) 00146 { 00147 if (dst->smp_period == -1) dst->smp_period = src->smp_period; 00148 if (dst->smp_freq == -1) dst->smp_freq = src->smp_freq; 00149 if (dst->framesize == -1) dst->framesize = src->framesize; 00150 if (dst->frameshift == -1) dst->frameshift = src->frameshift; 00151 if (dst->preEmph == -1) dst->preEmph = src->preEmph; 00152 if (dst->mfcc_dim == -1) dst->mfcc_dim = src->mfcc_dim; 00153 if (dst->lifter == -1) dst->lifter = src->lifter; 00154 if (dst->fbank_num == -1) dst->fbank_num = src->fbank_num; 00155 if (dst->delWin == -1) dst->delWin = src->delWin; 00156 if (dst->accWin == -1) dst->accWin = src->accWin; 00157 if (dst->silFloor == -1) dst->silFloor = src->silFloor; 00158 if (dst->escale == -1) dst->escale = src->escale; 00159 if (dst->enormal == -1) dst->enormal = src->enormal; 00160 if (dst->hipass == -2) dst->hipass = src->hipass; 00161 if (dst->lopass == -2) dst->lopass = src->lopass; 00162 if (dst->cmn == -1) dst->cmn = src->cmn; 00163 if (dst->cvn == -1) dst->cvn = src->cvn; 00164 if (dst->raw_e == -1) dst->raw_e = src->raw_e; 00165 if (dst->c0 == -1) dst->c0 = src->c0; 00166 //if (dst->ss_alpha == -1) dst->ss_alpha = src->ss_alpha; 00167 //if (dst->ss_floor == -1) dst->ss_floor = src->ss_floor; 00168 if (dst->vtln_alpha == -1) dst->vtln_alpha = src->vtln_alpha; 00169 if (dst->vtln_upper == -1) dst->vtln_upper = src->vtln_upper; 00170 if (dst->vtln_lower == -1) dst->vtln_lower = src->vtln_lower; 00171 if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe; 00172 if (dst->usepower == -1) dst->usepower = src->usepower; 00173 if (dst->delta == -1) dst->delta = src->delta; 00174 if (dst->acc == -1) dst->acc = src->acc; 00175 if (dst->energy == -1) dst->energy = src->energy; 00176 if (dst->absesup == -1) dst->absesup = src->absesup; 00177 if (dst->baselen == -1) dst->baselen = src->baselen; 00178 if (dst->vecbuflen == -1) dst->vecbuflen = src->vecbuflen; 00179 if (dst->veclen == -1) dst->veclen = src->veclen; 00180 } 00181 00182 #define ISTOKEN(A) (A == ' ' || A == '\t' || A == '\n') ///< Determine token characters 00183 00192 boolean 00193 htk_config_file_parse(char *HTKconffile, Value *para) 00194 { 00195 FILE *fp; 00196 char buf[512]; 00197 char *p, *d, *a; 00198 float srate; 00199 boolean skipped; 00200 00201 jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile); 00202 00203 /* convert the content into argument list c_argv[1..c_argc-1] */ 00204 /* c_argv[0] will be the original conffile name */ 00205 if ((fp = fopen(HTKconffile, "r")) == NULL) { 00206 jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile); 00207 return FALSE; 00208 } 00209 00210 srate = 0.0; 00211 00212 while (getl_fp(buf, 512, fp) != NULL) { 00213 p = buf; 00214 if (*p == 35) { /* skip comment line */ 00215 continue; 00216 } 00217 00218 /* parse the input line to get directive and argument */ 00219 while (*p != '\0' && ISTOKEN(*p)) p++; 00220 if (*p == '\0') continue; 00221 d = p; 00222 while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++; 00223 if (*p == '\0') continue; 00224 *p = '\0'; p++; 00225 while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++; 00226 if (*p == '\0') continue; 00227 a = p; 00228 while (*p != '\0' && (!ISTOKEN(*p))) p++; 00229 *p = '\0'; 00230 00231 /* process arguments */ 00232 skipped = FALSE; 00233 if (strmatch(d, "SOURCERATE")) { /* -smpPeriod */ 00234 srate = atof(a); 00235 } else if (strmatch(d, "TARGETRATE")) { /* -fshift */ 00236 para->frameshift = atof(a); 00237 } else if (strmatch(d, "WINDOWSIZE")) { /* -fsize */ 00238 para->framesize = atof(a); 00239 } else if (strmatch(d, "ZMEANSOURCE")) { /* -zmeansource */ 00240 para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE; 00241 } else if (strmatch(d, "USEPOWER")) { /* -usepower */ 00242 para->usepower = (a[0] == 'T') ? TRUE : FALSE; 00243 } else if (strmatch(d, "PREEMCOEF")) { /* -preemph */ 00244 para->preEmph = atof(a); 00245 } else if (strmatch(d, "USEHAMMING")) { /* (fixed to T) */ 00246 if (a[0] != 'T') { 00247 jlog("Error: para: USEHAMMING should be T\n", HTKconffile); 00248 return FALSE; 00249 } 00250 } else if (strmatch(d, "NUMCHANS")) { /* -fbank */ 00251 para->fbank_num = atoi(a); 00252 } else if (strmatch(d, "CEPLIFTER")) { /* -ceplif */ 00253 para->lifter = atoi(a); 00254 } else if (strmatch(d, "DELTAWINDOW")) { /* -delwin */ 00255 para->delWin = atoi(a); 00256 } else if (strmatch(d, "ACCWINDOW")) { /* -accwin */ 00257 para->accWin = atoi(a); 00258 } else if (strmatch(d, "LOFREQ")) { /* -lofreq */ 00259 para->lopass = atof(a); 00260 } else if (strmatch(d, "HIFREQ")) { /* -hifreq */ 00261 para->hipass = atof(a); 00262 } else if (strmatch(d, "RAWENERGY")) { /* -rawe */ 00263 para->raw_e = (a[0] == 'T') ? TRUE : FALSE; 00264 } else if (strmatch(d, "ENORMALISE")) { /* -enormal */ 00265 para->enormal = (a[0] == 'T') ? TRUE : FALSE; 00266 } else if (strmatch(d, "ESCALE")) { /* -escale */ 00267 para->escale = atof(a); 00268 } else if (strmatch(d, "SILFLOOR")) { /* -silfloor */ 00269 para->silFloor = atof(a); 00270 } else if (strmatch(d, "WARPFREQ")) { /* -vtln (1) */ 00271 para->vtln_alpha = atof(a); 00272 } else if (strmatch(d, "WARPLCUTOFF")) { /* -vtln (2) */ 00273 para->vtln_lower = atof(a); 00274 } else if (strmatch(d, "WARPUCUTOFF")) { /* -vtln (3) */ 00275 para->vtln_upper = atof(a); 00276 } else if (strmatch(d, "TARGETKIND")) { 00277 jlog("Warning: para: TARGETKIND skipped (will be determined by AM header)\n"); 00278 skipped = TRUE; 00279 } else if (strmatch(d, "NUMCEPS")) { 00280 jlog("Warning: para: NUMCEPS skipped (will be determined by AM header)\n"); 00281 skipped = TRUE; 00282 } else { 00283 jlog("Warning: para: \"%s\" ignored (not supported, or irrelevant)\n", d); 00284 skipped = TRUE; 00285 } 00286 if (!skipped) { 00287 jlog("Stat: para: %s=%s\n", d, a); 00288 } 00289 } 00290 00291 if (srate == 0.0) { 00292 jlog("Warning: no SOURCERATE found\n"); 00293 jlog("Warning: assume source waveform sample rate to 625 (16kHz)\n"); 00294 srate = 625; 00295 } 00296 00297 para->smp_period = srate; 00298 para->smp_freq = period2freq(para->smp_period); 00299 para->frameshift /= srate; 00300 para->framesize /= srate; 00301 00302 if (fclose(fp) == -1) { 00303 jlog("Error: para: failed to close file\n"); 00304 return FALSE; 00305 } 00306 00307 para->loaded = 1; 00308 00309 return TRUE; 00310 } 00311 00319 void 00320 calc_para_from_header(Value *para, short param_type, short vec_size) 00321 { 00322 int dim; 00323 00324 /* decode required parameter extraction types */ 00325 para->delta = (param_type & F_DELTA) ? TRUE : FALSE; 00326 para->acc = (param_type & F_ACCL) ? TRUE : FALSE; 00327 para->energy = (param_type & F_ENERGY) ? TRUE : FALSE; 00328 para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE; 00329 para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE; 00330 para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE; 00331 /* guess MFCC dimension from the vector size and parameter type in the 00332 acoustic HMM */ 00333 dim = vec_size; 00334 if (para->absesup) dim++; 00335 dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0); 00336 if (para->energy) dim--; 00337 if (para->c0) dim--; 00338 para->mfcc_dim = dim; 00339 00340 /* determine base size */ 00341 para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0); 00342 /* set required size of parameter vector for MFCC computation */ 00343 para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0)); 00344 /* set size of final parameter vector */ 00345 para->veclen = para->vecbuflen - (para->absesup ? 1 : 0); 00346 } 00347 00355 void 00356 put_para(FILE *fp, Value *para) 00357 { 00358 fprintf(fp, " Acoustic analysis condition:\n"); 00359 fprintf(fp, "\t parameter = MFCC"); 00360 if (para->c0) fprintf(fp, "_0"); 00361 if (para->energy) fprintf(fp, "_E"); 00362 if (para->delta) fprintf(fp, "_D"); 00363 if (para->acc) fprintf(fp, "_A"); 00364 if (para->absesup) fprintf(fp, "_N"); 00365 if (para->cmn) fprintf(fp, "_Z"); 00366 fprintf(fp, " (%d dim. from %d cepstrum", para->veclen, para->mfcc_dim); 00367 if (para->c0) fprintf(fp, " + c0"); 00368 if (para->energy) fprintf(fp, " + energy"); 00369 if (para->absesup) fprintf(fp, ", abs energy supressed"); 00370 if (para->cmn) fprintf(fp, " with CMN"); 00371 fprintf(fp, ")\n"); 00372 fprintf(fp, "\tsample frequency = %5ld Hz\n", para->smp_freq); 00373 fprintf(fp, "\t sample period = %4ld (1 = 100ns)\n", para->smp_period); 00374 fprintf(fp, "\t window size = %4d samples (%.1f ms)\n", para->framesize, 00375 (float)para->smp_period * (float)para->framesize / 10000.0); 00376 fprintf(fp, "\t frame shift = %4d samples (%.1f ms)\n", para->frameshift, 00377 (float)para->smp_period * (float)para->frameshift / 10000.0); 00378 fprintf(fp, "\t pre-emphasis = %.2f\n", para->preEmph); 00379 fprintf(fp, "\t # filterbank = %d\n", para->fbank_num); 00380 fprintf(fp, "\t cepst. lifter = %d\n", para->lifter); 00381 fprintf(fp, "\t raw energy = %s\n", para->raw_e ? "True" : "False"); 00382 if (para->enormal) { 00383 fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor); 00384 } else { 00385 fprintf(fp, "\tenergy normalize = False\n"); 00386 } 00387 if (para->delta) { 00388 fprintf(fp, "\t delta window = %d frames (%.1f ms) around\n", para->delWin, (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0); 00389 } 00390 if (para->acc) { 00391 fprintf(fp, "\t acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0); 00392 } 00393 fprintf(fp, "\t hi freq cut = "); 00394 if (para->hipass < 0) fprintf(fp, "OFF\n"); 00395 else fprintf(fp, "%5d Hz\n", para->hipass); 00396 fprintf(fp, "\t lo freq cut = "); 00397 if (para->lopass < 0) fprintf(fp, "OFF\n"); 00398 else fprintf(fp, "%5d Hz\n", para->lopass); 00399 fprintf(fp, "\t zero mean frame = "); 00400 if (para->zmeanframe) fprintf(fp, "ON\n"); 00401 else fprintf(fp, "OFF\n"); 00402 fprintf(fp, "\t use power = "); 00403 if (para->usepower) fprintf(fp, "ON\n"); 00404 else fprintf(fp, "OFF\n"); 00405 fprintf(fp, "\t CVN = "); 00406 switch (para->cvn) { 00407 case TRUE: 00408 fprintf(fp, "ON\n"); 00409 break; 00410 case FALSE: 00411 fprintf(fp, "OFF\n"); 00412 break; 00413 default: 00414 fprintf(fp, "UNKNOWN\n"); 00415 break; 00416 } 00417 fprintf(fp, "\t VTLN = "); 00418 if(para->vtln_alpha != 1.0) { 00419 fprintf(fp, "ON, alpha=%.3f, f_low=%.1f, f_high=%.1f\n", para->vtln_alpha, para->vtln_lower, para->vtln_upper); 00420 } else fprintf(fp, "OFF\n"); 00421 }