00001
00021
00022
00023
00024
00025
00026
00027
00028 #include <sent/mfcc.h>
00029 #include <sent/speech.h>
00030
00037 void
00038 undef_para(Value *para)
00039 {
00040 para->smp_period = -1;
00041 para->smp_freq = -1;
00042 para->framesize = -1;
00043 para->frameshift = -1;
00044 para->preEmph = -1;
00045 para->mfcc_dim = -1;
00046 para->lifter = -1;
00047 para->fbank_num = -1;
00048 para->delWin = -1;
00049 para->accWin = -1;
00050 para->silFloor = -1;
00051 para->escale = -1;
00052 para->enormal = -1;
00053 para->hipass = -2;
00054 para->lopass = -2;
00055 para->cmn = -1;
00056 para->raw_e = -1;
00057 para->c0 = -1;
00058 para->ss_alpha = -1;
00059 para->ss_floor = -1;
00060 para->zmeanframe = -1;
00061 para->delta = -1;
00062 para->acc = -1;
00063 para->energy = -1;
00064 para->absesup = -1;
00065 para->baselen = -1;
00066 para->vecbuflen = -1;
00067 para->veclen = -1;
00068
00069 para->loaded = 0;
00070 }
00071
00078 void
00079 make_default_para(Value *para)
00080 {
00081 para->smp_period = 625;
00082 para->smp_freq = 16000;
00083 para->framesize = DEF_FRAMESIZE;
00084 para->frameshift = DEF_FRAMESHIFT;
00085 para->preEmph = DEF_PREENPH;
00086 para->fbank_num = DEF_FBANK;
00087 para->lifter = DEF_CEPLIF;
00088 para->delWin = DEF_DELWIN;
00089 para->accWin = DEF_ACCWIN;
00090 para->raw_e = FALSE;
00091 para->enormal = FALSE;
00092 para->escale = DEF_ESCALE;
00093 para->silFloor = DEF_SILFLOOR;
00094 para->hipass = -1;
00095 para->lopass = -1;
00096 para->ss_alpha = DEF_SSALPHA;
00097 para->ss_floor = DEF_SSFLOOR;
00098 para->zmeanframe = FALSE;
00099 }
00100
00108 void
00109 make_default_para_htk(Value *para)
00110 {
00111 para->framesize = 256000.0;
00112 para->preEmph = 0.97;
00113 para->fbank_num = 20;
00114 para->lifter = 22;
00115 para->delWin = 2;
00116 para->accWin = 2;
00117 para->raw_e = TRUE;
00118 para->enormal = TRUE;
00119 para->escale = 0.1;
00120 para->silFloor = 50.0;
00121 para->hipass = -1;
00122 para->lopass = -1;
00123 para->zmeanframe = FALSE;
00124 }
00125
00133 void
00134 apply_para(Value *dst, Value *src)
00135 {
00136 if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00137 if (dst->smp_freq == -1) dst->smp_freq = src->smp_freq;
00138 if (dst->framesize == -1) dst->framesize = src->framesize;
00139 if (dst->frameshift == -1) dst->frameshift = src->frameshift;
00140 if (dst->preEmph == -1) dst->preEmph = src->preEmph;
00141 if (dst->mfcc_dim == -1) dst->mfcc_dim = src->mfcc_dim;
00142 if (dst->lifter == -1) dst->lifter = src->lifter;
00143 if (dst->fbank_num == -1) dst->fbank_num = src->fbank_num;
00144 if (dst->delWin == -1) dst->delWin = src->delWin;
00145 if (dst->accWin == -1) dst->accWin = src->accWin;
00146 if (dst->silFloor == -1) dst->silFloor = src->silFloor;
00147 if (dst->escale == -1) dst->escale = src->escale;
00148 if (dst->enormal == -1) dst->enormal = src->enormal;
00149 if (dst->hipass == -2) dst->hipass = src->hipass;
00150 if (dst->lopass == -2) dst->lopass = src->lopass;
00151 if (dst->cmn == -1) dst->cmn = src->cmn;
00152 if (dst->raw_e == -1) dst->raw_e = src->raw_e;
00153 if (dst->c0 == -1) dst->c0 = src->c0;
00154 if (dst->ss_alpha == -1) dst->ss_alpha = src->ss_alpha;
00155 if (dst->ss_floor == -1) dst->ss_floor = src->ss_floor;
00156 if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe;
00157 if (dst->delta == -1) dst->delta = src->delta;
00158 if (dst->acc == -1) dst->acc = src->acc;
00159 if (dst->energy == -1) dst->energy = src->energy;
00160 if (dst->absesup == -1) dst->absesup = src->absesup;
00161 if (dst->baselen == -1) dst->baselen = src->baselen;
00162 if (dst->vecbuflen == -1) dst->vecbuflen = src->vecbuflen;
00163 if (dst->veclen == -1) dst->veclen = src->veclen;
00164 }
00165
00166
/* Non-zero when character A is a token separator: space, tab or newline.
   A is evaluated up to three times, so pass an expression without side
   effects.  Every use of A is parenthesized so that arguments containing
   low-precedence operators (e.g. ?:) are compared as a whole. */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
00168
00169
00177 boolean
00178 htk_config_file_parse(char *HTKconffile, Value *para)
00179 {
00180 FILE *fp;
00181 char buf[512];
00182 char *p, *d, *a;
00183 float srate;
00184 boolean skipped;
00185
00186 j_printerr("include HTK Config: %s\n", HTKconffile);
00187
00188
00189
00190 if ((fp = fopen(HTKconffile, "r")) == NULL) {
00191 j_printerr("Error: rdhtkconf: failed to open HTK Config file: %s\n", HTKconffile);
00192 }
00193
00194 while (getl_fp(buf, 512, fp) != NULL) {
00195 p = buf;
00196 if (*p == 35) {
00197 continue;
00198 }
00199
00200
00201 while (*p != '\0' && ISTOKEN(*p)) p++;
00202 if (*p == '\0') continue;
00203 d = p;
00204 while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00205 if (*p == '\0') continue;
00206 *p = '\0'; p++;
00207 while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00208 if (*p == '\0') continue;
00209 a = p;
00210 while (*p != '\0' && (!ISTOKEN(*p))) p++;
00211 *p = '\0';
00212
00213
00214 skipped = FALSE;
00215 if (strmatch(d, "SOURCERATE")) {
00216 srate = atof(a);
00217 } else if (strmatch(d, "TARGETRATE")) {
00218 para->frameshift = atof(a);
00219 } else if (strmatch(d, "WINDOWSIZE")) {
00220 para->framesize = atof(a);
00221 } else if (strmatch(d, "ZMEANSOURCE")) {
00222 para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00223 } else if (strmatch(d, "PREEMCOEF")) {
00224 para->preEmph = atof(a);
00225 } else if (strmatch(d, "USEHAMMING")) {
00226 if (a[0] != 'T') {
00227 j_error("\nError: in HTK Config \"%s\": USEHAMMING should be T\n", HTKconffile);
00228 }
00229 } else if (strmatch(d, "NUMCHANS")) {
00230 para->fbank_num = atoi(a);
00231 } else if (strmatch(d, "CEPLIFTER")) {
00232 para->lifter = atoi(a);
00233 } else if (strmatch(d, "DELTAWINDOW")) {
00234 para->delWin = atoi(a);
00235 } else if (strmatch(d, "ACCWINDOW")) {
00236 para->accWin = atoi(a);
00237 } else if (strmatch(d, "LOFREQ")) {
00238 para->lopass = atof(a);
00239 } else if (strmatch(d, "HIFREQ")) {
00240 para->hipass = atof(a);
00241 } else if (strmatch(d, "RAWENERGY")) {
00242 para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00243 } else if (strmatch(d, "ENORMALISE")) {
00244 para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00245 } else if (strmatch(d, "ESCALE")) {
00246 para->escale = atof(a);
00247 } else if (strmatch(d, "SILFLOOR")) {
00248 para->silFloor = atof(a);
00249 } else if (strmatch(d, "TARGETKIND")) {
00250 j_printerr("TARGETKIND specified but skipped (will be set from AM header)\n");
00251 skipped = TRUE;
00252 } else if (strmatch(d, "NUMCEPS")) {
00253 j_printerr("NUMCEPS specified but skipped (will be set from AM header)\n");
00254 skipped = TRUE;
00255 } else {
00256 skipped = TRUE;
00257 }
00258 if (!skipped) {
00259 j_printerr("%s=%s\n", d, a);
00260 }
00261 }
00262
00263 para->smp_period = srate;
00264 para->smp_freq = period2freq(para->smp_period);
00265 para->frameshift /= srate;
00266 para->framesize /= srate;
00267
00268 if (fclose(fp) == -1) {
00269 j_printerr("Error: rdhtkconf: jconf file cannot close\n");
00270 }
00271
00272 para->loaded = 1;
00273
00274 return TRUE;
00275 }
00276
00277
00285 void
00286 calc_para_from_header(Value *para, short param_type, short vec_size)
00287 {
00288 int dim;
00289
00290
00291 para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00292 para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00293 para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00294 para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00295 para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00296 para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00297
00298
00299 dim = vec_size;
00300 if (para->absesup) dim++;
00301 dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00302 if (para->energy) dim--;
00303 if (para->c0) dim--;
00304 para->mfcc_dim = dim;
00305
00306
00307 para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00308
00309 para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00310
00311 para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00312 }
00313
00314
00321 void
00322 put_para(Value *para)
00323 {
00324 j_printf("Acoustic analysis condition:\n");
00325 j_printf("\t parameter = MFCC");
00326 if (para->c0) j_printf("_0");
00327 if (para->energy) j_printf("_E");
00328 if (para->delta) j_printf("_D");
00329 if (para->acc) j_printf("_A");
00330 if (para->absesup) j_printf("_N");
00331 if (para->cmn) j_printf("_Z");
00332 j_printf(" (%d dimension from %d cepstrum)\n", para->veclen, para->mfcc_dim);
00333 j_printf("\tsample frequency = %5d Hz\n", para->smp_freq);
00334 j_printf("\t sample period = %4d (100ns unit)\n", para->smp_period);
00335 j_printf("\t window size = %4d samples (%.1f ms)\n", para->framesize,
00336 (float)para->smp_period * (float)para->framesize / 10000.0);
00337 j_printf("\t frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00338 (float)para->smp_period * (float)para->frameshift / 10000.0);
00339 j_printf("\t pre-emphasis = %.2f\n", para->preEmph);
00340 j_printf("\t # filterbank = %d\n", para->fbank_num);
00341 j_printf("\t cepst. lifter = %d\n", para->lifter);
00342 j_printf("\t raw energy = %s\n", para->raw_e ? "True" : "False");
00343 if (para->enormal) {
00344 j_printf("\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00345 } else {
00346 j_printf("\tenergy normalize = False\n");
00347 }
00348 if (para->delta) {
00349 j_printf("\t delta window = %d frames (%.1f ms) around\n", para->delWin, (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00350 }
00351 if (para->acc) {
00352 j_printf("\t acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00353 }
00354 j_printf("\t hi freq. = ");
00355 if (para->hipass < 0) j_printf("OFF\n");
00356 else j_printf("%5d Hz\n", para->hipass);
00357 j_printf("\t lo freq. = ");
00358 if (para->lopass < 0) j_printf("OFF\n");
00359 else j_printf("%5d Hz\n", para->lopass);
00360 j_printf("\t zero mean frame = ");
00361 if (para->zmeanframe) j_printf("ON\n");
00362 else j_printf("OFF\n");
00363 }