Julius 4.2
|
00001 00023 /* 00024 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00025 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00026 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00027 * All rights reserved 00028 */ 00029 00030 #include <sent/stddefs.h> 00031 #include <sent/mfcc.h> 00032 00033 #ifdef MFCC_SINCOS_TABLE 00034 00041 static void 00042 make_costbl_hamming(MFCCWork *w, int framesize) 00043 { 00044 int i; 00045 float a; 00046 00047 w->costbl_hamming = (double *)mymalloc(sizeof(double) * framesize); 00048 a = 2.0 * PI / (framesize - 1); 00049 for(i=1;i<=framesize;i++) { 00050 /*costbl_hamming[i-1] = 0.54 - 0.46 * cos(2 * PI * (i - 1) / (float)(framesize - 1));*/ 00051 w->costbl_hamming[i-1] = 0.54 - 0.46 * cos(a * (i - 1)); 00052 } 00053 w->costbl_hamming_len = framesize; 00054 #ifdef MFCC_TABLE_DEBUG 00055 jlog("Stat: mfcc-core: generated Hamming cos table (%d bytes)\n", 00056 w->costbl_hamming_len * sizeof(double)); 00057 #endif 00058 } 00059 00066 static void 00067 make_fft_table(MFCCWork *w, int n) 00068 { 00069 int m; 00070 int me, me1; 00071 00072 w->costbl_fft = (double *)mymalloc(sizeof(double) * n); 00073 w->sintbl_fft = (double *)mymalloc(sizeof(double) * n); 00074 for (m = 1; m <= n; m++) { 00075 me = 1 << m; 00076 me1 = me / 2; 00077 w->costbl_fft[m-1] = cos(PI / me1); 00078 w->sintbl_fft[m-1] = -sin(PI / me1); 00079 } 00080 w->tbllen = n; 00081 #ifdef MFCC_TABLE_DEBUG 00082 jlog("Stat: mfcc-core: generated FFT sin/cos table (%d bytes)\n", w->tbllen * sizeof(double)); 00083 #endif 00084 } 00085 00093 static void 00094 make_costbl_makemfcc(MFCCWork *w, int fbank_num, int mfcc_dim) 00095 { 00096 int size; 00097 int i, j, k; 00098 float B, C; 00099 00100 size = fbank_num * mfcc_dim; 00101 w->costbl_makemfcc = (double *)mymalloc(sizeof(double) * size); 00102 00103 B = PI / fbank_num; 00104 k = 0; 00105 for(i=1;i<=mfcc_dim;i++) { 00106 C = i * B; 00107 for(j=1;j<=fbank_num;j++) { 00108 w->costbl_makemfcc[k] = cos(C * (j - 0.5)); 00109 k++; 00110 } 00111 } 00112 w->costbl_makemfcc_len = size; 00113 #ifdef MFCC_TABLE_DEBUG 00114 jlog("Stat: mfcc-core: generated MakeMFCC cos table (%d bytes)\n", 00115 w->costbl_makemfcc_len * sizeof(double)); 00116 #endif 00117 } 00118 00126 static void 00127 make_sintbl_wcep(MFCCWork *w, int lifter, int mfcc_dim) 00128 { 00129 int i; 00130 float a, b; 00131 00132 w->sintbl_wcep = (double *)mymalloc(sizeof(double) * mfcc_dim); 00133 if (lifter > 0) { 00134 a = PI / lifter; 00135 b = lifter / 2.0; 00136 for(i=0;i<mfcc_dim;i++) { 00137 w->sintbl_wcep[i] = 1.0 + b * sin((i+1) * a); 00138 } 00139 } else { 00140 for(i=0;i<mfcc_dim;i++) { 00141 w->sintbl_wcep[i] = 1.0; 00142 } 00143 } 00144 w->sintbl_wcep_len = mfcc_dim; 00145 #ifdef MFCC_TABLE_DEBUG 00146 jlog("Stat: mfcc-core: generated WeightCepstrum sin table (%d bytes)\n", 00147 w->sintbl_wcep_len * sizeof(double)); 00148 #endif 00149 } 00150 00151 #endif /* MFCC_SINCOS_TABLE */ 00152 00161 float Mel(int k, float fres) 00162 { 00163 return(1127 * log(1 + (k-1) * fres)); 00164 } 00165 00176 static boolean 00177 VTLN_recreate_fbank_cf(float *cf, Value *para, float mlo, float mhi, int maxChan) 00178 { 00179 int chan; 00180 float minf, maxf, cf_orig, cf_new; 00181 float scale, cu, cl, au, al; 00182 00183 /* restore frequency range to non-Mel */ 00184 minf = 700.0 * (exp(mlo / 1127.0) - 1.0); 00185 maxf = 700.0 * (exp(mhi / 1127.0) - 1.0); 00186 00187 if (para->vtln_upper > maxf) { 00188 jlog("Error: VTLN upper cut-off greater than upper frequency bound: %.1f > %.1f\n", para->vtln_upper, maxf); 00189 return FALSE; 00190 } 00191 if (para->vtln_lower < minf) { 00192 jlog("Error: VTLN lower cut-off smaller than lower frequency bound: %.1f < %.1f\n", para->vtln_lower, minf); 00193 return FALSE; 00194 } 00195 00196 /* prepare variables for warping */ 00197 scale = 1.0 / para->vtln_alpha; 00198 cu = para->vtln_upper * 2 / ( 1 + scale); 00199 cl = para->vtln_lower * 2 / ( 1 + scale); 00200 au = (maxf - cu * scale) / (maxf - cu); 00201 al = (cl * scale - minf) / (cl - minf); 00202 00203 for (chan = 1; chan <= maxChan; chan++) { 00204 /* get center frequency, restore to non-Mel */ 00205 cf_orig = 700.0 * (exp(cf[chan] / 1127.0) - 1.0); 00206 /* do warping */ 00207 if( cf_orig > cu ){ 00208 cf_new = au * (cf_orig - cu) + scale * cu; 00209 } else if ( cf_orig < cl){ 00210 cf_new = al * (cf_orig - minf) + minf; 00211 } else { 00212 cf_new = scale * cf_orig; 00213 } 00214 /* convert the new center frequency to Mel and store */ 00215 cf[chan] = 1127.0 * log (1.0 + cf_new / 700.0); 00216 } 00217 return TRUE; 00218 } 00219 00228 boolean 00229 InitFBank(MFCCWork *w, Value *para) 00230 { 00231 float mlo, mhi, ms, melk; 00232 int k, chan, maxChan, nv2; 00233 00234 /* Calculate FFT size */ 00235 w->fb.fftN = 2; w->fb.n = 1; 00236 while(para->framesize > w->fb.fftN){ 00237 w->fb.fftN *= 2; w->fb.n++; 00238 } 00239 00240 nv2 = w->fb.fftN / 2; 00241 w->fb.fres = 1.0E7 / (para->smp_period * w->fb.fftN * 700.0); 00242 maxChan = para->fbank_num + 1; 00243 w->fb.klo = 2; w->fb.khi = nv2; 00244 mlo = 0; mhi = Mel(nv2 + 1, w->fb.fres); 00245 00246 /* lo pass filter */ 00247 if (para->lopass >= 0) { 00248 mlo = 1127*log(1+(float)para->lopass/700.0); 00249 w->fb.klo = ((float)para->lopass * para->smp_period * 1.0e-7 * w->fb.fftN) + 2.5; 00250 if (w->fb.klo<2) w->fb.klo = 2; 00251 } 00252 /* hi pass filter */ 00253 if (para->hipass >= 0) { 00254 mhi = 1127*log(1+(float)para->hipass/700.0); 00255 w->fb.khi = ((float)para->hipass * para->smp_period * 1.0e-7 * w->fb.fftN) + 0.5; 00256 if (w->fb.khi>nv2) w->fb.khi = nv2; 00257 } 00258 00259 /* Create vector of fbank centre frequencies */ 00260 w->fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float)); 00261 ms = mhi - mlo; 00262 for (chan = 1; chan <= maxChan; chan++) 00263 w->fb.cf[chan] = ((float)chan / maxChan)*ms + mlo; 00264 00265 if (para->vtln_alpha != 1.0) { 00266 /* Modify fbank center frequencies for VTLN */ 00267 if (VTLN_recreate_fbank_cf(w->fb.cf, para, mlo, mhi, maxChan) == FALSE) { 00268 return FALSE; 00269 } 00270 } 00271 00272 /* Create loChan map, loChan[fftindex] -> lower channel index */ 00273 w->fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short)); 00274 for(k = 1, chan = 1; k <= nv2; k++){ 00275 if (k < w->fb.klo || k > w->fb.khi) w->fb.loChan[k] = -1; 00276 else { 00277 melk = Mel(k, w->fb.fres); 00278 while (w->fb.cf[chan] < melk && chan <= maxChan) ++chan; 00279 w->fb.loChan[k] = chan - 1; 00280 } 00281 } 00282 00283 /* Create vector of lower channel weights */ 00284 w->fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float)); 00285 for(k = 1; k <= nv2; k++) { 00286 chan = w->fb.loChan[k]; 00287 if (k < w->fb.klo || k > w->fb.khi) w->fb.loWt[k] = 0.0; 00288 else { 00289 if (chan > 0) 00290 w->fb.loWt[k] = (w->fb.cf[chan + 1] - Mel(k, w->fb.fres)) / (w->fb.cf[chan + 1] - w->fb.cf[chan]); 00291 else 00292 w->fb.loWt[k] = (w->fb.cf[1] - Mel(k, w->fb.fres)) / (w->fb.cf[1] - mlo); 00293 } 00294 } 00295 00296 /* Create workspace for fft */ 00297 w->fb.Re = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float)); 00298 w->fb.Im = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float)); 00299 00300 w->sqrt2var = sqrt(2.0 / para->fbank_num); 00301 00302 return TRUE; 00303 } 00304 00310 void 00311 FreeFBank(FBankInfo *fb) 00312 { 00313 free(fb->cf); 00314 free(fb->loChan); 00315 free(fb->loWt); 00316 free(fb->Re); 00317 free(fb->Im); 00318 } 00319 00327 void 00328 ZMeanFrame(float *wave, int framesize) 00329 { 00330 int i; 00331 float mean; 00332 00333 mean = 0.0; 00334 for(i = 1; i <= framesize; i++) mean += wave[i]; 00335 mean /= framesize; 00336 for(i = 1; i <= framesize; i++) wave[i] -= mean; 00337 } 00338 00347 float CalcLogRawE(float *wave, int framesize) 00348 { 00349 int i; 00350 double raw_E = 0.0; 00351 float energy; 00352 00353 for(i = 1; i <= framesize; i++) 00354 raw_E += wave[i] * wave[i]; 00355 energy = (float)log(raw_E); 00356 00357 return(energy); 00358 } 00359 00367 void PreEmphasise (float *wave, int framesize, float preEmph) 00368 { 00369 int i; 00370 00371 for(i = framesize; i >= 2; i--) 00372 wave[i] -= wave[i - 1] * preEmph; 00373 wave[1] *= 1.0 - preEmph; 00374 } 00375 00383 void Hamming(float *wave, int framesize, MFCCWork *w) 00384 { 00385 int i; 00386 #ifdef MFCC_SINCOS_TABLE 00387 for(i = 1; i <= framesize; i++) 00388 wave[i] *= w->costbl_hamming[i-1]; 00389 #else 00390 float a; 00391 a = 2 * PI / (framesize - 1); 00392 for(i = 1; i <= framesize; i++) 00393 wave[i] *= 0.54 - 0.46 * cos(a * (i - 1)); 00394 #endif 00395 } 00396 00405 void FFT(float *xRe, float *xIm, int p, MFCCWork *w) 00406 { 00407 int i, ip, j, k, m, me, me1, n, nv2; 00408 double uRe, uIm, vRe, vIm, wRe, wIm, tRe, tIm; 00409 00410 n = 1<<p; 00411 nv2 = n / 2; 00412 00413 j = 0; 00414 for(i = 0; i < n-1; i++){ 00415 if(j > i){ 00416 tRe = xRe[j]; tIm = xIm[j]; 00417 xRe[j] = xRe[i]; xIm[j] = xIm[i]; 00418 xRe[i] = tRe; xIm[i] = tIm; 00419 } 00420 k = nv2; 00421 while(j >= k){ 00422 j -= k; k /= 2; 00423 } 00424 j += k; 00425 } 00426 00427 for(m = 1; m <= p; m++){ 00428 me = 1<<m; me1 = me / 2; 00429 uRe = 1.0; uIm = 0.0; 00430 #ifdef MFCC_SINCOS_TABLE 00431 wRe = w->costbl_fft[m-1]; wIm = w->sintbl_fft[m-1]; 00432 #else 00433 wRe = cos(PI / me1); wIm = -sin(PI / me1); 00434 #endif 00435 for(j = 0; j < me1; j++){ 00436 for(i = j; i < n; i += me){ 00437 ip = i + me1; 00438 tRe = xRe[ip] * uRe - xIm[ip] * uIm; 00439 tIm = xRe[ip] * uIm + xIm[ip] * uRe; 00440 xRe[ip] = xRe[i] - tRe; xIm[ip] = xIm[i] - tIm; 00441 xRe[i] += tRe; xIm[i] += tIm; 00442 } 00443 vRe = uRe * wRe - uIm * wIm; vIm = uRe * wIm + uIm * wRe; 00444 uRe = vRe; uIm = vIm; 00445 } 00446 } 00447 } 00448 00449 00457 void 00458 MakeFBank(float *wave, MFCCWork *w, Value *para) 00459 { 00460 int k, bin, i; 00461 double Re, Im, A, P, NP, H, temp; 00462 00463 for(k = 1; k <= para->framesize; k++){ 00464 w->fb.Re[k - 1] = wave[k]; w->fb.Im[k - 1] = 0.0; /* copy to workspace */ 00465 } 00466 for(k = para->framesize + 1; k <= w->fb.fftN; k++){ 00467 w->fb.Re[k - 1] = 0.0; w->fb.Im[k - 1] = 0.0; /* pad with zeroes */ 00468 } 00469 00470 /* Take FFT */ 00471 FFT(w->fb.Re, w->fb.Im, w->fb.n, w); 00472 00473 if (w->ssbuf != NULL) { 00474 /* Spectral Subtraction */ 00475 for(k = 1; k <= w->fb.fftN; k++){ 00476 Re = w->fb.Re[k - 1]; Im = w->fb.Im[k - 1]; 00477 P = sqrt(Re * Re + Im * Im); 00478 NP = w->ssbuf[k - 1]; 00479 if((P * P - w->ss_alpha * NP * NP) < 0){ 00480 H = w->ss_floor; 00481 }else{ 00482 H = sqrt(P * P - w->ss_alpha * NP * NP) / P; 00483 } 00484 w->fb.Re[k - 1] = H * Re; 00485 w->fb.Im[k - 1] = H * Im; 00486 } 00487 } 00488 00489 /* Fill filterbank channels */ 00490 for(i = 1; i <= para->fbank_num; i++) 00491 w->fbank[i] = 0.0; 00492 00493 if (para->usepower) { 00494 for(k = w->fb.klo; k <= w->fb.khi; k++){ 00495 Re = w->fb.Re[k-1]; Im = w->fb.Im[k-1]; 00496 A = Re * Re + Im * Im; 00497 bin = w->fb.loChan[k]; 00498 Re = w->fb.loWt[k] * A; 00499 if(bin > 0) w->fbank[bin] += Re; 00500 if(bin < para->fbank_num) w->fbank[bin + 1] += A - Re; 00501 } 00502 } else { 00503 for(k = w->fb.klo; k <= w->fb.khi; k++){ 00504 Re = w->fb.Re[k-1]; Im = w->fb.Im[k-1]; 00505 A = sqrt(Re * Re + Im * Im); 00506 bin = w->fb.loChan[k]; 00507 Re = w->fb.loWt[k] * A; 00508 if(bin > 0) w->fbank[bin] += Re; 00509 if(bin < para->fbank_num) w->fbank[bin + 1] += A - Re; 00510 } 00511 } 00512 00513 /* Take logs */ 00514 for(bin = 1; bin <= para->fbank_num; bin++){ 00515 temp = w->fbank[bin]; 00516 if(temp < 1.0) temp = 1.0; 00517 w->fbank[bin] = log(temp); 00518 } 00519 } 00520 00529 float CalcC0(MFCCWork *w, Value *para) 00530 { 00531 int i; 00532 float S; 00533 00534 S = 0.0; 00535 for(i = 1; i <= para->fbank_num; i++) 00536 S += w->fbank[i]; 00537 return S * w->sqrt2var; 00538 } 00539 00547 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w) 00548 { 00549 #ifdef MFCC_SINCOS_TABLE 00550 int i, j, k; 00551 k = 0; 00552 /* Take DCT */ 00553 for(i = 0; i < para->mfcc_dim; i++){ 00554 mfcc[i] = 0.0; 00555 for(j = 1; j <= para->fbank_num; j++) 00556 mfcc[i] += w->fbank[j] * w->costbl_makemfcc[k++]; 00557 mfcc[i] *= w->sqrt2var; 00558 } 00559 #else 00560 int i, j; 00561 float B, C; 00562 00563 B = PI / para->fbank_num; 00564 /* Take DCT */ 00565 for(i = 1; i <= para->mfcc_dim; i++){ 00566 mfcc[i - 1] = 0.0; 00567 C = i * B; 00568 for(j = 1; j <= para->fbank_num; j++) 00569 mfcc[i - 1] += w->fbank[j] * cos(C * (j - 0.5)); 00570 mfcc[i - 1] *= w->sqrt2var; 00571 } 00572 #endif 00573 } 00574 00582 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w) 00583 { 00584 #ifdef MFCC_SINCOS_TABLE 00585 int i; 00586 for(i=0;i<para->mfcc_dim;i++) { 00587 mfcc[i] *= w->sintbl_wcep[i]; 00588 } 00589 #else 00590 int i; 00591 float a, b, *cepWin; 00592 00593 cepWin = (float *)mymalloc(para->mfcc_dim * sizeof(float)); 00594 a = PI / para->lifter; 00595 b = para->lifter / 2.0; 00596 00597 for(i = 0; i < para->mfcc_dim; i++){ 00598 cepWin[i] = 1.0 + b * sin((i + 1) * a); 00599 mfcc[i] *= cepWin[i]; 00600 } 00601 00602 free(cepWin); 00603 #endif 00604 } 00605 00606 /************************************************************************/ 00607 /************************************************************************/ 00608 /************************************************************************/ 00609 /************************************************************************/ 00610 /************************************************************************/ 00619 MFCCWork * 00620 WMP_work_new(Value *para) 00621 { 00622 MFCCWork *w; 00623 00624 /* newly allocated area should be cleared */ 00625 w = (MFCCWork *)mymalloc(sizeof(MFCCWork)); 00626 memset(w, 0, sizeof(MFCCWork)); 00627 00628 /* set filterbank information */ 00629 if (InitFBank(w, para) == FALSE) return NULL; 00630 00631 #ifdef MFCC_SINCOS_TABLE 00632 /* prepare tables */ 00633 make_costbl_hamming(w, para->framesize); 00634 make_fft_table(w, w->fb.n); 00635 if (para->mfcc_dim >= 0) { 00636 make_costbl_makemfcc(w, para->fbank_num, para->mfcc_dim); 00637 make_sintbl_wcep(w, para->lifter, para->mfcc_dim); 00638 } 00639 #endif 00640 00641 /* prepare some buffers */ 00642 w->fbank = (double *)mymalloc((para->fbank_num+1)*sizeof(double)); 00643 w->bf = (float *)mymalloc(w->fb.fftN * sizeof(float)); 00644 w->bflen = w->fb.fftN; 00645 00646 return w; 00647 } 00648 00657 void 00658 WMP_calc(MFCCWork *w, float *mfcc, Value *para) 00659 { 00660 float energy = 0.0; 00661 float c0 = 0.0; 00662 int p; 00663 00664 if (para->zmeanframe) { 00665 ZMeanFrame(w->bf, para->framesize); 00666 } 00667 00668 if (para->energy && para->raw_e) { 00669 /* calculate log raw energy */ 00670 energy = CalcLogRawE(w->bf, para->framesize); 00671 } 00672 /* pre-emphasize */ 00673 PreEmphasise(w->bf, para->framesize, para->preEmph); 00674 /* hamming window */ 00675 Hamming(w->bf, para->framesize, w); 00676 if (para->energy && ! para->raw_e) { 00677 /* calculate log energy */ 00678 energy = CalcLogRawE(w->bf, para->framesize); 00679 } 00680 /* filterbank */ 00681 MakeFBank(w->bf, w, para); 00682 /* 0'th cepstral parameter */ 00683 if (para->c0) c0 = CalcC0(w, para); 00684 /* MFCC */ 00685 MakeMFCC(mfcc, para, w); 00686 /* weight cepstrum */ 00687 WeightCepstrum(mfcc, para, w); 00688 /* set energy to mfcc */ 00689 p = para->mfcc_dim; 00690 if (para->c0) mfcc[p++] = c0; 00691 if (para->energy) mfcc[p++] = energy; 00692 } 00693 00699 void 00700 WMP_free(MFCCWork *w) 00701 { 00702 if (w->fbank) { 00703 FreeFBank(&(w->fb)); 00704 free(w->fbank); 00705 free(w->bf); 00706 w->fbank = NULL; 00707 w->bf = NULL; 00708 } 00709 #ifdef MFCC_SINCOS_TABLE 00710 if (w->costbl_hamming) { 00711 free(w->costbl_hamming); 00712 w->costbl_hamming = NULL; 00713 } 00714 if (w->costbl_fft) { 00715 free(w->costbl_fft); 00716 w->costbl_fft = NULL; 00717 } 00718 if (w->sintbl_fft) { 00719 free(w->sintbl_fft); 00720 w->sintbl_fft = NULL; 00721 } 00722 if (w->costbl_makemfcc) { 00723 free(w->costbl_makemfcc); 00724 w->costbl_makemfcc = NULL; 00725 } 00726 if (w->sintbl_wcep) { 00727 free(w->sintbl_wcep); 00728 w->sintbl_wcep = NULL; 00729 } 00730 #endif 00731 free(w); 00732 } 00733