Julius 4.2
|
00001 00031 /************************************************************************/ 00032 /* wav2mfcc.c Convert Speech file to MFCC_E_D_(Z) file */ 00033 /*----------------------------------------------------------------------*/ 00034 /* Author : Yuichiro Nakano */ 00035 /* */ 00036 /* Copyright(C) Yuichiro Nakano 1996-1998 */ 00037 /*----------------------------------------------------------------------*/ 00038 /************************************************************************/ 00039 00040 00041 #include <sent/stddefs.h> 00042 #include <sent/mfcc.h> 00043 00056 int 00057 Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w) 00058 { 00059 int i, k, t; 00060 int end = 0, start = 1; 00061 int frame_num; /* Number of samples in output file */ 00062 00063 /* set noise spectrum if any */ 00064 if (w->ssbuf != NULL) { 00065 /* check ssbuf length */ 00066 if (w->ssbuflen != w->bflen) { 00067 jlog("Error: mfcc-core: noise spectrum length not match\n"); 00068 return FALSE; 00069 } 00070 } 00071 00072 frame_num = (int)((nSamples - para->framesize) / para->frameshift) + 1; 00073 00074 for(t = 0; t < frame_num; t++){ 00075 if(end != 0) start = end - (para->framesize - para->frameshift) - 1; 00076 00077 k = 1; 00078 for(i = start; i <= start + para->framesize; i++){ 00079 w->bf[k] = (float)wave[i - 1]; k++; 00080 } 00081 end = i; 00082 00083 /* Calculate base MFCC coefficients */ 00084 WMP_calc(w, mfcc[t], para); 00085 } 00086 00087 /* Normalise Log Energy */ 00088 if (para->energy && para->enormal) NormaliseLogE(mfcc, frame_num, para); 00089 00090 /* Delta (consider energy suppress) */ 00091 if (para->delta) Delta(mfcc, frame_num, para); 00092 00093 /* Acceleration */ 00094 if (para->acc) Accel(mfcc, frame_num, para); 00095 00096 /* Cepstrum Mean and/or Variance Normalization */ 00097 if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim); 00098 else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para); 00099 00100 return(frame_num); 00101 } 00102 00110 void NormaliseLogE(float **mfcc, int frame_num, Value *para) 00111 { 00112 float max, min, f; 00113 int t; 00114 int l; 00115 00116 l = para->mfcc_dim; 00117 if (para->c0) l++; 00118 00119 /* find max log energy */ 00120 max = mfcc[0][l]; 00121 for(t = 0; t < frame_num; t++) 00122 if(mfcc[t][l] > max) max = mfcc[t][l]; 00123 00124 /* set the silence floor */ 00125 min = max - (para->silFloor * LOG_TEN) / 10.0; 00126 00127 /* normalise */ 00128 for(t = 0; t < frame_num; t++){ 00129 f = mfcc[t][l]; 00130 if (f < min) f = min; 00131 mfcc[t][l] = 1.0 - (max - f) * para->escale; 00132 } 00133 } 00134 00142 void Delta(float **c, int frame, Value *para) 00143 { 00144 int theta, t, n, B = 0; 00145 float A1, A2, sum; 00146 00147 for(theta = 1; theta <= para->delWin; theta++) 00148 B += theta * theta; 00149 00150 for(n = para->baselen - 1; n >=0; n--){ 00151 for(t = 0; t < frame; t++){ 00152 sum = 0; 00153 for(theta = 1; theta <= para->delWin; theta++){ 00154 /* Replicate the first or last vector */ 00155 /* at the beginning and end of speech */ 00156 if (t - theta < 0) A1 = c[0][n]; 00157 else A1 = c[t - theta][n]; 00158 if (t + theta >= frame) A2 = c[frame - 1][n]; 00159 else A2 = c[t + theta][n]; 00160 sum += theta * (A2 - A1); 00161 } 00162 sum /= (2.0 * B); 00163 if (para->absesup) { 00164 c[t][para->baselen + n - 1] = sum; 00165 } else { 00166 c[t][para->baselen + n] = sum; 00167 } 00168 } 00169 } 00170 } 00171 00172 00180 void Accel(float **c, int frame, Value *para) 00181 { 00182 int theta, t, n, B = 0; 00183 int src, dst; 00184 float A1, A2, sum; 00185 00186 for(theta = 1; theta <= para->accWin; theta++) 00187 B += theta * theta; 00188 00189 for(t = 0; t < frame; t++){ 00190 src = para->baselen * 2 - 1; 00191 if (para->absesup) src--; 00192 dst = src + para->baselen; 00193 for(n = 0; n < para->baselen; n++){ 00194 sum = 0; 00195 for(theta = 1; theta <= para->accWin; theta++){ 00196 /* Replicate the first or last vector */ 00197 /* at the beginning and end of speech */ 00198 if (t - theta < 0) A1 = c[0][src]; 00199 else A1 = c[t - theta][src]; 00200 if (t + theta >= frame) A2 = c[frame - 1][src]; 00201 else A2 = c[t + theta][src]; 00202 sum += theta * (A2 - A1); 00203 } 00204 c[t][dst] = sum / (2 * B); 00205 src--; 00206 dst--; 00207 } 00208 } 00209 } 00210 00219 void CMN(float **mfcc, int frame_num, int dim) 00220 { 00221 int i, t; 00222 float *mfcc_ave, *sum; 00223 00224 mfcc_ave = (float *)mycalloc(dim, sizeof(float)); 00225 sum = (float *)mycalloc(dim, sizeof(float)); 00226 00227 for(i = 0; i < dim; i++){ 00228 sum[i] = 0.0; 00229 for(t = 0; t < frame_num; t++) 00230 sum[i] += mfcc[t][i]; 00231 mfcc_ave[i] = sum[i] / frame_num; 00232 } 00233 for(t = 0; t < frame_num; t++){ 00234 for(i = 0; i < dim; i++) 00235 mfcc[t][i] = mfcc[t][i] - mfcc_ave[i]; 00236 } 00237 free(sum); 00238 free(mfcc_ave); 00239 } 00240 00248 void MVN(float **mfcc, int frame_num, Value *para) 00249 { 00250 int i, t; 00251 float *mfcc_mean, *mfcc_sd; 00252 float x; 00253 int basedim; 00254 00255 basedim = para->mfcc_dim; // + (para->c0 ? 1 : 0); 00256 00257 mfcc_mean = (float *)mycalloc(para->veclen, sizeof(float)); 00258 if (para->cvn) mfcc_sd = (float *)mycalloc(para->veclen, sizeof(float)); 00259 00260 /* get mean */ 00261 for(i = 0; i < para->veclen; i++){ 00262 mfcc_mean[i] = 0.0; 00263 for(t = 0; t < frame_num; t++) 00264 mfcc_mean[i] += mfcc[t][i]; 00265 mfcc_mean[i] /= (float)frame_num; 00266 } 00267 if (para->cvn) { 00268 /* get standard deviation */ 00269 for(i = 0; i < para->veclen; i++){ 00270 mfcc_sd[i] = 0.0; 00271 for(t = 0; t < frame_num; t++) { 00272 x = mfcc[t][i] - mfcc_mean[i]; 00273 mfcc_sd[i] += x * x; 00274 } 00275 mfcc_sd[i] = sqrt(mfcc_sd[i] / (float)frame_num); 00276 } 00277 } 00278 for(t = 0; t < frame_num; t++){ 00279 if (para->cmn) { 00280 /* mean normalization (base MFCC only) */ 00281 for(i = 0; i < basedim; i++) mfcc[t][i] -= mfcc_mean[i]; 00282 } 00283 if (para->cvn) { 00284 /* variance normalization (full MFCC) */ 00285 for(i = 0; i < para->veclen; i++) mfcc[t][i] /= mfcc_sd[i]; 00286 } 00287 } 00288 00289 if (para->cvn) free(mfcc_sd); 00290 free(mfcc_mean); 00291 }