Julius 4.2
|
00001 00026 /* 00027 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00028 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00029 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00030 * All rights reserved 00031 */ 00032 00033 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis, 00034 and also realtime CMN for 1st-pass pipe-lining */ 00035 00036 /************************************************************************/ 00037 /* wav2mfcc.c Convert Speech file to MFCC_E_D_(Z) file */ 00038 /*----------------------------------------------------------------------*/ 00039 /* Author : Yuichiro Nakano */ 00040 /* */ 00041 /* Copyright(C) Yuichiro Nakano 1996-1998 */ 00042 /*----------------------------------------------------------------------*/ 00043 /************************************************************************/ 00044 00045 00046 #include <sent/stddefs.h> 00047 #include <sent/mfcc.h> 00048 #include <sent/htk_param.h> 00049 00050 /***********************************************************************/ 00059 DeltaBuf * 00060 WMP_deltabuf_new(int veclen, int windowlen) 00061 { 00062 int i; 00063 DeltaBuf *db; 00064 00065 db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf)); 00066 db->veclen = veclen; 00067 db->win = windowlen; 00068 db->len = windowlen * 2 + 1; 00069 db->mfcc = (float **)mymalloc(sizeof(float *) * db->len); 00070 db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len); 00071 for (i=0;i<db->len;i++) { 00072 db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2); 00073 } 00074 db->B = 0; 00075 for(i = 1; i <= windowlen; i++) db->B += i * i; 00076 db->B *= 2; 00077 00078 return (db); 00079 } 00080 00086 void 00087 WMP_deltabuf_free(DeltaBuf *db) 00088 { 00089 int i; 00090 00091 for (i=0;i<db->len;i++) { 00092 free(db->mfcc[i]); 00093 } 00094 free(db->is_on); 00095 free(db->mfcc); 00096 free(db); 00097 } 00098 00104 void 00105 WMP_deltabuf_prepare(DeltaBuf *db) 00106 { 00107 int i; 00108 db->store = 0; 00109 for (i=0;i<db->len;i++) { 00110 db->is_on[i] = FALSE; 00111 } 00112 } 00113 00120 static void 00121 WMP_deltabuf_calc(DeltaBuf *db, int cur) 00122 { 00123 int n, theta, p; 00124 float A1, A2, sum; 00125 int last_valid_left, last_valid_right; 00126 00127 for (n = 0; n < db->veclen; n++) { 00128 sum = 0.0; 00129 last_valid_left = last_valid_right = cur; 00130 for (theta = 1; theta <= db->win; theta++) { 00131 p = cur - theta; 00132 if (p < 0) p += db->len; 00133 if (db->is_on[p]) { 00134 A1 = db->mfcc[p][n]; 00135 last_valid_left = p; 00136 } else { 00137 A1 = db->mfcc[last_valid_left][n]; 00138 } 00139 p = cur + theta; 00140 if (p >= db->len) p -= db->len; 00141 if (db->is_on[p]) { 00142 A2 = db->mfcc[p][n]; 00143 last_valid_right = p; 00144 } else { 00145 A2 = db->mfcc[last_valid_right][n]; 00146 } 00147 sum += theta * (A2 - A1); 00148 } 00149 db->mfcc[cur][db->veclen + n] = sum / db->B; 00150 } 00151 } 00152 00163 boolean 00164 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc) 00165 { 00166 int cur; 00167 boolean ret; 00168 00169 /* copy data to store point */ 00170 memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen); 00171 db->is_on[db->store] = TRUE; 00172 00173 /* get current calculation point */ 00174 cur = db->store - db->win; 00175 if (cur < 0) cur += db->len; 00176 00177 /* if the current point is fulfilled, compute delta */ 00178 if (db->is_on[cur]) { 00179 WMP_deltabuf_calc(db, cur); 00180 db->vec = db->mfcc[cur]; 00181 ret = TRUE; 00182 } else { 00183 ret = FALSE; 00184 } 00185 00186 /* move store pointer to next */ 00187 db->store++; 00188 if (db->store >= db->len) db->store -= db->len; 00189 00190 /* return TRUE if delta computed for current, or -1 if not calculated yet */ 00191 return (ret); 00192 } 00193 00205 boolean 00206 WMP_deltabuf_flush(DeltaBuf *db) 00207 { 00208 int cur; 00209 boolean ret; 00210 00211 /* clear store point */ 00212 db->is_on[db->store] = FALSE; 00213 00214 /* get current calculation point */ 00215 cur = db->store - db->win; 00216 if (cur < 0) cur += db->len; 00217 00218 /* if the current point if fulfilled, compute delta */ 00219 if (db->is_on[cur]) { 00220 WMP_deltabuf_calc(db, cur); 00221 db->vec = db->mfcc[cur]; 00222 ret = TRUE; 00223 } else { 00224 ret = FALSE; 00225 } 00226 00227 /* move store pointer to next */ 00228 db->store++; 00229 if (db->store >= db->len) db->store -= db->len; 00230 00231 /* return TRUE if delta computed for current, or -1 if not calculated yet */ 00232 return (ret); 00233 } 00234 00235 /***********************************************************************/ 00236 /* MAP-CMN */ 00237 /***********************************************************************/ 00238 00246 CMNWork * 00247 CMN_realtime_new(Value *para, float weight) 00248 { 00249 int i; 00250 00251 CMNWork *c; 00252 00253 c = (CMNWork *)mymalloc(sizeof(CMNWork)); 00254 00255 c->cweight = weight; 00256 c->mfcc_dim = para->mfcc_dim; // + (para->c0 ? 1 : 0); 00257 c->veclen = para->veclen; 00258 c->mean = para->cmn ? TRUE : FALSE; 00259 c->var = para->cvn ? TRUE : FALSE; 00260 c->clist_max = CPSTEP; 00261 c->clist_num = 0; 00262 c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max); 00263 for(i=0;i<c->clist_max;i++) { 00264 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen); 00265 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen); 00266 c->clist[i].framenum = 0; 00267 } 00268 c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen); 00269 if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen); 00270 00271 c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen); 00272 if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen); 00273 c->cmean_init_set = FALSE; 00274 00275 return c; 00276 } 00277 00284 void 00285 CMN_realtime_free(CMNWork *c) 00286 { 00287 int i; 00288 00289 free(c->cmean_init); 00290 free(c->now.mfcc_sum); 00291 if (c->var) { 00292 free(c->cvar_init); 00293 free(c->now.mfcc_var); 00294 } 00295 for(i=0;i<c->clist_max;i++) { 00296 if (c->var) free(c->clist[i].mfcc_var); 00297 free(c->clist[i].mfcc_sum); 00298 } 00299 free(c->clist); 00300 free(c); 00301 } 00302 00308 void 00309 CMN_realtime_prepare(CMNWork *c) 00310 { 00311 int d; 00312 00313 for(d=0;d<c->veclen;d++) c->now.mfcc_sum[d] = 0.0; 00314 if (c->var) { 00315 for(d=0;d<c->veclen;d++) c->now.mfcc_var[d] = 0.0; 00316 } 00317 c->now.framenum = 0; 00318 } 00319 00327 void 00328 CMN_realtime(CMNWork *c, float *mfcc) 00329 { 00330 int d; 00331 double x, y; 00332 00333 c->now.framenum++; 00334 if (c->cmean_init_set) { 00335 /* initial data exists */ 00336 for(d=0;d<c->veclen;d++) { 00337 /* accumulate current MFCC to sum */ 00338 c->now.mfcc_sum[d] += mfcc[d]; 00339 /* calculate map-mean */ 00340 x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d]; 00341 y = (double)c->now.framenum + c->cweight; 00342 x /= y; 00343 if (c->var) { 00344 /* calculate map-var */ 00345 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x); 00346 } 00347 if (c->mean && d < c->mfcc_dim) { 00348 /* mean normalization */ 00349 mfcc[d] -= x; 00350 } 00351 if (c->var) { 00352 /* variance normalization */ 00353 x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d]; 00354 y = (double)c->now.framenum + c->cweight; 00355 mfcc[d] /= sqrt(x / y); 00356 } 00357 } 00358 } else { 00359 /* no initial data */ 00360 for(d=0;d<c->veclen;d++) { 00361 /* accumulate current MFCC to sum */ 00362 c->now.mfcc_sum[d] += mfcc[d]; 00363 /* calculate current mean */ 00364 x = c->now.mfcc_sum[d] / c->now.framenum; 00365 if (c->var) { 00366 /* calculate current variance */ 00367 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x); 00368 } 00369 if (c->mean && d < c->mfcc_dim) { 00370 /* mean normalization */ 00371 mfcc[d] -= x; 00372 } 00373 #if 0 /* not perform variance normalization on no initial data */ 00374 if (c->var) { 00375 /* variance normalization */ 00376 mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum); 00377 } 00378 #endif 00379 } 00380 } 00381 } 00382 00388 void 00389 CMN_realtime_update(CMNWork *c, HTK_Param *param) 00390 { 00391 float *tmp, *tmp2; 00392 int i, d; 00393 int frames; 00394 00395 /* if CMN_realtime was never called before this, return immediately */ 00396 /* this may occur by pausing just after startup */ 00397 if (c->now.framenum == 0) return; 00398 00399 /* re-calculate variance based on the final mean at the given param */ 00400 if (c->var && param != NULL) { 00401 float m, x; 00402 if (param->samplenum != c->now.framenum) { 00403 jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n"); 00404 } else if (param->veclen != c->veclen) { 00405 jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n"); 00406 } else { 00407 for(d=0;d<c->veclen;d++) { 00408 m = c->now.mfcc_sum[d] / (float) c->now.framenum; 00409 x = 0; 00410 for(i=0;i<param->samplenum;i++) { 00411 x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m); 00412 } 00413 c->now.mfcc_var[d] = x; 00414 } 00415 } 00416 } 00417 00418 /* compute cepstral mean from now and previous sums up to CPMAX frames */ 00419 for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d]; 00420 if (c->var) { 00421 for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d]; 00422 } 00423 frames = c->now.framenum; 00424 for(i=0;i<c->clist_num;i++) { 00425 for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d]; 00426 if (c->var) { 00427 for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d]; 00428 } 00429 frames += c->clist[i].framenum; 00430 if (frames >= CPMAX) break; 00431 } 00432 for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames; 00433 if (c->var) { 00434 for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames; 00435 } 00436 00437 c->cmean_init_set = TRUE; 00438 00439 /* expand clist if neccessary */ 00440 if (c->clist_num == c->clist_max && frames < CPMAX) { 00441 c->clist_max += CPSTEP; 00442 c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max); 00443 for(i=c->clist_num;i<c->clist_max;i++) { 00444 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen); 00445 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen); 00446 c->clist[i].framenum = 0; 00447 } 00448 } 00449 00450 /* shift clist */ 00451 tmp = c->clist[c->clist_max-1].mfcc_sum; 00452 if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var; 00453 memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1)); 00454 c->clist[0].mfcc_sum = tmp; 00455 if (c->var) c->clist[0].mfcc_var = tmp2; 00456 /* copy now to clist[0] */ 00457 memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen); 00458 if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen); 00459 c->clist[0].framenum = c->now.framenum; 00460 00461 if (c->clist_num < c->clist_max) c->clist_num++; 00462 00463 } 00464 00475 static boolean 00476 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp) 00477 { 00478 if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) { 00479 return(FALSE); 00480 } 00481 #ifndef WORDS_BIGENDIAN 00482 swap_bytes(buf, unitbyte, unitnum); 00483 #endif 00484 return(TRUE); 00485 } 00486 00497 static boolean 00498 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd) 00499 { 00500 #ifndef WORDS_BIGENDIAN 00501 swap_bytes(buf, unitbyte, unitnum); 00502 #endif 00503 if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) { 00504 return(FALSE); 00505 } 00506 #ifndef WORDS_BIGENDIAN 00507 swap_bytes(buf, unitbyte, unitnum); 00508 #endif 00509 return(TRUE); 00510 } 00511 00521 boolean 00522 CMN_load_from_file(CMNWork *c, char *filename) 00523 { 00524 FILE *fp; 00525 int veclen; 00526 00527 jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename); 00528 if ((fp = fopen_readfile(filename)) == NULL) { 00529 jlog("Error: wav2mfcc-pipe: failed to open\n"); 00530 return(FALSE); 00531 } 00532 /* read header */ 00533 if (myread(&veclen, sizeof(int), 1, fp) == FALSE) { 00534 jlog("Error: wav2mfcc-pipe: failed to read header\n"); 00535 fclose_readfile(fp); 00536 return(FALSE); 00537 } 00538 /* check length */ 00539 if (veclen != c->veclen) { 00540 jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); 00541 jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen); 00542 fclose_readfile(fp); 00543 return(FALSE); 00544 } 00545 /* read body */ 00546 if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) { 00547 jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n"); 00548 fclose_readfile(fp); 00549 return(FALSE); 00550 } 00551 if (c->var) { 00552 if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) { 00553 jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n"); 00554 fclose_readfile(fp); 00555 return(FALSE); 00556 } 00557 } 00558 00559 if (fclose_readfile(fp) == -1) { 00560 jlog("Error: wav2mfcc-pipe: failed to close\n"); 00561 return(FALSE); 00562 } 00563 00564 c->cmean_init_set = TRUE; 00565 jlog("Stat: wav2mfcc-pipe: read CMN parameter\n"); 00566 00567 return(TRUE); 00568 } 00569 00578 boolean 00579 CMN_save_to_file(CMNWork *c, char *filename) 00580 { 00581 int fd; 00582 00583 jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename); 00584 00585 if ((fd = open(filename, O_CREAT | O_RDWR 00586 #ifdef O_BINARY 00587 | O_BINARY 00588 #endif 00589 , 0644)) == -1) { 00590 jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename); 00591 return(FALSE); 00592 } 00593 /* write header */ 00594 if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) { 00595 jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename); 00596 close(fd); 00597 return(FALSE); 00598 } 00599 /* write body */ 00600 if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) { 00601 jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename); 00602 close(fd); 00603 return(FALSE); 00604 } 00605 if (c->var) { 00606 if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) { 00607 jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename); 00608 close(fd); 00609 return(FALSE); 00610 } 00611 } 00612 00613 close(fd); 00614 00615 jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename); 00616 00617 return(TRUE); 00618 } 00619 00620 00621 /***********************************************************************/ 00622 /* energy normalization and scaling on live input */ 00623 /***********************************************************************/ 00624 00632 void 00633 energy_max_init(ENERGYWork *energy) 00634 { 00635 energy->max = 5.0; 00636 } 00637 00645 void 00646 energy_max_prepare(ENERGYWork *energy, Value *para) 00647 { 00648 energy->max_last = energy->max; 00649 energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0; 00650 energy->max = 0.0; 00651 } 00652 00662 LOGPROB 00663 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para) 00664 { 00665 if (energy->max < f) energy->max = f; 00666 if (f < energy->min_last) f = energy->min_last; 00667 return(1.0 - (energy->max_last - f) * para->escale); 00668 }