Julius 4.1.5
|
00001 00117 /* 00118 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00119 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00120 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00121 * All rights reserved 00122 */ 00123 00124 #include <julius/julius.h> 00125 00126 #undef RDEBUG ///< Define if you want local debug message 00127 00158 static void 00159 init_param(MFCCCalc *mfcc) 00160 { 00161 Value *para; 00162 00163 para = mfcc->para; 00164 00165 /* これから計算されるパラメータの型をヘッダに設定 */ 00166 /* set header types */ 00167 mfcc->param->header.samptype = F_MFCC; 00168 if (para->delta) mfcc->param->header.samptype |= F_DELTA; 00169 if (para->acc) mfcc->param->header.samptype |= F_ACCL; 00170 if (para->energy) mfcc->param->header.samptype |= F_ENERGY; 00171 if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; 00172 if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; 00173 if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; 00174 00175 mfcc->param->header.wshift = para->smp_period * para->frameshift; 00176 mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ 00177 mfcc->param->veclen = para->veclen; 00178 00179 /* 認識処理中/終了後にセットされる変数: 00180 param->parvec (パラメータベクトル系列) 00181 param->header.samplenum, param->samplenum (全フレーム数) 00182 */ 00183 /* variables that will be set while/after computation has been done: 00184 param->parvec (parameter vector sequence) 00185 param->header.samplenum, param->samplenum (total number of frames) 00186 */ 00187 /* MAP-CMN の初期化 */ 00188 /* Prepare for MAP-CMN */ 00189 if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); 00190 } 00191 00219 boolean 00220 RealTimeInit(Recog *recog) 00221 { 00222 Value *para; 00223 Jconf *jconf; 00224 RealBeam *r; 00225 MFCCCalc *mfcc; 00226 00227 00228 jconf = recog->jconf; 00229 r = &(recog->real); 00230 00231 /* 最大フレーム長を最大入力時間数から計算 */ 00232 /* set maximum allowed frame length */ 00233 r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift; 00234 00235 /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */ 00236 /* if "-ssload", load noise spectrum for spectral subtraction from file */ 00237 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00238 if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) { 00239 if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) { 00240 jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename); 00241 return FALSE; 00242 } 00243 /* check ssbuf length */ 00244 if (mfcc->frontend.sslen != mfcc->wrk->bflen) { 00245 jlog("ERROR: noise spectrum length not match\n"); 00246 return FALSE; 00247 } 00248 mfcc->wrk->ssbuf = mfcc->frontend.ssbuf; 00249 mfcc->wrk->ssbuflen = mfcc->frontend.sslen; 00250 mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha; 00251 mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; 00252 } 00253 } 00254 00255 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00256 00257 para = mfcc->para; 00258 00259 /* 対数エネルギー正規化のための初期値 */ 00260 /* set initial value for log energy normalization */ 00261 if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk)); 00262 /* デルタ計算のためのサイクルバッファを用意 */ 00263 /* initialize cycle buffers for delta and accel coef. computation */ 00264 if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin); 00265 if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin); 00266 /* デルタ計算のためのワークエリアを確保 */ 00267 /* allocate work area for the delta computation */ 00268 mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen); 00269 /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */ 00270 /* Initialize the initial cepstral mean data from file for MAP-CMN */ 00271 if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight); 00272 /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */ 00273 /* if "-cmnload", load initial cepstral mean data from file for CMN */ 00274 if (mfcc->cmn.load_filename) { 00275 if (para->cmn) { 00276 if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) { 00277 jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename); 00278 } 00279 } else { 00280 jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename); 00281 } 00282 } 00283 00284 } 00285 /* 窓長をセット */ 00286 /* set window length */ 00287 r->windowlen = recog->jconf->input.framesize + 1; 00288 /* 窓かけ用バッファを確保 */ 00289 /* set window buffer */ 00290 r->window = mymalloc(sizeof(SP16) * r->windowlen); 00291 00292 return TRUE; 00293 } 00294 00319 void 00320 reset_mfcc(Recog *recog) 00321 { 00322 Value *para; 00323 MFCCCalc *mfcc; 00324 RealBeam *r; 00325 00326 r = &(recog->real); 00327 00328 /* 特徴抽出モジュールを初期化 */ 00329 /* initialize parameter extraction module */ 00330 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00331 00332 para = mfcc->para; 00333 00334 /* 対数エネルギー正規化のための初期値をセット */ 00335 /* set initial value for log energy normalization */ 00336 if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para); 00337 /* デルタ計算用バッファを準備 */ 00338 /* set the delta cycle buffer */ 00339 if (para->delta) WMP_deltabuf_prepare(mfcc->db); 00340 if (para->acc) WMP_deltabuf_prepare(mfcc->ab); 00341 } 00342 00343 } 00344 00371 boolean 00372 RealTimePipeLinePrepare(Recog *recog) 00373 { 00374 RealBeam *r; 00375 PROCESS_AM *am; 00376 MFCCCalc *mfcc; 00377 #ifdef SPSEGMENT_NAIST 00378 RecogProcess *p; 00379 #endif 00380 00381 r = &(recog->real); 00382 00383 /* 計算用の変数を初期化 */ 00384 /* initialize variables for computation */ 00385 r->windownum = 0; 00386 /* parameter check */ 00387 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00388 /* パラメータ初期化 */ 00389 /* parameter initialization */ 00390 if (recog->jconf->input.speech_input == SP_MFCMODULE) { 00391 if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE; 00392 } else { 00393 init_param(mfcc); 00394 } 00395 /* フレームごとのパラメータベクトル保存の領域を確保 */ 00396 /* あとで必要に応じて伸長される */ 00397 if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) { 00398 j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n"); 00399 } 00400 /* フレーム数をリセット */ 00401 /* reset frame count */ 00402 mfcc->f = 0; 00403 } 00404 /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */ 00405 /* check type coherence between param and hmminfo here */ 00406 if (recog->jconf->input.paramtype_check_flag) { 00407 for(am=recog->amlist;am;am=am->next) { 00408 if (!check_param_coherence(am->hmminfo, am->mfcc->param)) { 00409 jlog("ERROR: input parameter type does not match AM\n"); 00410 return FALSE; 00411 } 00412 } 00413 } 00414 00415 /* 計算用のワークエリアを準備 */ 00416 /* prepare work area for calculation */ 00417 if (recog->jconf->input.type == INPUT_WAVEFORM) { 00418 reset_mfcc(recog); 00419 } 00420 /* 音響尤度計算用キャッシュを準備 */ 00421 /* prepare cache area for acoustic computation of HMM states and mixtures */ 00422 for(am=recog->amlist;am;am=am->next) { 00423 outprob_prepare(&(am->hmmwrk), r->maxframelen); 00424 } 00425 00426 #ifdef BACKEND_VAD 00427 if (recog->jconf->decodeopt.segment) { 00428 /* initialize segmentation parameters */ 00429 spsegment_init(recog); 00430 } 00431 #else 00432 recog->triggered = FALSE; 00433 #endif 00434 00435 #ifdef DEBUG_VTLN_ALPHA_TEST 00436 /* store speech */ 00437 recog->speechlen = 0; 00438 #endif 00439 00440 return TRUE; 00441 } 00442 00475 boolean 00476 RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen) 00477 { 00478 int i; 00479 boolean ret; 00480 VECT *tmpmfcc; 00481 Value *para; 00482 00483 tmpmfcc = mfcc->tmpmfcc; 00484 para = mfcc->para; 00485 00486 /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */ 00487 /* calculate base MFCC from waveform (use recog->mfccwrk) */ 00488 for (i=0; i < windowlen; i++) { 00489 mfcc->wrk->bf[i+1] = (float) window[i]; 00490 } 00491 WMP_calc(mfcc->wrk, tmpmfcc, para); 00492 00493 if (para->energy && para->enormal) { 00494 /* 対数エネルギー項を正規化する */ 00495 /* normalize log energy */ 00496 /* リアルタイム入力では発話ごとの最大エネルギーが得られないので 00497 直前の発話のパワーで代用する */ 00498 /* Since the maximum power of the whole input utterance cannot be 00499 obtained at real-time input, the maximum of last input will be 00500 used to normalize. 00501 */ 00502 tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para); 00503 } 00504 00505 if (para->delta) { 00506 /* デルタを計算する */ 00507 /* calc delta coefficients */ 00508 ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc); 00509 #ifdef RDEBUG 00510 printf("DeltaBuf: ret=%d, status=", ret); 00511 for(i=0;i<mfcc->db->len;i++) { 00512 printf("%d", mfcc->db->is_on[i]); 00513 } 00514 printf(", nextstore=%d\n", mfcc->db->store); 00515 #endif 00516 /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ 00517 /* if ret == FALSE, there is no available frame. So just wait for 00518 next input */ 00519 if (! ret) { 00520 return FALSE; 00521 } 00522 00523 /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */ 00524 /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */ 00525 memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); 00526 } 00527 00528 if (para->acc) { 00529 /* Accelerationを計算する */ 00530 /* calc acceleration coefficients */ 00531 /* base+delta をそのまま入れる */ 00532 /* send the whole base+delta to the cycle buffer */ 00533 ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc); 00534 #ifdef RDEBUG 00535 printf("AccelBuf: ret=%d, status=", ret); 00536 for(i=0;i<mfcc->ab->len;i++) { 00537 printf("%d", mfcc->ab->is_on[i]); 00538 } 00539 printf(", nextstore=%d\n", mfcc->ab->store); 00540 #endif 00541 /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ 00542 /* if ret == FALSE, there is no available frame. So just wait for 00543 next input */ 00544 if (! ret) { 00545 return FALSE; 00546 } 00547 /* ab->vec には,(base+delta) とその差分係数が入っている. 00548 [base] [delta] [delta] [acc] の順で入っているので, 00549 [base] [delta] [acc] を tmpmfcc にコピーする. */ 00550 /* now ab->vec holds the current (base+delta) and their delta coef. 00551 it holds a vector in the order of [base] [delta] [delta] [acc], 00552 so copy the [base], [delta] and [acc] to tmpmfcc. */ 00553 memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2); 00554 memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen); 00555 } 00556 00557 #ifdef POWER_REJECT 00558 if (para->energy || para->c0) { 00559 mfcc->avg_power += tmpmfcc[para->baselen-1]; 00560 } 00561 #endif 00562 00563 if (para->delta && (para->energy || para->c0) && para->absesup) { 00564 /* 絶対値パワーを除去 */ 00565 /* suppress absolute power */ 00566 memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen)); 00567 } 00568 00569 /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */ 00570 /* tmpmfcc[] now holds the latest parameter vector */ 00571 00572 /* CMN を計算 */ 00573 /* perform CMN */ 00574 if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); 00575 00576 return TRUE; 00577 } 00578 00579 static int 00580 proceed_one_frame(Recog *recog) 00581 { 00582 MFCCCalc *mfcc; 00583 RealBeam *r; 00584 int maxf; 00585 PROCESS_AM *am; 00586 int rewind_frame; 00587 boolean reprocess; 00588 boolean ok_p; 00589 00590 r = &(recog->real); 00591 00592 /* call recognition start callback */ 00593 ok_p = FALSE; 00594 maxf = 0; 00595 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00596 if (!mfcc->valid) continue; 00597 if (maxf < mfcc->f) maxf = mfcc->f; 00598 if (mfcc->f == 0) { 00599 ok_p = TRUE; 00600 } 00601 } 00602 if (ok_p && maxf == 0) { 00603 /* call callback when at least one of MFCC has initial frame */ 00604 if (recog->jconf->decodeopt.segment) { 00605 #ifdef BACKEND_VAD 00606 /* not exec pass1 begin callback here */ 00607 #else 00608 if (!recog->process_segment) { 00609 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00610 } 00611 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00612 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00613 recog->triggered = TRUE; 00614 #endif 00615 } else { 00616 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00617 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00618 recog->triggered = TRUE; 00619 } 00620 } 00621 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 00622 switch (decode_proceed(recog)) { 00623 case -1: /* error */ 00624 return -1; 00625 break; 00626 case 0: /* success */ 00627 break; 00628 case 1: /* segmented */ 00629 /* 認識処理のセグメント要求で終わったことをフラグにセット */ 00630 /* set flag which indicates that the input has ended with segmentation request */ 00631 r->last_is_segmented = TRUE; 00632 /* tell the caller to be segmented by this function */ 00633 /* 呼び出し元に,ここで入力を切るよう伝える */ 00634 return 1; 00635 } 00636 #ifdef BACKEND_VAD 00637 /* check up trigger in case of VAD segmentation */ 00638 if (recog->jconf->decodeopt.segment) { 00639 if (recog->triggered == FALSE) { 00640 if (spsegment_trigger_sync(recog)) { 00641 if (!recog->process_segment) { 00642 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00643 } 00644 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00645 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00646 recog->triggered = TRUE; 00647 } 00648 } 00649 } 00650 #endif 00651 00652 if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { 00653 /* set total length to the current frame */ 00654 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00655 if (!mfcc->valid) continue; 00656 mfcc->param->header.samplenum = mfcc->f + 1; 00657 mfcc->param->samplenum = mfcc->f + 1; 00658 } 00659 /* do rewind for all mfcc here */ 00660 spsegment_restart_mfccs(recog, rewind_frame, reprocess); 00661 /* also tell adin module to rehash the concurrent audio input */ 00662 recog->adin->rehash = TRUE; 00663 /* reset outprob cache for all AM */ 00664 for(am=recog->amlist;am;am=am->next) { 00665 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); 00666 } 00667 if (reprocess) { 00668 /* process the backstep MFCCs here */ 00669 while(1) { 00670 ok_p = TRUE; 00671 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00672 if (! mfcc->valid) continue; 00673 mfcc->f++; 00674 if (mfcc->f < mfcc->param->samplenum) { 00675 mfcc->valid = TRUE; 00676 ok_p = FALSE; 00677 } else { 00678 mfcc->valid = FALSE; 00679 } 00680 } 00681 if (ok_p) { 00682 /* すべての MFCC が終わりに達したのでループ終了 */ 00683 /* all MFCC has been processed, end of loop */ 00684 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00685 if (! mfcc->valid) continue; 00686 mfcc->f--; 00687 } 00688 break; 00689 } 00690 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 00691 switch (decode_proceed(recog)) { 00692 case -1: /* error */ 00693 return -1; 00694 break; 00695 case 0: /* success */ 00696 break; 00697 case 1: /* segmented */ 00698 /* ignore segmentation while in the backstep segment */ 00699 break; 00700 } 00701 /* call frame-wise callback */ 00702 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 00703 } 00704 } 00705 } 00706 /* call frame-wise callback if at least one of MFCC is valid at this frame */ 00707 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00708 if (mfcc->valid) { 00709 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 00710 break; 00711 } 00712 } 00713 00714 return 0; 00715 } 00716 00717 00786 int 00787 RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */ 00788 { 00789 int i, now, ret; 00790 MFCCCalc *mfcc; 00791 RealBeam *r; 00792 00793 r = &(recog->real); 00794 00795 #ifdef DEBUG_VTLN_ALPHA_TEST 00796 /* store speech */ 00797 adin_cut_callback_store_buffer(Speech, nowlen, recog); 00798 #endif 00799 00800 /* window[0..windownum-1] は前回の呼び出しで残った音声データが格納されている */ 00801 /* window[0..windownum-1] are speech data left from previous call */ 00802 00803 /* 処理用ポインタを初期化 */ 00804 /* initialize pointer for local processing */ 00805 now = 0; 00806 00807 /* 認識処理がセグメント要求で終わったのかどうかのフラグをリセット */ 00808 /* reset flag which indicates whether the input has ended with segmentation request */ 00809 r->last_is_segmented = FALSE; 00810 00811 #ifdef RDEBUG 00812 printf("got %d samples\n", nowlen); 00813 #endif 00814 00815 while (now < nowlen) { /* till whole input is processed */ 00816 /* 入力長が maxframelen に達したらここで強制終了 */ 00817 /* if input length reaches maximum buffer size, terminate 1st pass here */ 00818 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00819 if (mfcc->f >= r->maxframelen) return(1); 00820 } 00821 /* 窓バッファを埋められるだけ埋める */ 00822 /* fill window buffer as many as possible */ 00823 for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--) 00824 r->window[r->windownum++] = (float) Speech[now++]; 00825 /* もし窓バッファが埋まらなければ, このセグメントの処理はここで終わる. 00826 処理されなかったサンプル (window[0..windownum-1]) は次回に持ち越し. */ 00827 /* if window buffer was not filled, end processing here, keeping the 00828 rest samples (window[0..windownum-1]) in the window buffer. */ 00829 if (r->windownum < r->windowlen) break; 00830 #ifdef RDEBUG 00831 /* printf("%d used, %d rest\n", now, nowlen - now); 00832 00833 printf("[f = %d]\n", f);*/ 00834 #endif 00835 00836 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00837 mfcc->valid = FALSE; 00838 /* 窓内の音声波形から特徴量を計算して r->tmpmfcc に格納 */ 00839 /* calculate a parameter vector from current waveform windows 00840 and store to r->tmpmfcc */ 00841 if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) { 00842 #ifdef ENABLE_PLUGIN 00843 /* call post-process plugin if exist */ 00844 plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f); 00845 #endif 00846 /* MFCC完成,登録 */ 00847 mfcc->valid = TRUE; 00848 /* now get the MFCC vector of current frame, now store it to param */ 00849 if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { 00850 jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); 00851 return -1; 00852 } 00853 memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); 00854 #ifdef RDEBUG 00855 printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f); 00856 #endif 00857 } 00858 } 00859 00860 /* 処理を1フレーム進める */ 00861 /* proceed one frame */ 00862 ret = proceed_one_frame(recog); 00863 00864 if (ret == 1 && recog->jconf->decodeopt.segment) { 00865 /* ショートポーズセグメンテーション: バッファに残っているデータを 00866 別に保持して,次回の最初に処理する */ 00867 /* short pause segmentation: there is some data left in buffer, so 00868 we should keep them for next processing */ 00869 r->rest_len = nowlen - now; 00870 if (r->rest_len > 0) { 00871 /* copy rest samples to rest_Speech */ 00872 if (r->rest_Speech == NULL) { 00873 r->rest_alloc_len = r->rest_len; 00874 r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len); 00875 } else if (r->rest_alloc_len < r->rest_len) { 00876 r->rest_alloc_len = r->rest_len; 00877 r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len); 00878 } 00879 memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len); 00880 } 00881 } 00882 if (ret != 0) return ret; 00883 00884 /* 1フレーム処理が進んだのでポインタを進める */ 00885 /* proceed frame pointer */ 00886 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00887 if (!mfcc->valid) continue; 00888 mfcc->f++; 00889 } 00890 00891 /* 窓バッファを処理が終わった分シフト */ 00892 /* shift window */ 00893 memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); 00894 r->windownum -= recog->jconf->input.frameshift; 00895 } 00896 00897 /* 与えられた音声セグメントに対する認識処理が全て終了 00898 呼び出し元に, 入力を続けるよう伝える */ 00899 /* input segment is fully processed 00900 tell the caller to continue input */ 00901 return(0); 00902 } 00903 00937 int 00938 RealTimeResume(Recog *recog) 00939 { 00940 MFCCCalc *mfcc; 00941 RealBeam *r; 00942 boolean ok_p; 00943 #ifdef SPSEGMENT_NAIST 00944 RecogProcess *p; 00945 #endif 00946 PROCESS_AM *am; 00947 00948 r = &(recog->real); 00949 00950 /* 計算用のワークエリアを準備 */ 00951 /* prepare work area for calculation */ 00952 if (recog->jconf->input.type == INPUT_WAVEFORM) { 00953 reset_mfcc(recog); 00954 } 00955 /* 音響尤度計算用キャッシュを準備 */ 00956 /* prepare cache area for acoustic computation of HMM states and mixtures */ 00957 for(am=recog->amlist;am;am=am->next) { 00958 outprob_prepare(&(am->hmmwrk), r->maxframelen); 00959 } 00960 00961 /* param にある全パラメータを処理する準備 */ 00962 /* prepare to process all data in param */ 00963 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00964 if (mfcc->param->samplenum == 0) mfcc->valid = FALSE; 00965 else mfcc->valid = TRUE; 00966 #ifdef RDEBUG 00967 printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1); 00968 #endif 00969 /* フレーム数をリセット */ 00970 /* reset frame count */ 00971 mfcc->f = 0; 00972 /* MAP-CMN の初期化 */ 00973 /* Prepare for MAP-CMN */ 00974 if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); 00975 } 00976 00977 #ifdef BACKEND_VAD 00978 if (recog->jconf->decodeopt.segment) { 00979 spsegment_init(recog); 00980 } 00981 /* not exec pass1 begin callback here */ 00982 #else 00983 recog->triggered = FALSE; 00984 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00985 if (!mfcc->valid) continue; 00986 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00987 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00988 recog->triggered = TRUE; 00989 break; 00990 } 00991 #endif 00992 00993 /* param 内の全フレームについて認識処理を進める */ 00994 /* proceed recognition for all frames in param */ 00995 00996 while(1) { 00997 ok_p = TRUE; 00998 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00999 if (! mfcc->valid) continue; 01000 if (mfcc->f < mfcc->param->samplenum) { 01001 mfcc->valid = TRUE; 01002 ok_p = FALSE; 01003 } else { 01004 mfcc->valid = FALSE; 01005 } 01006 } 01007 if (ok_p) { 01008 /* すべての MFCC が終わりに達したのでループ終了 */ 01009 /* all MFCC has been processed, end of loop */ 01010 break; 01011 } 01012 01013 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 01014 switch (decode_proceed(recog)) { 01015 case -1: /* error */ 01016 return -1; 01017 break; 01018 case 0: /* success */ 01019 break; 01020 case 1: /* segmented */ 01021 /* segmented, end procs ([0..f])*/ 01022 r->last_is_segmented = TRUE; 01023 return 1; /* segmented by this function */ 01024 } 01025 01026 #ifdef BACKEND_VAD 01027 /* check up trigger in case of VAD segmentation */ 01028 if (recog->jconf->decodeopt.segment) { 01029 if (recog->triggered == FALSE) { 01030 if (spsegment_trigger_sync(recog)) { 01031 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01032 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01033 recog->triggered = TRUE; 01034 } 01035 } 01036 } 01037 #endif 01038 01039 /* call frame-wise callback */ 01040 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 01041 01042 /* 1フレーム処理が進んだのでポインタを進める */ 01043 /* proceed frame pointer */ 01044 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01045 if (!mfcc->valid) continue; 01046 mfcc->f++; 01047 } 01048 01049 } 01050 /* 前回のセグメント時に入力をシフトしていない分をシフトする */ 01051 /* do the last shift here */ 01052 if (recog->jconf->input.type == INPUT_WAVEFORM) { 01053 memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); 01054 r->windownum -= recog->jconf->input.frameshift; 01055 /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから 01056 処理する */ 01057 /* now that the search status has been prepared for the next input, we 01058 first process the rest unprocessed samples at the last session */ 01059 if (r->rest_len > 0) { 01060 return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog)); 01061 } 01062 } 01063 01064 /* 新規の入力に対して認識処理は続く… */ 01065 /* the recognition process will continue for the newly incoming samples... */ 01066 return 0; 01067 01068 } 01069 01070 01104 boolean 01105 RealTimeParam(Recog *recog) 01106 { 01107 boolean ret1, ret2; 01108 RealBeam *r; 01109 int ret; 01110 int maxf; 01111 boolean ok_p; 01112 MFCCCalc *mfcc; 01113 Value *para; 01114 #ifdef RDEBUG 01115 int i; 01116 #endif 01117 01118 r = &(recog->real); 01119 01120 if (r->last_is_segmented) { 01121 01122 /* RealTimePipeLine で認識処理側の理由により認識が中断した場合, 01123 現状態のMFCC計算データをそのまま次回へ保持する必要があるので, 01124 MFCC計算終了処理を行わずに第1パスの結果のみ出力して終わる. */ 01125 /* When input segmented by recognition process in RealTimePipeLine(), 01126 we have to keep the whole current status of MFCC computation to the 01127 next call. So here we only output the 1st pass result. */ 01128 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01129 mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */ 01130 mfcc->param->samplenum = mfcc->f + 1; 01131 } 01132 decode_end_segmented(recog); 01133 01134 /* この区間の param データを第2パスのために返す */ 01135 /* return obtained parameter for 2nd pass */ 01136 return(TRUE); 01137 } 01138 01139 if (recog->jconf->input.type == INPUT_VECTOR) { 01140 /* finalize real-time 1st pass */ 01141 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01142 mfcc->param->header.samplenum = mfcc->f; 01143 mfcc->param->samplenum = mfcc->f; 01144 } 01145 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01146 decode_end(recog); 01147 return TRUE; 01148 } 01149 01150 /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */ 01151 /* finish MFCC computation for the last delayed frames */ 01152 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01153 if (mfcc->para->delta || mfcc->para->acc) { 01154 mfcc->valid = TRUE; 01155 } else { 01156 mfcc->valid = FALSE; 01157 } 01158 } 01159 01160 /* loop until all data has been flushed */ 01161 while (1) { 01162 01163 /* if all mfcc became invalid, exit loop here */ 01164 ok_p = FALSE; 01165 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01166 if (mfcc->valid) { 01167 ok_p = TRUE; 01168 break; 01169 } 01170 } 01171 if (!ok_p) break; 01172 01173 /* try to get 1 frame for all mfcc instances */ 01174 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01175 01176 para = mfcc->para; 01177 01178 if (! mfcc->valid) continue; 01179 01180 /* check if there is data in cycle buffer of delta */ 01181 ret1 = WMP_deltabuf_flush(mfcc->db); 01182 #ifdef RDEBUG 01183 printf("DeltaBufLast: ret=%d, status=", ret1); 01184 for(i=0;i<mfcc->db->len;i++) { 01185 printf("%d", mfcc->db->is_on[i]); 01186 } 01187 printf(", nextstore=%d\n", mfcc->db->store); 01188 #endif 01189 if (ret1) { 01190 /* uncomputed delta has flushed, compute it with tmpmfcc */ 01191 if (para->energy && para->absesup) { 01192 memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1)); 01193 memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen); 01194 } else { 01195 memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); 01196 } 01197 if (para->acc) { 01198 /* this new delta should be given to the accel cycle buffer */ 01199 ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc); 01200 #ifdef RDEBUG 01201 printf("AccelBuf: ret=%d, status=", ret2); 01202 for(i=0;i<mfcc->ab->len;i++) { 01203 printf("%d", mfcc->ab->is_on[i]); 01204 } 01205 printf(", nextstore=%d\n", mfcc->ab->store); 01206 #endif 01207 if (ret2) { 01208 /* uncomputed accel was given, compute it with tmpmfcc */ 01209 memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); 01210 memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); 01211 } else { 01212 /* still no input is given: */ 01213 /* in case of very short input: go on to the next input */ 01214 continue; 01215 } 01216 } 01217 01218 } else { 01219 01220 /* no data left in the delta buffer */ 01221 if (para->acc) { 01222 /* no new data, just flush the accel buffer */ 01223 ret2 = WMP_deltabuf_flush(mfcc->ab); 01224 #ifdef RDEBUG 01225 printf("AccelBuf: ret=%d, status=", ret2); 01226 for(i=0;i<mfcc->ab->len;i++) { 01227 printf("%d", mfcc->ab->is_on[i]); 01228 } 01229 printf(", nextstore=%d\n", mfcc->ab->store); 01230 #endif 01231 if (ret2) { 01232 /* uncomputed data has flushed, compute it with tmpmfcc */ 01233 memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); 01234 memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); 01235 } else { 01236 /* actually no data exists in both delta and accel */ 01237 mfcc->valid = FALSE; /* disactivate this instance */ 01238 continue; /* end this loop */ 01239 } 01240 } else { 01241 /* only delta: input fully flushed */ 01242 mfcc->valid = FALSE; /* disactivate this instance */ 01243 continue; /* end this loop */ 01244 } 01245 } 01246 /* a new frame has been obtained from delta buffer to tmpmfcc */ 01247 if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc); 01248 if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { 01249 jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); 01250 return FALSE; 01251 } 01252 /* store to mfcc->f */ 01253 memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); 01254 #ifdef ENABLE_PLUGIN 01255 /* call postprocess plugin if any */ 01256 plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f); 01257 #endif 01258 } 01259 01260 /* call recognition start callback */ 01261 ok_p = FALSE; 01262 maxf = 0; 01263 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01264 if (!mfcc->valid) continue; 01265 if (maxf < mfcc->f) maxf = mfcc->f; 01266 if (mfcc->f == 0) { 01267 ok_p = TRUE; 01268 } 01269 } 01270 01271 if (ok_p && maxf == 0) { 01272 /* call callback when at least one of MFCC has initial frame */ 01273 if (recog->jconf->decodeopt.segment) { 01274 #ifdef BACKEND_VAD 01275 /* not exec pass1 begin callback here */ 01276 #else 01277 if (!recog->process_segment) { 01278 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01279 } 01280 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01281 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01282 recog->triggered = TRUE; 01283 #endif 01284 } else { 01285 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01286 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01287 recog->triggered = TRUE; 01288 } 01289 } 01290 01291 /* proceed for the curent frame */ 01292 ret = decode_proceed(recog); 01293 if (ret == -1) { /* error */ 01294 return -1; 01295 } else if (ret == 1) { /* segmented */ 01296 /* loop out */ 01297 break; 01298 } /* else no event occured */ 01299 01300 #ifdef BACKEND_VAD 01301 /* check up trigger in case of VAD segmentation */ 01302 if (recog->jconf->decodeopt.segment) { 01303 if (recog->triggered == FALSE) { 01304 if (spsegment_trigger_sync(recog)) { 01305 if (!recog->process_segment) { 01306 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01307 } 01308 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01309 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01310 recog->triggered = TRUE; 01311 } 01312 } 01313 } 01314 #endif 01315 01316 /* call frame-wise callback */ 01317 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 01318 01319 /* move to next */ 01320 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01321 if (! mfcc->valid) continue; 01322 mfcc->f++; 01323 if (mfcc->f > r->maxframelen) mfcc->valid = FALSE; 01324 } 01325 } 01326 01327 /* finalize real-time 1st pass */ 01328 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01329 mfcc->param->header.samplenum = mfcc->f; 01330 mfcc->param->samplenum = mfcc->f; 01331 } 01332 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01333 decode_end(recog); 01334 01335 return(TRUE); 01336 } 01337 01356 void 01357 RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog) 01358 { 01359 boolean cmn_update_p; 01360 Value *para; 01361 Jconf *jconf; 01362 RecogProcess *r; 01363 01364 jconf = recog->jconf; 01365 para = mfcc->para; 01366 01367 /* update CMN vector for next speech */ 01368 if(para->cmn) { 01369 if (mfcc->cmn.update) { 01370 cmn_update_p = TRUE; 01371 for(r=recog->process_list;r;r=r->next) { 01372 if (!r->live) continue; 01373 if (r->am->mfcc != mfcc) continue; 01374 if (r->result.status < 0) { /* input rejected */ 01375 cmn_update_p = FALSE; 01376 break; 01377 } 01378 } 01379 if (cmn_update_p) { 01380 /* update last CMN parameter for next spech */ 01381 CMN_realtime_update(mfcc->cmn.wrk, mfcc->param); 01382 } else { 01383 /* do not update, because the last input is bogus */ 01384 if (verbose_flag) { 01385 #ifdef BACKEND_VAD 01386 if (!recog->jconf->decodeopt.segment || recog->triggered) { 01387 jlog("STAT: skip CMN parameter update since last input was invalid\n"); 01388 } 01389 #else 01390 jlog("STAT: skip CMN parameter update since last input was invalid\n"); 01391 #endif 01392 } 01393 } 01394 } 01395 /* if needed, save the updated CMN parameter to a file */ 01396 if (mfcc->cmn.save_filename) { 01397 if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) { 01398 jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename); 01399 } 01400 } 01401 } 01402 } 01403 01416 void 01417 RealTimeTerminate(Recog *recog) 01418 { 01419 MFCCCalc *mfcc; 01420 01421 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01422 mfcc->param->header.samplenum = mfcc->f; 01423 mfcc->param->samplenum = mfcc->f; 01424 } 01425 01426 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01427 decode_end(recog); 01428 } 01429 01441 void 01442 realbeam_free(Recog *recog) 01443 { 01444 RealBeam *r; 01445 01446 r = &(recog->real); 01447 01448 if (recog->real.window) { 01449 free(recog->real.window); 01450 recog->real.window = NULL; 01451 } 01452 if (recog->real.rest_Speech) { 01453 free(recog->real.rest_Speech); 01454 recog->real.rest_Speech = NULL; 01455 } 01456 } 01457 01458 01459 01460 /************************************************************************/ 01461 /************************************************************************/ 01462 /************************************************************************/ 01463 /************************************************************************/ 01464 01465 /* MFCC realtime input */ 01483 int 01484 mfcc_go(Recog *recog, int (*ad_check)(Recog *)) 01485 { 01486 RealBeam *r; 01487 MFCCCalc *mfcc; 01488 int new_f; 01489 int ret, ret3; 01490 01491 r = &(recog->real); 01492 01493 r->last_is_segmented = FALSE; 01494 01495 while(1/*in_data_vec*/) { 01496 01497 ret = mfc_module_read(recog->mfcclist, &new_f); 01498 01499 if (debug2_flag) { 01500 if (recog->mfcclist->f < new_f) { 01501 jlog("%d: %d (%d)\n", recog->mfcclist->f, new_f, ret); 01502 } 01503 } 01504 01505 /* callback poll */ 01506 if (ad_check != NULL) { 01507 if ((ret3 = (*(ad_check))(recog)) < 0) { 01508 if ((ret3 == -1 && recog->mfcclist->f == 0) || ret3 == -2) { 01509 return(-2); 01510 } 01511 } 01512 } 01513 01514 while(recog->mfcclist->f < new_f) { 01515 01516 recog->mfcclist->valid = TRUE; 01517 01518 #ifdef ENABLE_PLUGIN 01519 /* call post-process plugin if exist */ 01520 plugin_exec_vector_postprocess(recog->mfcclist->param->parvec[recog->mfcclist->f], recog->mfcclist->param->veclen, recog->mfcclist->f); 01521 #endif 01522 01523 /* 処理を1フレーム進める */ 01524 /* proceed one frame */ 01525 01526 switch(proceed_one_frame(recog)) { 01527 case -1: /* error */ 01528 return -1; 01529 case 0: /* normal */ 01530 break; 01531 case 1: /* segmented by process */ 01532 return 2; 01533 } 01534 01535 /* 1フレーム処理が進んだのでポインタを進める */ 01536 /* proceed frame pointer */ 01537 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01538 if (!mfcc->valid) continue; 01539 mfcc->f++; 01540 } 01541 } 01542 01543 /* check if input end */ 01544 switch(ret) { 01545 case -1: /* end of input */ 01546 return 0; 01547 case -2: /* error */ 01548 return -1; 01549 case -3: /* end of segment request */ 01550 return 1; 01551 } 01552 } 01553 /* 与えられた音声セグメントに対する認識処理が全て終了 01554 呼び出し元に, 入力を続けるよう伝える */ 01555 /* input segment is fully processed 01556 tell the caller to continue input */ 01557 return(1); 01558 } 01559 01560 /* end of file */ 01561 01562