/* Julius 4.2 */
00001 00117 /* 00118 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00119 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00120 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00121 * All rights reserved 00122 */ 00123 00124 #include <julius/julius.h> 00125 00126 #undef RDEBUG ///< Define if you want local debug message 00127 00158 static void 00159 init_param(MFCCCalc *mfcc) 00160 { 00161 Value *para; 00162 00163 para = mfcc->para; 00164 00165 /* これから計算されるパラメータの型をヘッダに設定 */ 00166 /* set header types */ 00167 mfcc->param->header.samptype = F_MFCC; 00168 if (para->delta) mfcc->param->header.samptype |= F_DELTA; 00169 if (para->acc) mfcc->param->header.samptype |= F_ACCL; 00170 if (para->energy) mfcc->param->header.samptype |= F_ENERGY; 00171 if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; 00172 if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; 00173 if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; 00174 00175 mfcc->param->header.wshift = para->smp_period * para->frameshift; 00176 mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ 00177 mfcc->param->veclen = para->veclen; 00178 00179 /* 認識処理中/終了後にセットされる変数: 00180 param->parvec (パラメータベクトル系列) 00181 param->header.samplenum, param->samplenum (全フレーム数) 00182 */ 00183 /* variables that will be set while/after computation has been done: 00184 param->parvec (parameter vector sequence) 00185 param->header.samplenum, param->samplenum (total number of frames) 00186 */ 00187 /* MAP-CMN の初期化 */ 00188 /* Prepare for MAP-CMN */ 00189 if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); 00190 } 00191 00219 boolean 00220 RealTimeInit(Recog *recog) 00221 { 00222 Value *para; 00223 Jconf *jconf; 00224 RealBeam *r; 00225 MFCCCalc *mfcc; 00226 00227 00228 jconf = recog->jconf; 00229 r = &(recog->real); 00230 00231 /* 最大フレーム長を最大入力時間数から計算 */ 00232 /* set maximum allowed frame length */ 
00233 r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift; 00234 00235 /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */ 00236 /* if "-ssload", load noise spectrum for spectral subtraction from file */ 00237 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00238 if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) { 00239 if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) { 00240 jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename); 00241 return FALSE; 00242 } 00243 /* check ssbuf length */ 00244 if (mfcc->frontend.sslen != mfcc->wrk->bflen) { 00245 jlog("ERROR: noise spectrum length not match\n"); 00246 return FALSE; 00247 } 00248 mfcc->wrk->ssbuf = mfcc->frontend.ssbuf; 00249 mfcc->wrk->ssbuflen = mfcc->frontend.sslen; 00250 mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha; 00251 mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; 00252 } 00253 } 00254 00255 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00256 00257 para = mfcc->para; 00258 00259 /* 対数エネルギー正規化のための初期値 */ 00260 /* set initial value for log energy normalization */ 00261 if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk)); 00262 /* デルタ計算のためのサイクルバッファを用意 */ 00263 /* initialize cycle buffers for delta and accel coef. 
computation */ 00264 if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin); 00265 if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin); 00266 /* デルタ計算のためのワークエリアを確保 */ 00267 /* allocate work area for the delta computation */ 00268 mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen); 00269 /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */ 00270 /* Initialize the initial cepstral mean data from file for MAP-CMN */ 00271 if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight); 00272 /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */ 00273 /* if "-cmnload", load initial cepstral mean data from file for CMN */ 00274 if (mfcc->cmn.load_filename) { 00275 if (para->cmn) { 00276 if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) { 00277 jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename); 00278 } 00279 } else { 00280 jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename); 00281 } 00282 } 00283 00284 } 00285 /* 窓長をセット */ 00286 /* set window length */ 00287 r->windowlen = recog->jconf->input.framesize + 1; 00288 /* 窓かけ用バッファを確保 */ 00289 /* set window buffer */ 00290 r->window = mymalloc(sizeof(SP16) * r->windowlen); 00291 00292 return TRUE; 00293 } 00294 00319 void 00320 reset_mfcc(Recog *recog) 00321 { 00322 Value *para; 00323 MFCCCalc *mfcc; 00324 RealBeam *r; 00325 00326 r = &(recog->real); 00327 00328 /* 特徴抽出モジュールを初期化 */ 00329 /* initialize parameter extraction module */ 00330 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00331 00332 para = mfcc->para; 00333 00334 /* 対数エネルギー正規化のための初期値をセット */ 00335 /* set initial value for log energy normalization */ 00336 if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para); 00337 /* デルタ計算用バッファを準備 */ 00338 /* set the delta cycle buffer */ 00339 if (para->delta) WMP_deltabuf_prepare(mfcc->db); 00340 if 
(para->acc) WMP_deltabuf_prepare(mfcc->ab); 00341 } 00342 00343 } 00344 00371 boolean 00372 RealTimePipeLinePrepare(Recog *recog) 00373 { 00374 RealBeam *r; 00375 PROCESS_AM *am; 00376 MFCCCalc *mfcc; 00377 #ifdef SPSEGMENT_NAIST 00378 RecogProcess *p; 00379 #endif 00380 00381 r = &(recog->real); 00382 00383 /* 計算用の変数を初期化 */ 00384 /* initialize variables for computation */ 00385 r->windownum = 0; 00386 /* parameter check */ 00387 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00388 /* パラメータ初期化 */ 00389 /* parameter initialization */ 00390 if (recog->jconf->input.speech_input == SP_MFCMODULE) { 00391 if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE; 00392 } else { 00393 init_param(mfcc); 00394 } 00395 /* フレームごとのパラメータベクトル保存の領域を確保 */ 00396 /* あとで必要に応じて伸長される */ 00397 if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) { 00398 j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n"); 00399 } 00400 /* フレーム数をリセット */ 00401 /* reset frame count */ 00402 mfcc->f = 0; 00403 } 00404 /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */ 00405 /* check type coherence between param and hmminfo here */ 00406 if (recog->jconf->input.paramtype_check_flag) { 00407 for(am=recog->amlist;am;am=am->next) { 00408 if (!check_param_coherence(am->hmminfo, am->mfcc->param)) { 00409 jlog("ERROR: input parameter type does not match AM\n"); 00410 return FALSE; 00411 } 00412 } 00413 } 00414 00415 /* 計算用のワークエリアを準備 */ 00416 /* prepare work area for calculation */ 00417 if (recog->jconf->input.type == INPUT_WAVEFORM) { 00418 reset_mfcc(recog); 00419 } 00420 /* 音響尤度計算用キャッシュを準備 */ 00421 /* prepare cache area for acoustic computation of HMM states and mixtures */ 00422 for(am=recog->amlist;am;am=am->next) { 00423 outprob_prepare(&(am->hmmwrk), r->maxframelen); 00424 } 00425 00426 #ifdef BACKEND_VAD 00427 if (recog->jconf->decodeopt.segment) { 00428 /* initialize segmentation parameters */ 00429 spsegment_init(recog); 00430 } 00431 #else 00432 
recog->triggered = FALSE; 00433 #endif 00434 00435 #ifdef DEBUG_VTLN_ALPHA_TEST 00436 /* store speech */ 00437 recog->speechlen = 0; 00438 #endif 00439 00440 return TRUE; 00441 } 00442 00475 boolean 00476 RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen) 00477 { 00478 int i; 00479 boolean ret; 00480 VECT *tmpmfcc; 00481 Value *para; 00482 00483 tmpmfcc = mfcc->tmpmfcc; 00484 para = mfcc->para; 00485 00486 /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */ 00487 /* calculate base MFCC from waveform (use recog->mfccwrk) */ 00488 for (i=0; i < windowlen; i++) { 00489 mfcc->wrk->bf[i+1] = (float) window[i]; 00490 } 00491 WMP_calc(mfcc->wrk, tmpmfcc, para); 00492 00493 if (para->energy && para->enormal) { 00494 /* 対数エネルギー項を正規化する */ 00495 /* normalize log energy */ 00496 /* リアルタイム入力では発話ごとの最大エネルギーが得られないので 00497 直前の発話のパワーで代用する */ 00498 /* Since the maximum power of the whole input utterance cannot be 00499 obtained at real-time input, the maximum of last input will be 00500 used to normalize. 00501 */ 00502 tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para); 00503 } 00504 00505 if (para->delta) { 00506 /* デルタを計算する */ 00507 /* calc delta coefficients */ 00508 ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc); 00509 #ifdef RDEBUG 00510 printf("DeltaBuf: ret=%d, status=", ret); 00511 for(i=0;i<mfcc->db->len;i++) { 00512 printf("%d", mfcc->db->is_on[i]); 00513 } 00514 printf(", nextstore=%d\n", mfcc->db->store); 00515 #endif 00516 /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ 00517 /* if ret == FALSE, there is no available frame. So just wait for 00518 next input */ 00519 if (! 
ret) { 00520 return FALSE; 00521 } 00522 00523 /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */ 00524 /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */ 00525 memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); 00526 } 00527 00528 if (para->acc) { 00529 /* Accelerationを計算する */ 00530 /* calc acceleration coefficients */ 00531 /* base+delta をそのまま入れる */ 00532 /* send the whole base+delta to the cycle buffer */ 00533 ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc); 00534 #ifdef RDEBUG 00535 printf("AccelBuf: ret=%d, status=", ret); 00536 for(i=0;i<mfcc->ab->len;i++) { 00537 printf("%d", mfcc->ab->is_on[i]); 00538 } 00539 printf(", nextstore=%d\n", mfcc->ab->store); 00540 #endif 00541 /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ 00542 /* if ret == FALSE, there is no available frame. So just wait for 00543 next input */ 00544 if (! ret) { 00545 return FALSE; 00546 } 00547 /* ab->vec には,(base+delta) とその差分係数が入っている. 00548 [base] [delta] [delta] [acc] の順で入っているので, 00549 [base] [delta] [acc] を tmpmfcc にコピーする. */ 00550 /* now ab->vec holds the current (base+delta) and their delta coef. 00551 it holds a vector in the order of [base] [delta] [delta] [acc], 00552 so copy the [base], [delta] and [acc] to tmpmfcc. 
*/ 00553 memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2); 00554 memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen); 00555 } 00556 00557 #ifdef POWER_REJECT 00558 if (para->energy || para->c0) { 00559 mfcc->avg_power += tmpmfcc[para->baselen-1]; 00560 } 00561 #endif 00562 00563 if (para->delta && (para->energy || para->c0) && para->absesup) { 00564 /* 絶対値パワーを除去 */ 00565 /* suppress absolute power */ 00566 memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen)); 00567 } 00568 00569 /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */ 00570 /* tmpmfcc[] now holds the latest parameter vector */ 00571 00572 /* CMN を計算 */ 00573 /* perform CMN */ 00574 if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); 00575 00576 return TRUE; 00577 } 00578 00579 static int 00580 proceed_one_frame(Recog *recog) 00581 { 00582 MFCCCalc *mfcc; 00583 RealBeam *r; 00584 int maxf; 00585 PROCESS_AM *am; 00586 int rewind_frame; 00587 boolean reprocess; 00588 boolean ok_p; 00589 00590 r = &(recog->real); 00591 00592 /* call recognition start callback */ 00593 ok_p = FALSE; 00594 maxf = 0; 00595 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00596 if (!mfcc->valid) continue; 00597 if (maxf < mfcc->f) maxf = mfcc->f; 00598 if (mfcc->f == 0) { 00599 ok_p = TRUE; 00600 } 00601 } 00602 if (ok_p && maxf == 0) { 00603 /* call callback when at least one of MFCC has initial frame */ 00604 if (recog->jconf->decodeopt.segment) { 00605 #ifdef BACKEND_VAD 00606 /* not exec pass1 begin callback here */ 00607 #else 00608 if (!recog->process_segment) { 00609 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00610 } 00611 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00612 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00613 recog->triggered = TRUE; 00614 #endif 00615 } else { 00616 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00617 
callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00618 recog->triggered = TRUE; 00619 } 00620 } 00621 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 00622 switch (decode_proceed(recog)) { 00623 case -1: /* error */ 00624 return -1; 00625 break; 00626 case 0: /* success */ 00627 break; 00628 case 1: /* segmented */ 00629 /* 認識処理のセグメント要求で終わったことをフラグにセット */ 00630 /* set flag which indicates that the input has ended with segmentation request */ 00631 r->last_is_segmented = TRUE; 00632 /* tell the caller to be segmented by this function */ 00633 /* 呼び出し元に,ここで入力を切るよう伝える */ 00634 return 1; 00635 } 00636 #ifdef BACKEND_VAD 00637 /* check up trigger in case of VAD segmentation */ 00638 if (recog->jconf->decodeopt.segment) { 00639 if (recog->triggered == FALSE) { 00640 if (spsegment_trigger_sync(recog)) { 00641 if (!recog->process_segment) { 00642 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00643 } 00644 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00645 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00646 recog->triggered = TRUE; 00647 } 00648 } 00649 } 00650 #endif 00651 00652 if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { 00653 /* set total length to the current frame */ 00654 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00655 if (!mfcc->valid) continue; 00656 mfcc->param->header.samplenum = mfcc->f + 1; 00657 mfcc->param->samplenum = mfcc->f + 1; 00658 } 00659 /* do rewind for all mfcc here */ 00660 spsegment_restart_mfccs(recog, rewind_frame, reprocess); 00661 /* also tell adin module to rehash the concurrent audio input */ 00662 recog->adin->rehash = TRUE; 00663 /* reset outprob cache for all AM */ 00664 for(am=recog->amlist;am;am=am->next) { 00665 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); 00666 } 00667 if (reprocess) { 00668 /* process the backstep MFCCs here */ 00669 while(1) { 00670 ok_p = TRUE; 00671 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00672 if (! 
mfcc->valid) continue; 00673 mfcc->f++; 00674 if (mfcc->f < mfcc->param->samplenum) { 00675 mfcc->valid = TRUE; 00676 ok_p = FALSE; 00677 } else { 00678 mfcc->valid = FALSE; 00679 } 00680 } 00681 if (ok_p) { 00682 /* すべての MFCC が終わりに達したのでループ終了 */ 00683 /* all MFCC has been processed, end of loop */ 00684 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00685 if (! mfcc->valid) continue; 00686 mfcc->f--; 00687 } 00688 break; 00689 } 00690 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 00691 switch (decode_proceed(recog)) { 00692 case -1: /* error */ 00693 return -1; 00694 break; 00695 case 0: /* success */ 00696 break; 00697 case 1: /* segmented */ 00698 /* ignore segmentation while in the backstep segment */ 00699 break; 00700 } 00701 /* call frame-wise callback */ 00702 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 00703 } 00704 } 00705 } 00706 /* call frame-wise callback if at least one of MFCC is valid at this frame */ 00707 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00708 if (mfcc->valid) { 00709 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 00710 break; 00711 } 00712 } 00713 00714 return 0; 00715 } 00716 00717 00787 int 00788 RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */ 00789 { 00790 int i, now, ret; 00791 MFCCCalc *mfcc; 00792 RealBeam *r; 00793 00794 r = &(recog->real); 00795 00796 #ifdef DEBUG_VTLN_ALPHA_TEST 00797 /* store speech */ 00798 adin_cut_callback_store_buffer(Speech, nowlen, recog); 00799 #endif 00800 00801 /* window[0..windownum-1] は前回の呼び出しで残った音声データが格納されている */ 00802 /* window[0..windownum-1] are speech data left from previous call */ 00803 00804 /* 処理用ポインタを初期化 */ 00805 /* initialize pointer for local processing */ 00806 now = 0; 00807 00808 /* 認識処理がセグメント要求で終わったのかどうかのフラグをリセット */ 00809 /* reset flag which indicates whether the input has ended with segmentation request */ 00810 r->last_is_segmented = FALSE; 00811 00812 #ifdef RDEBUG 00813 printf("got %d samples\n", nowlen); 00814 
#endif 00815 00816 while (now < nowlen) { /* till whole input is processed */ 00817 /* 入力長が maxframelen に達したらここで強制終了 */ 00818 /* if input length reaches maximum buffer size, terminate 1st pass here */ 00819 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00820 if (mfcc->f >= r->maxframelen) return(1); 00821 } 00822 /* 窓バッファを埋められるだけ埋める */ 00823 /* fill window buffer as many as possible */ 00824 for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--) 00825 r->window[r->windownum++] = (float) Speech[now++]; 00826 /* もし窓バッファが埋まらなければ, このセグメントの処理はここで終わる. 00827 処理されなかったサンプル (window[0..windownum-1]) は次回に持ち越し. */ 00828 /* if window buffer was not filled, end processing here, keeping the 00829 rest samples (window[0..windownum-1]) in the window buffer. */ 00830 if (r->windownum < r->windowlen) break; 00831 #ifdef RDEBUG 00832 /* printf("%d used, %d rest\n", now, nowlen - now); 00833 00834 printf("[f = %d]\n", f);*/ 00835 #endif 00836 00837 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00838 mfcc->valid = FALSE; 00839 /* 窓内の音声波形から特徴量を計算して r->tmpmfcc に格納 */ 00840 /* calculate a parameter vector from current waveform windows 00841 and store to r->tmpmfcc */ 00842 if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) { 00843 #ifdef ENABLE_PLUGIN 00844 /* call post-process plugin if exist */ 00845 plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f); 00846 #endif 00847 /* MFCC完成,登録 */ 00848 mfcc->valid = TRUE; 00849 /* now get the MFCC vector of current frame, now store it to param */ 00850 if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { 00851 jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); 00852 return -1; 00853 } 00854 memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); 00855 #ifdef RDEBUG 00856 printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f); 00857 #endif 00858 } 00859 } 00860 00861 /* 処理を1フレーム進める */ 00862 /* proceed one 
frame */ 00863 ret = proceed_one_frame(recog); 00864 00865 if (ret == 1 && recog->jconf->decodeopt.segment) { 00866 /* ショートポーズセグメンテーション: バッファに残っているデータを 00867 別に保持して,次回の最初に処理する */ 00868 /* short pause segmentation: there is some data left in buffer, so 00869 we should keep them for next processing */ 00870 r->rest_len = nowlen - now; 00871 if (r->rest_len > 0) { 00872 /* copy rest samples to rest_Speech */ 00873 if (r->rest_Speech == NULL) { 00874 r->rest_alloc_len = r->rest_len; 00875 r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len); 00876 } else if (r->rest_alloc_len < r->rest_len) { 00877 r->rest_alloc_len = r->rest_len; 00878 r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len); 00879 } 00880 memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len); 00881 } 00882 } 00883 if (ret != 0) return ret; 00884 00885 /* 1フレーム処理が進んだのでポインタを進める */ 00886 /* proceed frame pointer */ 00887 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00888 if (!mfcc->valid) continue; 00889 mfcc->f++; 00890 } 00891 00892 /* 窓バッファを処理が終わった分シフト */ 00893 /* shift window */ 00894 memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); 00895 r->windownum -= recog->jconf->input.frameshift; 00896 } 00897 00898 /* 与えられた音声セグメントに対する認識処理が全て終了 00899 呼び出し元に, 入力を続けるよう伝える */ 00900 /* input segment is fully processed 00901 tell the caller to continue input */ 00902 return(0); 00903 } 00904 00938 int 00939 RealTimeResume(Recog *recog) 00940 { 00941 MFCCCalc *mfcc; 00942 RealBeam *r; 00943 boolean ok_p; 00944 #ifdef SPSEGMENT_NAIST 00945 RecogProcess *p; 00946 #endif 00947 PROCESS_AM *am; 00948 00949 r = &(recog->real); 00950 00951 /* 計算用のワークエリアを準備 */ 00952 /* prepare work area for calculation */ 00953 if (recog->jconf->input.type == INPUT_WAVEFORM) { 00954 reset_mfcc(recog); 00955 } 00956 /* 音響尤度計算用キャッシュを準備 */ 00957 /* prepare cache area for acoustic computation of HMM 
states and mixtures */ 00958 for(am=recog->amlist;am;am=am->next) { 00959 outprob_prepare(&(am->hmmwrk), r->maxframelen); 00960 } 00961 00962 /* param にある全パラメータを処理する準備 */ 00963 /* prepare to process all data in param */ 00964 for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00965 if (mfcc->param->samplenum == 0) mfcc->valid = FALSE; 00966 else mfcc->valid = TRUE; 00967 #ifdef RDEBUG 00968 printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1); 00969 #endif 00970 /* フレーム数をリセット */ 00971 /* reset frame count */ 00972 mfcc->f = 0; 00973 /* MAP-CMN の初期化 */ 00974 /* Prepare for MAP-CMN */ 00975 if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); 00976 } 00977 00978 #ifdef BACKEND_VAD 00979 if (recog->jconf->decodeopt.segment) { 00980 spsegment_init(recog); 00981 } 00982 /* not exec pass1 begin callback here */ 00983 #else 00984 recog->triggered = FALSE; 00985 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00986 if (!mfcc->valid) continue; 00987 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00988 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00989 recog->triggered = TRUE; 00990 break; 00991 } 00992 #endif 00993 00994 /* param 内の全フレームについて認識処理を進める */ 00995 /* proceed recognition for all frames in param */ 00996 00997 while(1) { 00998 ok_p = TRUE; 00999 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01000 if (! 
mfcc->valid) continue; 01001 if (mfcc->f < mfcc->param->samplenum) { 01002 mfcc->valid = TRUE; 01003 ok_p = FALSE; 01004 } else { 01005 mfcc->valid = FALSE; 01006 } 01007 } 01008 if (ok_p) { 01009 /* すべての MFCC が終わりに達したのでループ終了 */ 01010 /* all MFCC has been processed, end of loop */ 01011 break; 01012 } 01013 01014 /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ 01015 switch (decode_proceed(recog)) { 01016 case -1: /* error */ 01017 return -1; 01018 break; 01019 case 0: /* success */ 01020 break; 01021 case 1: /* segmented */ 01022 /* segmented, end procs ([0..f])*/ 01023 r->last_is_segmented = TRUE; 01024 return 1; /* segmented by this function */ 01025 } 01026 01027 #ifdef BACKEND_VAD 01028 /* check up trigger in case of VAD segmentation */ 01029 if (recog->jconf->decodeopt.segment) { 01030 if (recog->triggered == FALSE) { 01031 if (spsegment_trigger_sync(recog)) { 01032 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01033 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01034 recog->triggered = TRUE; 01035 } 01036 } 01037 } 01038 #endif 01039 01040 /* call frame-wise callback */ 01041 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 01042 01043 /* 1フレーム処理が進んだのでポインタを進める */ 01044 /* proceed frame pointer */ 01045 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01046 if (!mfcc->valid) continue; 01047 mfcc->f++; 01048 } 01049 01050 } 01051 /* 前回のセグメント時に入力をシフトしていない分をシフトする */ 01052 /* do the last shift here */ 01053 if (recog->jconf->input.type == INPUT_WAVEFORM) { 01054 memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); 01055 r->windownum -= recog->jconf->input.frameshift; 01056 /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから 01057 処理する */ 01058 /* now that the search status has been prepared for the next input, we 01059 first process the rest unprocessed samples at the last session */ 01060 if (r->rest_len > 0) { 01061 return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog)); 01062 } 
01063 } 01064 01065 /* 新規の入力に対して認識処理は続く… */ 01066 /* the recognition process will continue for the newly incoming samples... */ 01067 return 0; 01068 01069 } 01070 01071 01105 boolean 01106 RealTimeParam(Recog *recog) 01107 { 01108 boolean ret1, ret2; 01109 RealBeam *r; 01110 int ret; 01111 int maxf; 01112 boolean ok_p; 01113 MFCCCalc *mfcc; 01114 Value *para; 01115 #ifdef RDEBUG 01116 int i; 01117 #endif 01118 01119 r = &(recog->real); 01120 01121 if (r->last_is_segmented) { 01122 01123 /* RealTimePipeLine で認識処理側の理由により認識が中断した場合, 01124 現状態のMFCC計算データをそのまま次回へ保持する必要があるので, 01125 MFCC計算終了処理を行わずに第1パスの結果のみ出力して終わる. */ 01126 /* When input segmented by recognition process in RealTimePipeLine(), 01127 we have to keep the whole current status of MFCC computation to the 01128 next call. So here we only output the 1st pass result. */ 01129 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01130 mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */ 01131 mfcc->param->samplenum = mfcc->f + 1; 01132 } 01133 decode_end_segmented(recog); 01134 01135 /* この区間の param データを第2パスのために返す */ 01136 /* return obtained parameter for 2nd pass */ 01137 return(TRUE); 01138 } 01139 01140 if (recog->jconf->input.type == INPUT_VECTOR) { 01141 /* finalize real-time 1st pass */ 01142 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01143 mfcc->param->header.samplenum = mfcc->f; 01144 mfcc->param->samplenum = mfcc->f; 01145 } 01146 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01147 decode_end(recog); 01148 return TRUE; 01149 } 01150 01151 /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */ 01152 /* finish MFCC computation for the last delayed frames */ 01153 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01154 if (mfcc->para->delta || mfcc->para->acc) { 01155 mfcc->valid = TRUE; 01156 } else { 01157 mfcc->valid = FALSE; 01158 } 01159 } 01160 01161 /* loop until all data has been flushed */ 01162 while (1) { 01163 01164 /* if all mfcc became invalid, exit loop here */ 01165 ok_p = FALSE; 01166 for 
(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01167 if (mfcc->valid) { 01168 ok_p = TRUE; 01169 break; 01170 } 01171 } 01172 if (!ok_p) break; 01173 01174 /* try to get 1 frame for all mfcc instances */ 01175 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01176 01177 para = mfcc->para; 01178 01179 if (! mfcc->valid) continue; 01180 01181 /* check if there is data in cycle buffer of delta */ 01182 ret1 = WMP_deltabuf_flush(mfcc->db); 01183 #ifdef RDEBUG 01184 printf("DeltaBufLast: ret=%d, status=", ret1); 01185 for(i=0;i<mfcc->db->len;i++) { 01186 printf("%d", mfcc->db->is_on[i]); 01187 } 01188 printf(", nextstore=%d\n", mfcc->db->store); 01189 #endif 01190 if (ret1) { 01191 /* uncomputed delta has flushed, compute it with tmpmfcc */ 01192 if (para->energy && para->absesup) { 01193 memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1)); 01194 memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen); 01195 } else { 01196 memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); 01197 } 01198 if (para->acc) { 01199 /* this new delta should be given to the accel cycle buffer */ 01200 ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc); 01201 #ifdef RDEBUG 01202 printf("AccelBuf: ret=%d, status=", ret2); 01203 for(i=0;i<mfcc->ab->len;i++) { 01204 printf("%d", mfcc->ab->is_on[i]); 01205 } 01206 printf(", nextstore=%d\n", mfcc->ab->store); 01207 #endif 01208 if (ret2) { 01209 /* uncomputed accel was given, compute it with tmpmfcc */ 01210 memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); 01211 memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); 01212 } else { 01213 /* still no input is given: */ 01214 /* in case of very short input: go on to the next input */ 01215 continue; 01216 } 01217 } 01218 01219 } else { 01220 01221 /* no data left in the delta buffer */ 
01222 if (para->acc) { 01223 /* no new data, just flush the accel buffer */ 01224 ret2 = WMP_deltabuf_flush(mfcc->ab); 01225 #ifdef RDEBUG 01226 printf("AccelBuf: ret=%d, status=", ret2); 01227 for(i=0;i<mfcc->ab->len;i++) { 01228 printf("%d", mfcc->ab->is_on[i]); 01229 } 01230 printf(", nextstore=%d\n", mfcc->ab->store); 01231 #endif 01232 if (ret2) { 01233 /* uncomputed data has flushed, compute it with tmpmfcc */ 01234 memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); 01235 memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); 01236 } else { 01237 /* actually no data exists in both delta and accel */ 01238 mfcc->valid = FALSE; /* disactivate this instance */ 01239 continue; /* end this loop */ 01240 } 01241 } else { 01242 /* only delta: input fully flushed */ 01243 mfcc->valid = FALSE; /* disactivate this instance */ 01244 continue; /* end this loop */ 01245 } 01246 } 01247 /* a new frame has been obtained from delta buffer to tmpmfcc */ 01248 if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc); 01249 if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { 01250 jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); 01251 return FALSE; 01252 } 01253 /* store to mfcc->f */ 01254 memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); 01255 #ifdef ENABLE_PLUGIN 01256 /* call postprocess plugin if any */ 01257 plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f); 01258 #endif 01259 } 01260 01261 /* call recognition start callback */ 01262 ok_p = FALSE; 01263 maxf = 0; 01264 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01265 if (!mfcc->valid) continue; 01266 if (maxf < mfcc->f) maxf = mfcc->f; 01267 if (mfcc->f == 0) { 01268 ok_p = TRUE; 01269 } 01270 } 01271 01272 if (ok_p && maxf == 0) { 01273 /* call callback when at 
least one of MFCC has initial frame */ 01274 if (recog->jconf->decodeopt.segment) { 01275 #ifdef BACKEND_VAD 01276 /* not exec pass1 begin callback here */ 01277 #else 01278 if (!recog->process_segment) { 01279 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01280 } 01281 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01282 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01283 recog->triggered = TRUE; 01284 #endif 01285 } else { 01286 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01287 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01288 recog->triggered = TRUE; 01289 } 01290 } 01291 01292 /* proceed for the curent frame */ 01293 ret = decode_proceed(recog); 01294 if (ret == -1) { /* error */ 01295 return -1; 01296 } else if (ret == 1) { /* segmented */ 01297 /* loop out */ 01298 break; 01299 } /* else no event occured */ 01300 01301 #ifdef BACKEND_VAD 01302 /* check up trigger in case of VAD segmentation */ 01303 if (recog->jconf->decodeopt.segment) { 01304 if (recog->triggered == FALSE) { 01305 if (spsegment_trigger_sync(recog)) { 01306 if (!recog->process_segment) { 01307 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 01308 } 01309 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 01310 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 01311 recog->triggered = TRUE; 01312 } 01313 } 01314 } 01315 #endif 01316 01317 /* call frame-wise callback */ 01318 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 01319 01320 /* move to next */ 01321 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01322 if (! 
mfcc->valid) continue; 01323 mfcc->f++; 01324 if (mfcc->f > r->maxframelen) mfcc->valid = FALSE; 01325 } 01326 } 01327 01328 /* finalize real-time 1st pass */ 01329 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01330 mfcc->param->header.samplenum = mfcc->f; 01331 mfcc->param->samplenum = mfcc->f; 01332 } 01333 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01334 decode_end(recog); 01335 01336 return(TRUE); 01337 } 01338 01357 void 01358 RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog) 01359 { 01360 boolean cmn_update_p; 01361 Value *para; 01362 Jconf *jconf; 01363 RecogProcess *r; 01364 01365 jconf = recog->jconf; 01366 para = mfcc->para; 01367 01368 /* update CMN vector for next speech */ 01369 if(para->cmn) { 01370 if (mfcc->cmn.update) { 01371 cmn_update_p = TRUE; 01372 for(r=recog->process_list;r;r=r->next) { 01373 if (!r->live) continue; 01374 if (r->am->mfcc != mfcc) continue; 01375 if (r->result.status < 0) { /* input rejected */ 01376 cmn_update_p = FALSE; 01377 break; 01378 } 01379 } 01380 if (cmn_update_p) { 01381 /* update last CMN parameter for next spech */ 01382 CMN_realtime_update(mfcc->cmn.wrk, mfcc->param); 01383 } else { 01384 /* do not update, because the last input is bogus */ 01385 if (verbose_flag) { 01386 #ifdef BACKEND_VAD 01387 if (!recog->jconf->decodeopt.segment || recog->triggered) { 01388 jlog("STAT: skip CMN parameter update since last input was invalid\n"); 01389 } 01390 #else 01391 jlog("STAT: skip CMN parameter update since last input was invalid\n"); 01392 #endif 01393 } 01394 } 01395 } 01396 /* if needed, save the updated CMN parameter to a file */ 01397 if (mfcc->cmn.save_filename) { 01398 if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) { 01399 jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename); 01400 } 01401 } 01402 } 01403 } 01404 01417 void 01418 RealTimeTerminate(Recog *recog) 01419 { 01420 MFCCCalc *mfcc; 01421 01422 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 
01423 mfcc->param->header.samplenum = mfcc->f; 01424 mfcc->param->samplenum = mfcc->f; 01425 } 01426 01427 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 01428 decode_end(recog); 01429 } 01430 01442 void 01443 realbeam_free(Recog *recog) 01444 { 01445 RealBeam *r; 01446 01447 r = &(recog->real); 01448 01449 if (recog->real.window) { 01450 free(recog->real.window); 01451 recog->real.window = NULL; 01452 } 01453 if (recog->real.rest_Speech) { 01454 free(recog->real.rest_Speech); 01455 recog->real.rest_Speech = NULL; 01456 } 01457 } 01458 01459 01460 01461 /************************************************************************/ 01462 /************************************************************************/ 01463 /************************************************************************/ 01464 /************************************************************************/ 01465 01466 /* MFCC realtime input */ 01484 int 01485 mfcc_go(Recog *recog, int (*ad_check)(Recog *)) 01486 { 01487 RealBeam *r; 01488 MFCCCalc *mfcc; 01489 int new_f; 01490 int ret, ret3; 01491 01492 r = &(recog->real); 01493 01494 r->last_is_segmented = FALSE; 01495 01496 while(1/*in_data_vec*/) { 01497 01498 ret = mfc_module_read(recog->mfcclist, &new_f); 01499 01500 if (debug2_flag) { 01501 if (recog->mfcclist->f < new_f) { 01502 jlog("%d: %d (%d)\n", recog->mfcclist->f, new_f, ret); 01503 } 01504 } 01505 01506 /* callback poll */ 01507 if (ad_check != NULL) { 01508 if ((ret3 = (*(ad_check))(recog)) < 0) { 01509 if ((ret3 == -1 && recog->mfcclist->f == 0) || ret3 == -2) { 01510 return(-2); 01511 } 01512 } 01513 } 01514 01515 while(recog->mfcclist->f < new_f) { 01516 01517 recog->mfcclist->valid = TRUE; 01518 01519 #ifdef ENABLE_PLUGIN 01520 /* call post-process plugin if exist */ 01521 plugin_exec_vector_postprocess(recog->mfcclist->param->parvec[recog->mfcclist->f], recog->mfcclist->param->veclen, recog->mfcclist->f); 01522 #endif 01523 01524 /* 処理を1フレーム進める */ 01525 /* proceed one frame */ 01526 01527 
switch(proceed_one_frame(recog)) { 01528 case -1: /* error */ 01529 return -1; 01530 case 0: /* normal */ 01531 break; 01532 case 1: /* segmented by process */ 01533 return 2; 01534 } 01535 01536 /* 1フレーム処理が進んだのでポインタを進める */ 01537 /* proceed frame pointer */ 01538 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 01539 if (!mfcc->valid) continue; 01540 mfcc->f++; 01541 } 01542 } 01543 01544 /* check if input end */ 01545 switch(ret) { 01546 case -1: /* end of input */ 01547 return 0; 01548 case -2: /* error */ 01549 return -1; 01550 case -3: /* end of segment request */ 01551 return 1; 01552 } 01553 } 01554 /* 与えられた音声セグメントに対する認識処理が全て終了 01555 呼び出し元に, 入力を続けるよう伝える */ 01556 /* input segment is fully processed 01557 tell the caller to continue input */ 01558 return(1); 01559 } 01560 01561 /* end of file */ 01562 01563