Julius: libjulius/src/realtime-1stpass.c ソースファイル

Julius 4.2
00001 
00117 /*
00118  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00119  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00120  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00121  * All rights reserved
00122  */
00123 
00124 #include <julius/julius.h>
00125 
00126 #undef RDEBUG                   ///< Define if you want local debug message
00127 
00158 static void
00159 init_param(MFCCCalc *mfcc)
00160 {
00161   Value *para;
00162 
00163   para = mfcc->para;
00164 
00165   /* これから計算されるパラメータの型をヘッダに設定 */
00166   /* set header types */
00167   mfcc->param->header.samptype = F_MFCC;
00168   if (para->delta) mfcc->param->header.samptype |= F_DELTA;
00169   if (para->acc) mfcc->param->header.samptype |= F_ACCL;
00170   if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
00171   if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
00172   if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
00173   if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
00174   
00175   mfcc->param->header.wshift = para->smp_period * para->frameshift;
00176   mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */
00177   mfcc->param->veclen = para->veclen;
00178   
00179   /* 認識処理中/終了後にセットされる変数:
00180      param->parvec (パラメータベクトル系列)
00181      param->header.samplenum, param->samplenum (全フレーム数)
00182   */
00183   /* variables that will be set while/after computation has been done:
00184      param->parvec (parameter vector sequence)
00185      param->header.samplenum, param->samplenum (total number of frames)
00186   */
00187   /* MAP-CMN の初期化 */
00188   /* Prepare for MAP-CMN */
00189   if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk);
00190 }
00191 
00219 boolean
00220 RealTimeInit(Recog *recog)
00221 {
00222   Value *para;
00223   Jconf *jconf;
00224   RealBeam *r;
00225   MFCCCalc *mfcc;
00226 
00227 
00228   jconf = recog->jconf;
00229   r = &(recog->real);
00230 
00231   /* 最大フレーム長を最大入力時間数から計算 */
00232   /* set maximum allowed frame length */
00233   r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift;
00234 
00235   /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */
00236   /* if "-ssload", load noise spectrum for spectral subtraction from file */
00237   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00238     if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) {
00239       if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
00240         jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename);
00241         return FALSE;
00242       }
00243       /* check ssbuf length */
00244       if (mfcc->frontend.sslen != mfcc->wrk->bflen) {
00245         jlog("ERROR: noise spectrum length not match\n");
00246         return FALSE;
00247       }
00248       mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
00249       mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
00250       mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
00251       mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
00252     }
00253   }
00254 
00255   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00256   
00257     para = mfcc->para;
00258 
00259     /* 対数エネルギー正規化のための初期値 */
00260     /* set initial value for log energy normalization */
00261     if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk));
00262     /* デルタ計算のためのサイクルバッファを用意 */
00263     /* initialize cycle buffers for delta and accel coef. computation */
00264     if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin);
00265     if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin);
00266     /* デルタ計算のためのワークエリアを確保 */
00267     /* allocate work area for the delta computation */
00268     mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen);
00269     /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */
00270     /* Initialize the initial cepstral mean data from file for MAP-CMN */
00271     if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight);
00272     /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */
00273     /* if "-cmnload", load initial cepstral mean data from file for CMN */
00274     if (mfcc->cmn.load_filename) {
00275       if (para->cmn) {
00276         if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) {
00277           jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename);
00278         }
00279       } else {
00280         jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename);
00281       }
00282     }
00283 
00284   }
00285   /* 窓長をセット */
00286   /* set window length */
00287   r->windowlen = recog->jconf->input.framesize + 1;
00288   /* 窓かけ用バッファを確保 */
00289   /* set window buffer */
00290   r->window = mymalloc(sizeof(SP16) * r->windowlen);
00291 
00292   return TRUE;
00293 }
00294 
00319 void
00320 reset_mfcc(Recog *recog) 
00321 {
00322   Value *para;
00323   MFCCCalc *mfcc;
00324   RealBeam *r;
00325 
00326   r = &(recog->real);
00327 
00328   /* 特徴抽出モジュールを初期化 */
00329   /* initialize parameter extraction module */
00330   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00331 
00332     para = mfcc->para;
00333 
00334     /* 対数エネルギー正規化のための初期値をセット */
00335     /* set initial value for log energy normalization */
00336     if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para);
00337     /* デルタ計算用バッファを準備 */
00338     /* set the delta cycle buffer */
00339     if (para->delta) WMP_deltabuf_prepare(mfcc->db);
00340     if (para->acc) WMP_deltabuf_prepare(mfcc->ab);
00341   }
00342 
00343 }
00344 
00371 boolean
00372 RealTimePipeLinePrepare(Recog *recog)
00373 {
00374   RealBeam *r;
00375   PROCESS_AM *am;
00376   MFCCCalc *mfcc;
00377 #ifdef SPSEGMENT_NAIST
00378   RecogProcess *p;
00379 #endif
00380 
00381   r = &(recog->real);
00382 
00383   /* 計算用の変数を初期化 */
00384   /* initialize variables for computation */
00385   r->windownum = 0;
00386   /* parameter check */
00387   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00388     /* パラメータ初期化 */
00389     /* parameter initialization */
00390     if (recog->jconf->input.speech_input == SP_MFCMODULE) {
00391       if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE;
00392     } else {
00393       init_param(mfcc);
00394     }
00395     /* フレームごとのパラメータベクトル保存の領域を確保 */
00396     /* あとで必要に応じて伸長される */
00397     if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) {
00398       j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");
00399     }
00400     /* フレーム数をリセット */
00401     /* reset frame count */
00402     mfcc->f = 0;
00403   }
00404   /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */
00405   /* check type coherence between param and hmminfo here */
00406   if (recog->jconf->input.paramtype_check_flag) {
00407     for(am=recog->amlist;am;am=am->next) {
00408       if (!check_param_coherence(am->hmminfo, am->mfcc->param)) {
00409         jlog("ERROR: input parameter type does not match AM\n");
00410         return FALSE;
00411       }
00412     }
00413   }
00414 
00415   /* 計算用のワークエリアを準備 */
00416   /* prepare work area for calculation */
00417   if (recog->jconf->input.type == INPUT_WAVEFORM) {
00418     reset_mfcc(recog);
00419   }
00420   /* 音響尤度計算用キャッシュを準備 */
00421   /* prepare cache area for acoustic computation of HMM states and mixtures */
00422   for(am=recog->amlist;am;am=am->next) {
00423     outprob_prepare(&(am->hmmwrk), r->maxframelen);
00424   }
00425 
00426 #ifdef BACKEND_VAD
00427   if (recog->jconf->decodeopt.segment) {
00428     /* initialize segmentation parameters */
00429     spsegment_init(recog);
00430   }
00431 #else
00432   recog->triggered = FALSE;
00433 #endif
00434 
00435 #ifdef DEBUG_VTLN_ALPHA_TEST
00436   /* store speech */
00437   recog->speechlen = 0;
00438 #endif
00439 
00440   return TRUE;
00441 }
00442 
00475 boolean
00476 RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen)
00477 {
00478   int i;
00479   boolean ret;
00480   VECT *tmpmfcc;
00481   Value *para;
00482 
00483   tmpmfcc = mfcc->tmpmfcc;
00484   para = mfcc->para;
00485 
00486   /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */
00487   /* calculate base MFCC from waveform (use recog->mfccwrk) */
00488   for (i=0; i < windowlen; i++) {
00489     mfcc->wrk->bf[i+1] = (float) window[i];
00490   }
00491   WMP_calc(mfcc->wrk, tmpmfcc, para);
00492 
00493   if (para->energy && para->enormal) {
00494     /* 対数エネルギー項を正規化する */
00495     /* normalize log energy */
00496     /* リアルタイム入力では発話ごとの最大エネルギーが得られないので
00497        直前の発話のパワーで代用する */
00498     /* Since the maximum power of the whole input utterance cannot be
00499        obtained at real-time input, the maximum of last input will be
00500        used to normalize.
00501     */
00502     tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para);
00503   }
00504 
00505   if (para->delta) {
00506     /* デルタを計算する */
00507     /* calc delta coefficients */
00508     ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);
00509 #ifdef RDEBUG
00510     printf("DeltaBuf: ret=%d, status=", ret);
00511     for(i=0;i<mfcc->db->len;i++) {
00512       printf("%d", mfcc->db->is_on[i]);
00513     }
00514     printf(", nextstore=%d\n", mfcc->db->store);
00515 #endif
00516     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00517     /* if ret == FALSE, there is no available frame.  So just wait for
00518        next input */
00519     if (! ret) {
00520       return FALSE;
00521     }
00522 
00523     /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */
00524     /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */
00525     memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
00526   }
00527 
00528   if (para->acc) {
00529     /* Accelerationを計算する */
00530     /* calc acceleration coefficients */
00531     /* base+delta をそのまま入れる */
00532     /* send the whole base+delta to the cycle buffer */
00533     ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);
00534 #ifdef RDEBUG
00535     printf("AccelBuf: ret=%d, status=", ret);
00536     for(i=0;i<mfcc->ab->len;i++) {
00537       printf("%d", mfcc->ab->is_on[i]);
00538     }
00539     printf(", nextstore=%d\n", mfcc->ab->store);
00540 #endif
00541     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00542     /* if ret == FALSE, there is no available frame.  So just wait for
00543        next input */
00544     if (! ret) {
00545       return FALSE;
00546     }
00547     /* ab->vec には，(base+delta) とその差分係数が入っている. 
00548        [base] [delta] [delta] [acc] の順で入っているので,
00549        [base] [delta] [acc] を tmpmfcc にコピーする. */
00550     /* now ab->vec holds the current (base+delta) and their delta coef. 
00551        it holds a vector in the order of [base] [delta] [delta] [acc], 
00552        so copy the [base], [delta] and [acc] to tmpmfcc.  */
00553     memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2);
00554     memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen);
00555   }
00556 
00557 #ifdef POWER_REJECT
00558   if (para->energy || para->c0) {
00559     mfcc->avg_power += tmpmfcc[para->baselen-1];
00560   }
00561 #endif
00562 
00563   if (para->delta && (para->energy || para->c0) && para->absesup) {
00564     /* 絶対値パワーを除去 */
00565     /* suppress absolute power */
00566     memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen));
00567   }
00568 
00569   /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */
00570   /* tmpmfcc[] now holds the latest parameter vector */
00571 
00572   /* CMN を計算 */
00573   /* perform CMN */
00574   if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc);
00575 
00576   return TRUE;
00577 }
00578 
00579 static int
00580 proceed_one_frame(Recog *recog)
00581 {
00582   MFCCCalc *mfcc;
00583   RealBeam *r;
00584   int maxf;
00585   PROCESS_AM *am;
00586   int rewind_frame;
00587   boolean reprocess;
00588   boolean ok_p;
00589 
00590   r = &(recog->real);
00591 
00592   /* call recognition start callback */
00593   ok_p = FALSE;
00594   maxf = 0;
00595   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00596     if (!mfcc->valid) continue;
00597     if (maxf < mfcc->f) maxf = mfcc->f;
00598     if (mfcc->f == 0) {
00599       ok_p = TRUE;
00600     }
00601   }
00602   if (ok_p && maxf == 0) {
00603     /* call callback when at least one of MFCC has initial frame */
00604     if (recog->jconf->decodeopt.segment) {
00605 #ifdef BACKEND_VAD
00606       /* not exec pass1 begin callback here */
00607 #else
00608       if (!recog->process_segment) {
00609         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00610       }
00611       callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00612       callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00613       recog->triggered = TRUE;
00614 #endif
00615     } else {
00616       callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00617       callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00618       recog->triggered = TRUE;
00619     }
00620   }
00621   /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
00622   switch (decode_proceed(recog)) {
00623   case -1: /* error */
00624     return -1;
00625     break;
00626   case 0:                       /* success */
00627     break;
00628   case 1:                       /* segmented */
00629     /* 認識処理のセグメント要求で終わったことをフラグにセット */
00630     /* set flag which indicates that the input has ended with segmentation request */
00631     r->last_is_segmented = TRUE;
00632     /* tell the caller to be segmented by this function */
00633     /* 呼び出し元に，ここで入力を切るよう伝える */
00634     return 1;
00635   }
00636 #ifdef BACKEND_VAD
00637   /* check up trigger in case of VAD segmentation */
00638   if (recog->jconf->decodeopt.segment) {
00639     if (recog->triggered == FALSE) {
00640       if (spsegment_trigger_sync(recog)) {
00641         if (!recog->process_segment) {
00642           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00643         }
00644         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00645         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00646         recog->triggered = TRUE;
00647       }
00648     }
00649   }
00650 #endif
00651   
00652   if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00653     /* set total length to the current frame */
00654     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00655       if (!mfcc->valid) continue;
00656       mfcc->param->header.samplenum = mfcc->f + 1;
00657       mfcc->param->samplenum = mfcc->f + 1;
00658     }
00659     /* do rewind for all mfcc here */
00660     spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00661     /* also tell adin module to rehash the concurrent audio input */
00662     recog->adin->rehash = TRUE;
00663     /* reset outprob cache for all AM */
00664     for(am=recog->amlist;am;am=am->next) {
00665       outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00666     }
00667     if (reprocess) {
00668       /* process the backstep MFCCs here */
00669       while(1) {
00670         ok_p = TRUE;
00671         for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00672           if (! mfcc->valid) continue;
00673           mfcc->f++;
00674           if (mfcc->f < mfcc->param->samplenum) {
00675             mfcc->valid = TRUE;
00676             ok_p = FALSE;
00677           } else {
00678             mfcc->valid = FALSE;
00679           }
00680         }
00681         if (ok_p) {
00682           /* すべての MFCC が終わりに達したのでループ終了 */
00683           /* all MFCC has been processed, end of loop  */
00684           for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00685             if (! mfcc->valid) continue;
00686             mfcc->f--;
00687           }
00688           break;
00689         }
00690         /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
00691         switch (decode_proceed(recog)) {
00692         case -1: /* error */
00693           return -1;
00694           break;
00695         case 0:                 /* success */
00696           break;
00697         case 1:                 /* segmented */
00698           /* ignore segmentation while in the backstep segment */
00699           break;
00700         }
00701         /* call frame-wise callback */
00702         callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00703       }
00704     }
00705   }
00706   /* call frame-wise callback if at least one of MFCC is valid at this frame */
00707   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00708     if (mfcc->valid) {
00709       callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00710       break;
00711     }
00712   }
00713   
00714   return 0;
00715 }
00716 
00717 
00787 int
00788 RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */
00789 {
00790   int i, now, ret;
00791   MFCCCalc *mfcc;
00792   RealBeam *r;
00793 
00794   r = &(recog->real);
00795 
00796 #ifdef DEBUG_VTLN_ALPHA_TEST
00797   /* store speech */
00798   adin_cut_callback_store_buffer(Speech, nowlen, recog);
00799 #endif
00800 
00801   /* window[0..windownum-1] は前回の呼び出しで残った音声データが格納されている */
00802   /* window[0..windownum-1] are speech data left from previous call */
00803 
00804   /* 処理用ポインタを初期化 */
00805   /* initialize pointer for local processing */
00806   now = 0;
00807   
00808   /* 認識処理がセグメント要求で終わったのかどうかのフラグをリセット */
00809   /* reset flag which indicates whether the input has ended with segmentation request */
00810   r->last_is_segmented = FALSE;
00811 
00812 #ifdef RDEBUG
00813   printf("got %d samples\n", nowlen);
00814 #endif
00815 
00816   while (now < nowlen) {        /* till whole input is processed */
00817     /* 入力長が maxframelen に達したらここで強制終了 */
00818     /* if input length reaches maximum buffer size, terminate 1st pass here */
00819     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00820       if (mfcc->f >= r->maxframelen) return(1);
00821     }
00822     /* 窓バッファを埋められるだけ埋める */
00823     /* fill window buffer as many as possible */
00824     for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--)
00825       r->window[r->windownum++] = (float) Speech[now++];
00826     /* もし窓バッファが埋まらなければ, このセグメントの処理はここで終わる. 
00827        処理されなかったサンプル (window[0..windownum-1]) は次回に持ち越し. */
00828     /* if window buffer was not filled, end processing here, keeping the
00829        rest samples (window[0..windownum-1]) in the window buffer. */
00830     if (r->windownum < r->windowlen) break;
00831 #ifdef RDEBUG
00832     /*    printf("%d used, %d rest\n", now, nowlen - now);
00833 
00834           printf("[f = %d]\n", f);*/
00835 #endif
00836 
00837     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00838       mfcc->valid = FALSE;
00839       /* 窓内の音声波形から特徴量を計算して r->tmpmfcc に格納  */
00840       /* calculate a parameter vector from current waveform windows
00841          and store to r->tmpmfcc */
00842       if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) {
00843 #ifdef ENABLE_PLUGIN
00844         /* call post-process plugin if exist */
00845         plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f);
00846 #endif
00847         /* MFCC完成，登録 */
00848         mfcc->valid = TRUE;
00849         /* now get the MFCC vector of current frame, now store it to param */
00850         if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
00851           jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
00852           return -1;
00853         }
00854         memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
00855 #ifdef RDEBUG
00856         printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f);
00857 #endif
00858       }
00859     }
00860 
00861     /* 処理を1フレーム進める */
00862     /* proceed one frame */
00863     ret = proceed_one_frame(recog);
00864 
00865     if (ret == 1 && recog->jconf->decodeopt.segment) {
00866       /* ショートポーズセグメンテーション: バッファに残っているデータを
00867          別に保持して，次回の最初に処理する */
00868       /* short pause segmentation: there is some data left in buffer, so
00869          we should keep them for next processing */
00870       r->rest_len = nowlen - now;
00871       if (r->rest_len > 0) {
00872         /* copy rest samples to rest_Speech */
00873         if (r->rest_Speech == NULL) {
00874           r->rest_alloc_len = r->rest_len;
00875           r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len);
00876         } else if (r->rest_alloc_len < r->rest_len) {
00877           r->rest_alloc_len = r->rest_len;
00878           r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len);
00879         }
00880         memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len);
00881       }
00882     }
00883     if (ret != 0) return ret;
00884 
00885     /* 1フレーム処理が進んだのでポインタを進める */
00886     /* proceed frame pointer */
00887     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00888       if (!mfcc->valid) continue;
00889       mfcc->f++;
00890     }
00891 
00892     /* 窓バッファを処理が終わった分シフト */
00893     /* shift window */
00894     memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
00895     r->windownum -= recog->jconf->input.frameshift;
00896   }
00897 
00898   /* 与えられた音声セグメントに対する認識処理が全て終了
00899      呼び出し元に, 入力を続けるよう伝える */
00900   /* input segment is fully processed
00901      tell the caller to continue input */
00902   return(0);                    
00903 }
00904 
00938 int
00939 RealTimeResume(Recog *recog)
00940 {
00941   MFCCCalc *mfcc;
00942   RealBeam *r;
00943   boolean ok_p;
00944 #ifdef SPSEGMENT_NAIST
00945   RecogProcess *p;
00946 #endif
00947   PROCESS_AM *am;
00948 
00949   r = &(recog->real);
00950 
00951   /* 計算用のワークエリアを準備 */
00952   /* prepare work area for calculation */
00953   if (recog->jconf->input.type == INPUT_WAVEFORM) {
00954     reset_mfcc(recog);
00955   }
00956   /* 音響尤度計算用キャッシュを準備 */
00957   /* prepare cache area for acoustic computation of HMM states and mixtures */
00958   for(am=recog->amlist;am;am=am->next) {
00959     outprob_prepare(&(am->hmmwrk), r->maxframelen);
00960   }
00961 
00962   /* param にある全パラメータを処理する準備 */
00963   /* prepare to process all data in param */
00964   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00965     if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00966     else mfcc->valid = TRUE;
00967 #ifdef RDEBUG
00968     printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1);
00969 #endif
00970     /* フレーム数をリセット */
00971     /* reset frame count */
00972     mfcc->f = 0;
00973     /* MAP-CMN の初期化 */
00974     /* Prepare for MAP-CMN */
00975     if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk);
00976   }
00977 
00978 #ifdef BACKEND_VAD
00979   if (recog->jconf->decodeopt.segment) {
00980     spsegment_init(recog);
00981   }
00982   /* not exec pass1 begin callback here */
00983 #else
00984   recog->triggered = FALSE;
00985   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00986     if (!mfcc->valid) continue;
00987     callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00988     callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00989     recog->triggered = TRUE;
00990     break;
00991   }
00992 #endif
00993 
00994   /* param 内の全フレームについて認識処理を進める */
00995   /* proceed recognition for all frames in param */
00996 
00997   while(1) {
00998     ok_p = TRUE;
00999     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01000       if (! mfcc->valid) continue;
01001       if (mfcc->f < mfcc->param->samplenum) {
01002         mfcc->valid = TRUE;
01003         ok_p = FALSE;
01004       } else {
01005         mfcc->valid = FALSE;
01006       }
01007     }
01008     if (ok_p) {
01009       /* すべての MFCC が終わりに達したのでループ終了 */
01010       /* all MFCC has been processed, end of loop  */
01011       break;
01012     }
01013 
01014     /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
01015     switch (decode_proceed(recog)) {
01016     case -1: /* error */
01017       return -1;
01018       break;
01019     case 0:                     /* success */
01020       break;
01021     case 1:                     /* segmented */
01022       /* segmented, end procs ([0..f])*/
01023       r->last_is_segmented = TRUE;
01024       return 1;         /* segmented by this function */
01025     }
01026 
01027 #ifdef BACKEND_VAD
01028     /* check up trigger in case of VAD segmentation */
01029     if (recog->jconf->decodeopt.segment) {
01030       if (recog->triggered == FALSE) {
01031         if (spsegment_trigger_sync(recog)) {
01032           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01033           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01034           recog->triggered = TRUE;
01035         }
01036       }
01037     }
01038 #endif
01039 
01040     /* call frame-wise callback */
01041     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
01042 
01043     /* 1フレーム処理が進んだのでポインタを進める */
01044     /* proceed frame pointer */
01045     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01046       if (!mfcc->valid) continue;
01047       mfcc->f++;
01048     }
01049 
01050   }
01051   /* 前回のセグメント時に入力をシフトしていない分をシフトする */
01052   /* do the last shift here */
01053   if (recog->jconf->input.type == INPUT_WAVEFORM) {
01054     memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
01055     r->windownum -= recog->jconf->input.frameshift;
01056     /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから
01057        処理する */
01058     /* now that the search status has been prepared for the next input, we
01059        first process the rest unprocessed samples at the last session */
01060     if (r->rest_len > 0) {
01061       return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog));
01062     }
01063   }
01064 
01065   /* 新規の入力に対して認識処理は続く… */
01066   /* the recognition process will continue for the newly incoming samples... */
01067   return 0;
01068 
01069 }
01070 
01071 
01105 boolean
01106 RealTimeParam(Recog *recog)
01107 {
01108   boolean ret1, ret2;
01109   RealBeam *r;
01110   int ret;
01111   int maxf;
01112   boolean ok_p;
01113   MFCCCalc *mfcc;
01114   Value *para;
01115 #ifdef RDEBUG
01116   int i;
01117 #endif
01118 
01119   r = &(recog->real);
01120 
01121   if (r->last_is_segmented) {
01122 
01123     /* RealTimePipeLine で認識処理側の理由により認識が中断した場合,
01124        現状態のMFCC計算データをそのまま次回へ保持する必要があるので,
01125        MFCC計算終了処理を行わずに第１パスの結果のみ出力して終わる. */
01126     /* When input segmented by recognition process in RealTimePipeLine(),
01127        we have to keep the whole current status of MFCC computation to the
01128        next call.  So here we only output the 1st pass result. */
01129     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01130       mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */
01131       mfcc->param->samplenum = mfcc->f + 1;
01132     }
01133     decode_end_segmented(recog);
01134 
01135     /* この区間の param データを第２パスのために返す */
01136     /* return obtained parameter for 2nd pass */
01137     return(TRUE);
01138   }
01139 
01140   if (recog->jconf->input.type == INPUT_VECTOR) {
01141     /* finalize real-time 1st pass */
01142     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01143       mfcc->param->header.samplenum = mfcc->f;
01144       mfcc->param->samplenum = mfcc->f;
01145     }
01146     /* 最終フレーム処理を行い，認識の結果出力と終了処理を行う */
01147     decode_end(recog);
01148     return TRUE;
01149   }
01150 
01151   /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */
01152   /* finish MFCC computation for the last delayed frames */
01153   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01154     if (mfcc->para->delta || mfcc->para->acc) {
01155       mfcc->valid = TRUE;
01156     } else {
01157       mfcc->valid = FALSE;
01158     }
01159   }
01160 
01161   /* loop until all data has been flushed */
01162   while (1) {
01163 
01164     /* if all mfcc became invalid, exit loop here */
01165     ok_p = FALSE;
01166     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01167       if (mfcc->valid) {
01168         ok_p = TRUE;
01169         break;
01170       }
01171     }
01172     if (!ok_p) break;
01173 
01174     /* try to get 1 frame for all mfcc instances */
01175     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01176       
01177       para = mfcc->para;
01178       
01179       if (! mfcc->valid) continue;
01180       
01181       /* check if there is data in cycle buffer of delta */
01182       ret1 = WMP_deltabuf_flush(mfcc->db);
01183 #ifdef RDEBUG
01184       printf("DeltaBufLast: ret=%d, status=", ret1);
01185       for(i=0;i<mfcc->db->len;i++) {
01186         printf("%d", mfcc->db->is_on[i]);
01187       }
01188       printf(", nextstore=%d\n", mfcc->db->store);
01189 #endif
01190       if (ret1) {
01191         /* uncomputed delta has flushed, compute it with tmpmfcc */
01192         if (para->energy && para->absesup) {
01193           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1));
01194           memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen);
01195         } else {
01196           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
01197         }
01198         if (para->acc) {
01199           /* this new delta should be given to the accel cycle buffer */
01200           ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc);
01201 #ifdef RDEBUG
01202           printf("AccelBuf: ret=%d, status=", ret2);
01203           for(i=0;i<mfcc->ab->len;i++) {
01204             printf("%d", mfcc->ab->is_on[i]);
01205           }
01206           printf(", nextstore=%d\n", mfcc->ab->store);
01207 #endif
01208           if (ret2) {
01209             /* uncomputed accel was given, compute it with tmpmfcc */
01210             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01211             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01212           } else {
01213             /* still no input is given: */
01214             /* in case of very short input: go on to the next input */
01215             continue;
01216           }
01217         }
01218         
01219       } else {
01220       
01221         /* no data left in the delta buffer */
01222         if (para->acc) {
01223           /* no new data, just flush the accel buffer */
01224           ret2 = WMP_deltabuf_flush(mfcc->ab);
01225 #ifdef RDEBUG
01226           printf("AccelBuf: ret=%d, status=", ret2);
01227           for(i=0;i<mfcc->ab->len;i++) {
01228             printf("%d", mfcc->ab->is_on[i]);
01229           }
01230           printf(", nextstore=%d\n", mfcc->ab->store);
01231 #endif
01232           if (ret2) {
01233             /* uncomputed data has flushed, compute it with tmpmfcc */
01234             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01235             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01236           } else {
01237             /* actually no data exists in both delta and accel */
01238             mfcc->valid = FALSE; /* disactivate this instance */
01239             continue;           /* end this loop */
01240           }
01241         } else {
01242           /* only delta: input fully flushed */
01243           mfcc->valid = FALSE; /* disactivate this instance */
01244           continue;             /* end this loop */
01245         }
01246       }
01247       /* a new frame has been obtained from delta buffer to tmpmfcc */
01248       if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc);
01249       if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
01250         jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
01251         return FALSE;
01252       }
01253       /* store to mfcc->f */
01254       memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
01255 #ifdef ENABLE_PLUGIN
01256       /* call postprocess plugin if any */
01257       plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f);
01258 #endif
01259     }
01260 
01261     /* call recognition start callback */
01262     ok_p = FALSE;
01263     maxf = 0;
01264     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01265       if (!mfcc->valid) continue;
01266       if (maxf < mfcc->f) maxf = mfcc->f;
01267       if (mfcc->f == 0) {
01268         ok_p = TRUE;
01269       }
01270     }
01271 
01272     if (ok_p && maxf == 0) {
01273       /* call callback when at least one of MFCC has initial frame */
01274       if (recog->jconf->decodeopt.segment) {
01275 #ifdef BACKEND_VAD
01276           /* not exec pass1 begin callback here */
01277 #else
01278         if (!recog->process_segment) {
01279           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01280         }
01281         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01282         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01283         recog->triggered = TRUE;
01284 #endif
01285       } else {
01286         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01287         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01288         recog->triggered = TRUE;
01289       }
01290     }
01291 
01292     /* proceed for the curent frame */
01293     ret = decode_proceed(recog);
01294     if (ret == -1) {            /* error */
01295       return -1;
01296     } else if (ret == 1) {      /* segmented */
01297       /* loop out */
01298       break;
01299     } /* else no event occured */
01300 
01301 #ifdef BACKEND_VAD
01302     /* check up trigger in case of VAD segmentation */
01303     if (recog->jconf->decodeopt.segment) {
01304       if (recog->triggered == FALSE) {
01305         if (spsegment_trigger_sync(recog)) {
01306           if (!recog->process_segment) {
01307             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01308           }
01309           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01310           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01311           recog->triggered = TRUE;
01312         }
01313       }
01314     }
01315 #endif
01316 
01317     /* call frame-wise callback */
01318     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
01319 
01320     /* move to next */
01321     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01322       if (! mfcc->valid) continue;
01323       mfcc->f++;
01324       if (mfcc->f > r->maxframelen) mfcc->valid = FALSE;
01325     }
01326   }
01327 
01328   /* finalize real-time 1st pass */
01329   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01330     mfcc->param->header.samplenum = mfcc->f;
01331     mfcc->param->samplenum = mfcc->f;
01332   }
01333   /* 最終フレーム処理を行い，認識の結果出力と終了処理を行う */
01334   decode_end(recog);
01335 
01336   return(TRUE);
01337 }
01338 
01357 void
01358 RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog)
01359 {
01360   boolean cmn_update_p;
01361   Value *para;
01362   Jconf *jconf;
01363   RecogProcess *r;
01364 
01365   jconf = recog->jconf;
01366   para = mfcc->para;
01367   
01368   /* update CMN vector for next speech */
01369   if(para->cmn) {
01370     if (mfcc->cmn.update) {
01371       cmn_update_p = TRUE;
01372       for(r=recog->process_list;r;r=r->next) {
01373         if (!r->live) continue;
01374         if (r->am->mfcc != mfcc) continue;
01375         if (r->result.status < 0) { /* input rejected */
01376           cmn_update_p = FALSE;
01377           break;
01378         }
01379       }
01380       if (cmn_update_p) {
01381         /* update last CMN parameter for next spech */
01382         CMN_realtime_update(mfcc->cmn.wrk, mfcc->param);
01383       } else {
01384         /* do not update, because the last input is bogus */
01385         if (verbose_flag) {
01386 #ifdef BACKEND_VAD
01387           if (!recog->jconf->decodeopt.segment || recog->triggered) {
01388             jlog("STAT: skip CMN parameter update since last input was invalid\n");
01389           }
01390 #else
01391           jlog("STAT: skip CMN parameter update since last input was invalid\n");
01392 #endif
01393         }
01394       }
01395     }
01396     /* if needed, save the updated CMN parameter to a file */
01397     if (mfcc->cmn.save_filename) {
01398       if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) {
01399         jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename);
01400       }
01401     }
01402   }
01403 }
01404 
01417 void
01418 RealTimeTerminate(Recog *recog)
01419 {
01420   MFCCCalc *mfcc;
01421 
01422   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01423     mfcc->param->header.samplenum = mfcc->f;
01424     mfcc->param->samplenum = mfcc->f;
01425   }
01426 
01427   /* 最終フレーム処理を行い，認識の結果出力と終了処理を行う */
01428   decode_end(recog);
01429 }
01430 
01442 void
01443 realbeam_free(Recog *recog)
01444 {
01445   RealBeam *r;
01446 
01447   r = &(recog->real);
01448 
01449   if (recog->real.window) {
01450     free(recog->real.window);
01451     recog->real.window = NULL;
01452   }
01453   if (recog->real.rest_Speech) {
01454     free(recog->real.rest_Speech);
01455     recog->real.rest_Speech = NULL;
01456   }
01457 }
01458 
01459 
01460 
01461 /************************************************************************/
01462 /************************************************************************/
01463 /************************************************************************/
01464 /************************************************************************/
01465 
01466 /* MFCC realtime input */
01484 int
01485 mfcc_go(Recog *recog, int (*ad_check)(Recog *))
01486 {
01487   RealBeam *r;
01488   MFCCCalc *mfcc;
01489   int new_f;
01490   int ret, ret3;
01491 
01492   r = &(recog->real);
01493 
01494   r->last_is_segmented = FALSE;
01495   
01496   while(1/*in_data_vec*/) {
01497 
01498     ret = mfc_module_read(recog->mfcclist, &new_f);
01499 
01500     if (debug2_flag) {
01501       if (recog->mfcclist->f < new_f) {
01502         jlog("%d: %d (%d)\n", recog->mfcclist->f, new_f, ret);
01503       }
01504     }
01505  
01506     /* callback poll */
01507     if (ad_check != NULL) {
01508       if ((ret3 = (*(ad_check))(recog)) < 0) {
01509         if ((ret3 == -1 && recog->mfcclist->f == 0) || ret3 == -2) {
01510           return(-2);
01511         }
01512       }
01513     }
01514 
01515     while(recog->mfcclist->f < new_f) {
01516 
01517       recog->mfcclist->valid = TRUE;
01518 
01519 #ifdef ENABLE_PLUGIN
01520       /* call post-process plugin if exist */
01521       plugin_exec_vector_postprocess(recog->mfcclist->param->parvec[recog->mfcclist->f], recog->mfcclist->param->veclen, recog->mfcclist->f);
01522 #endif
01523 
01524       /* 処理を1フレーム進める */
01525       /* proceed one frame */
01526       
01527       switch(proceed_one_frame(recog)) {
01528       case -1:                  /* error */
01529         return -1;
01530       case 0:                   /* normal */
01531         break;
01532       case 1:                   /* segmented by process */
01533         return 2;
01534       }
01535 
01536       /* 1フレーム処理が進んだのでポインタを進める */
01537       /* proceed frame pointer */
01538       for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01539         if (!mfcc->valid) continue;
01540         mfcc->f++;
01541       }
01542     }
01543     
01544     /* check if input end */
01545     switch(ret) {
01546     case -1:                    /* end of input */
01547       return 0;
01548     case -2:                    /* error */
01549       return -1;
01550     case -3:                    /* end of segment request */
01551       return 1;
01552     }
01553   }
01554   /* 与えられた音声セグメントに対する認識処理が全て終了
01555      呼び出し元に, 入力を続けるよう伝える */
01556   /* input segment is fully processed
01557      tell the caller to continue input */
01558   return(1);
01559 }
01560 
01561 /* end of file */
01562 
01563