/* Julius 4.2 */
00001 00042 /* 00043 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00044 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00045 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00046 * All rights reserved 00047 */ 00048 00049 #include <julius/julius.h> 00050 00051 /********************************************************************/ 00052 /* 第1パスを実行するメイン関数 */ 00053 /* 入力をパイプライン処理する場合は realtime_1stpass.c を参照のこと */ 00054 /* main function to execute 1st pass */ 00055 /* the pipeline processing is not here: see realtime_1stpass.c */ 00056 /********************************************************************/ 00057 00111 int 00112 decode_proceed(Recog *recog) 00113 { 00114 MFCCCalc *mfcc; 00115 boolean break_flag; 00116 boolean break_decode; 00117 RecogProcess *p; 00118 boolean ok_p; 00119 #ifdef GMM_VAD 00120 GMMCalc *gmm; 00121 boolean break_gmm; 00122 #endif 00123 00124 break_decode = FALSE; 00125 00126 for(p = recog->process_list; p; p = p->next) { 00127 #ifdef DETERMINE 00128 p->have_determine = FALSE; 00129 #endif 00130 p->have_interim = FALSE; 00131 } 00132 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00133 mfcc->segmented = FALSE; 00134 } 00135 00136 #ifdef POWER_REJECT 00137 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00138 if (!mfcc->valid) continue; 00139 if (mfcc->f == 0) { 00140 mfcc->avg_power = 0.0; 00141 if (debug2_flag) jlog("STAT: power_reject: reset\n"); 00142 } 00143 } 00144 #endif 00145 00146 00147 #ifdef GMM_VAD 00148 if (recog->gmm != NULL) { 00149 /* reset flags */ 00150 break_gmm = FALSE; 00151 recog->gc->want_rewind = FALSE; 00152 } 00153 #endif 00154 if (recog->gmm != NULL && recog->gmmmfcc->valid) { 00155 /* GMM 計算を行う */ 00156 if (recog->gmmmfcc->f == 0) { 00157 /* GMM 計算の初期化 */ 00158 gmm_prepare(recog); 00159 } 00160 /* このフレームに対するGMMの尤度を計算 */ 00161 gmm_proceed(recog); 00162 #ifdef GMM_VAD 00163 /* Check for GMM-based VAD */ 00164 gmm = recog->gc; 
00165 gmm_check_trigger(recog); 00166 if (gmm->after_trigger) { 00167 /* after trigger, in speech area */ 00168 if (gmm->down_trigger) { 00169 /* down trigger, end segment */ 00170 #ifdef GMM_VAD_DEBUG 00171 printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f); 00172 #endif 00173 recog->gmmmfcc->sparea_start = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; 00174 if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0; 00175 gmm->after_trigger = FALSE; 00176 recog->gmmmfcc->segmented = TRUE; 00177 break_gmm = TRUE; 00178 } else { 00179 /* keep recognition */ 00180 } 00181 } else { 00182 /* before trigger, in noise area */ 00183 if (gmm->up_trigger) { 00184 /* start recognition */ 00185 /* request caller to rewind to the backstep point and 00186 re-start with normal search */ 00187 if (recog->gmmmfcc->f + 1 < recog->jconf->detect.gmm_margin) { 00188 gmm->rewind_frame = 0; 00189 } else { 00190 gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; 00191 } 00192 #ifdef GMM_VAD_DEBUG 00193 printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame); 00194 #endif 00195 gmm->want_rewind = TRUE; 00196 gmm->want_rewind_reprocess = TRUE; 00197 gmm->after_trigger = TRUE; 00198 return 0; 00199 } else { 00200 /* before trigger, noise continues */ 00201 00202 /* if noise goes more than a certain frame, shrink the noise area 00203 to avoid unlimited memory usage */ 00204 if (recog->gmmmfcc->f + 1 > GMM_VAD_AUTOSHRINK_LIMIT) { 00205 gmm->want_rewind = TRUE; 00206 gmm->want_rewind_reprocess = FALSE; 00207 gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; 00208 if (debug2_flag) { 00209 jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT); 00210 } 00211 } 00212 00213 /* skip recognition processing */ 00214 return 0; 00215 } 00216 } 00217 #endif /* GMM_VAD */ 00218 } 00219 00220 for(p = recog->process_list; p; 
p = p->next) { 00221 if (!p->live) continue; 00222 mfcc = p->am->mfcc; 00223 if (!mfcc->valid) { 00224 /* このフレームの処理をスキップ */ 00225 /* skip processing the frame */ 00226 continue; 00227 } 00228 00229 /* mfcc-f のフレームについて認識処理(フレーム同期ビーム探索)を進める */ 00230 /* proceed beam search for mfcc->f */ 00231 if (mfcc->f == 0) { 00232 /* 最初のフレーム: 探索処理を初期化 */ 00233 /* initial frame: initialize search process */ 00234 if (get_back_trellis_init(mfcc->param, p) == FALSE) { 00235 jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name); 00236 return -1; 00237 } 00238 } 00239 if (mfcc->f > 0 || p->am->hmminfo->multipath) { 00240 /* 1フレーム探索を進める */ 00241 /* proceed search for 1 frame */ 00242 if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) { 00243 mfcc->segmented = TRUE; 00244 break_decode = TRUE; 00245 } 00246 if (p->config->successive.enabled) { 00247 if (detect_end_of_segment(p, mfcc->f - 1)) { 00248 /* セグメント終了検知: 第1パスここで中断 */ 00249 mfcc->segmented = TRUE; 00250 break_decode = TRUE; 00251 } 00252 } 00253 } 00254 } 00255 00256 /* セグメントすべきかどうか最終的な判定を行う. 00257 デコーダベースVADあるいは spsegment の場合,複数インスタンス間で OR 00258 を取る.また,GMMなど複数基準がある場合は基準間で AND を取る.*/ 00259 /* determine whether to segment at here 00260 If multiple segmenter exists, take their AND */ 00261 break_flag = FALSE; 00262 if (break_decode 00263 #ifdef GMM_VAD 00264 || (recog->gmm != NULL && break_gmm) 00265 #endif 00266 ) { 00267 break_flag = TRUE; 00268 } 00269 00270 if (break_flag) { 00271 /* 探索処理の終了が発生したのでここで認識を終える. 00272 最初のフレームから [f-1] 番目までが認識されたことになる 00273 */ 00274 /* the recognition process tells us to stop recognition, so 00275 recognition should be terminated here. 00276 the recognized data are [0..f-1] */ 00277 00278 /* 最終フレームを last_time にセット */ 00279 /* set the last frame to last_time */ 00280 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00281 mfcc->last_time = mfcc->f - 1; 00282 } 00283 00284 if (! 
recog->jconf->decodeopt.segment) { 00285 /* ショートポーズ以外で切れた場合,残りのサンプルは認識せずに捨てる */ 00286 /* drop rest inputs if segmented by error */ 00287 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00288 mfcc->param->header.samplenum = mfcc->f; 00289 mfcc->param->samplenum = mfcc->f; 00290 } 00291 } 00292 00293 return 1; 00294 } 00295 00296 /* call frame-wise callback for the processing results if any */ 00297 #ifdef DETERMINE 00298 ok_p = FALSE; 00299 for(p=recog->process_list;p;p=p->next) { 00300 if (!p->live) continue; 00301 if (p->have_determine) { 00302 ok_p = TRUE; 00303 } 00304 } 00305 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog); 00306 #endif 00307 ok_p = FALSE; 00308 for(p=recog->process_list;p;p=p->next) { 00309 if (!p->live) continue; 00310 if (p->have_interim) { 00311 ok_p = TRUE; 00312 } 00313 } 00314 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog); 00315 00316 return 0; 00317 } 00318 00319 #ifdef POWER_REJECT 00320 boolean 00321 power_reject(Recog *recog) 00322 { 00323 MFCCCalc *mfcc; 00324 00325 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00326 /* skip if not realtime and raw file processing */ 00327 if (mfcc->avg_power == 0.0) continue; 00328 if (debug2_flag) jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", mfcc->id, mfcc->avg_power / mfcc->param->samplenum); 00329 if (mfcc->avg_power / mfcc->param->samplenum < recog->jconf->reject.powerthres) return TRUE; 00330 } 00331 return FALSE; 00332 } 00333 #endif 00334 00371 void 00372 decode_end_segmented(Recog *recog) 00373 { 00374 boolean ok_p; 00375 int mseclen; 00376 RecogProcess *p; 00377 int last_status; 00378 00379 /* rejectshort 指定時, 入力が短ければここで第1パス結果を出力しない */ 00380 /* suppress 1st pass output if -rejectshort and input shorter than specified */ 00381 ok_p = TRUE; 00382 if (recog->jconf->reject.rejectshortlen > 0) { 00383 mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0; 
00384 if (mseclen < recog->jconf->reject.rejectshortlen) { 00385 last_status = J_RESULT_STATUS_REJECT_SHORT; 00386 ok_p = FALSE; 00387 } 00388 } 00389 00390 #ifdef POWER_REJECT 00391 if (ok_p) { 00392 if (power_reject(recog)) { 00393 last_status = J_RESULT_STATUS_REJECT_POWER; 00394 ok_p = FALSE; 00395 } 00396 } 00397 #endif 00398 00399 if (ok_p) { 00400 for(p=recog->process_list;p;p=p->next) { 00401 if (!p->live) continue; 00402 finalize_1st_pass(p, p->am->mfcc->last_time); 00403 } 00404 } else { 00405 for(p=recog->process_list;p;p=p->next) { 00406 if (!p->live) continue; 00407 p->result.status = last_status; 00408 } 00409 } 00410 if (recog->jconf->decodeopt.segment) { 00411 finalize_segment(recog); 00412 } 00413 00414 if (recog->gmm != NULL) { 00415 /* GMM 計算の終了 */ 00416 gmm_end(recog); 00417 } 00418 } 00419 00449 void 00450 decode_end(Recog *recog) 00451 { 00452 MFCCCalc *mfcc; 00453 int mseclen; 00454 boolean ok_p; 00455 RecogProcess *p; 00456 int last_status; 00457 00458 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00459 mfcc->segmented = FALSE; 00460 } 00461 00462 if (recog->gmm != NULL) { 00463 /* GMM 計算の終了 */ 00464 gmm_end(recog); 00465 } 00466 00467 #ifdef GMM_VAD 00468 /* もしトリガがかからないまま入力終了に達したのなら,そのままエラー終了 */ 00469 if (recog->jconf->decodeopt.segment) { 00470 if (recog->gmm) { 00471 if (recog->gc->after_trigger == FALSE) { 00472 for(p=recog->process_list;p;p=p->next) { 00473 p->result.status = J_RESULT_STATUS_ONLY_SILENCE; /* reject by decoding */ 00474 } 00475 /* ショートポーズセグメンテーションの場合, 00476 入力パラメータ分割などの最終処理も行なう */ 00477 /* When short-pause segmentation enabled */ 00478 finalize_segment(recog); 00479 return; 00480 } 00481 } 00482 } 00483 #endif 00484 00485 /* 第1パスの最後のフレームの認識処理を行う */ 00486 /* finalize 1st pass */ 00487 for(p=recog->process_list;p;p=p->next) { 00488 if (!p->live) continue; 00489 #ifdef SPSEGMENT_NAIST 00490 if (recog->jconf->decodeopt.segment) { 00491 if (p->pass1.after_trigger == FALSE) continue; 00492 } 00493 #endif 00494 mfcc = 
p->am->mfcc; 00495 if (mfcc->f > 0) { 00496 get_back_trellis_end(mfcc->param, p); 00497 } 00498 } 00499 00500 /* 終了処理 */ 00501 for(p=recog->process_list;p;p=p->next) { 00502 if (!p->live) continue; 00503 00504 ok_p = TRUE; 00505 00506 /* check rejection by no input */ 00507 if (ok_p) { 00508 mfcc = p->am->mfcc; 00509 /* 入力長がデルタの計算に十分でない場合,入力無しとする. */ 00510 /* if input is short for compute all the delta coeff., terminate here */ 00511 if (mfcc->f == 0) { 00512 jlog("STAT: no input frame\n"); 00513 last_status = J_RESULT_STATUS_FAIL; 00514 ok_p = FALSE; 00515 } 00516 } 00517 00518 /* check rejection by input length */ 00519 if (ok_p) { 00520 if (recog->jconf->reject.rejectshortlen > 0) { 00521 mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0; 00522 if (mseclen < recog->jconf->reject.rejectshortlen) { 00523 last_status = J_RESULT_STATUS_REJECT_SHORT; 00524 ok_p = FALSE; 00525 } 00526 } 00527 } 00528 00529 #ifdef POWER_REJECT 00530 /* check rejection by average power */ 00531 if (ok_p) { 00532 if (power_reject(recog)) { 00533 last_status = J_RESULT_STATUS_REJECT_POWER; 00534 ok_p = FALSE; 00535 } 00536 } 00537 #endif 00538 00539 #ifdef SPSEGMENT_NAIST 00540 /* check rejection non-triggered input segment */ 00541 if (ok_p) { 00542 if (recog->jconf->decodeopt.segment) { 00543 if (p->pass1.after_trigger == FALSE) { 00544 last_status = J_RESULT_STATUS_ONLY_SILENCE; /* reject by decoding */ 00545 ok_p = FALSE; 00546 } 00547 } 00548 } 00549 #endif 00550 00551 if (ok_p) { 00552 /* valid input segment, finalize it */ 00553 finalize_1st_pass(p, mfcc->param->samplenum); 00554 } else { 00555 /* invalid input segment */ 00556 p->result.status = last_status; 00557 } 00558 } 00559 if (recog->jconf->decodeopt.segment) { 00560 /* ショートポーズセグメンテーションの場合, 00561 入力パラメータ分割などの最終処理も行なう */ 00562 /* When short-pause segmentation enabled */ 00563 finalize_segment(recog); 00564 } 00565 } 00566 00567 00601 boolean 00602 
get_back_trellis(Recog *recog) 00603 { 00604 boolean ok_p; 00605 MFCCCalc *mfcc; 00606 int rewind_frame; 00607 PROCESS_AM *am; 00608 boolean reprocess; 00609 00610 /* initialize mfcc instances */ 00611 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00612 /* mark all as valid, since all frames are fully prepared beforehand */ 00613 if (mfcc->param->samplenum == 0) mfcc->valid = FALSE; 00614 else mfcc->valid = TRUE; 00615 /* set frame pointers to 0 */ 00616 mfcc->f = 0; 00617 } 00618 00619 /* callback of process start */ 00620 #ifdef BACKEND_VAD 00621 if (recog->jconf->decodeopt.segment) { 00622 /* at first time, recognition does not start yet */ 00623 /* reset segmentation flags */ 00624 spsegment_init(recog); 00625 } else { 00626 /* execute callback for pass1 begin here */ 00627 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00628 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00629 recog->triggered = TRUE; 00630 } 00631 #else 00632 if (recog->jconf->decodeopt.segment) { 00633 if (!recog->process_segment) { 00634 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00635 } 00636 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00637 } else { 00638 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00639 } 00640 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00641 recog->triggered = TRUE; 00642 #endif 00643 00644 while(1) { 00645 ok_p = TRUE; 00646 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00647 if (! 
mfcc->valid) continue; 00648 if (mfcc->f < mfcc->param->samplenum) { 00649 mfcc->valid = TRUE; 00650 ok_p = FALSE; 00651 } else { 00652 mfcc->valid = FALSE; 00653 } 00654 } 00655 if (ok_p) { 00656 /* すべての MFCC が終わりに達したのでループ終了 */ 00657 /* all MFCC has been processed, end of loop */ 00658 break; 00659 } 00660 00661 switch (decode_proceed(recog)) { 00662 case -1: /* error */ 00663 return FALSE; 00664 break; 00665 case 0: /* success */ 00666 break; 00667 case 1: /* segmented */ 00668 /* 探索中断: 処理された入力は 0 から t-2 まで */ 00669 /* search terminated: processed input = [0..t-2] */ 00670 /* この時点で第1パスを終了する */ 00671 /* end the 1st pass at this point */ 00672 decode_end_segmented(recog); 00673 /* terminate 1st pass here */ 00674 return TRUE; 00675 } 00676 00677 #ifdef BACKEND_VAD 00678 /* check up trigger in case of VAD segmentation */ 00679 if (recog->jconf->decodeopt.segment) { 00680 if (recog->triggered == FALSE) { 00681 if (spsegment_trigger_sync(recog)) { 00682 if (!recog->process_segment) { 00683 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 00684 } 00685 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); 00686 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); 00687 recog->triggered = TRUE; 00688 } 00689 } 00690 } 00691 #endif 00692 00693 if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { 00694 /* do rewind for all mfcc here */ 00695 spsegment_restart_mfccs(recog, rewind_frame, reprocess); 00696 /* reset outprob cache for all AM */ 00697 for(am=recog->amlist;am;am=am->next) { 00698 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); 00699 } 00700 } 00701 /* call frame-wise callback */ 00702 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); 00703 00704 /* 1フレーム処理が進んだのでポインタを進める */ 00705 /* proceed frame pointer */ 00706 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00707 if (!mfcc->valid) continue; 00708 mfcc->f++; 00709 } 00710 00711 if (recog->process_want_terminate) { 00712 /* termination requested */ 00713 
decode_end_segmented(recog); 00714 return TRUE; 00715 } 00716 } 00717 00718 /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ 00719 decode_end(recog); 00720 00721 return TRUE; 00722 } 00723 00724 /* end of file */