Julius 4.2
libjulius/src/pass1.c
説明を見る。
00001 
00042 /*
00043  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00044  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00045  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00046  * All rights reserved
00047  */
00048 
00049 #include <julius/julius.h>
00050 
00051 /********************************************************************/
00052 /* 第1パスを実行するメイン関数                                     */
00053 /* 入力をパイプライン処理する場合は realtime_1stpass.c を参照のこと */
00054 /* main function to execute 1st pass                                */
00055 /* the pipeline processing is not here: see realtime_1stpass.c      */
00056 /********************************************************************/
00057 
00111 int
00112 decode_proceed(Recog *recog)
00113 {
00114   MFCCCalc *mfcc;
00115   boolean break_flag;
00116   boolean break_decode;
00117   RecogProcess *p;
00118   boolean ok_p;
00119 #ifdef GMM_VAD
00120   GMMCalc *gmm;
00121   boolean break_gmm;
00122 #endif
00123   
00124   break_decode = FALSE;
00125 
00126   for(p = recog->process_list; p; p = p->next) {
00127 #ifdef DETERMINE
00128     p->have_determine = FALSE;
00129 #endif
00130     p->have_interim = FALSE;
00131   }
00132   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00133     mfcc->segmented = FALSE;
00134   }
00135 
00136 #ifdef POWER_REJECT
00137   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00138     if (!mfcc->valid) continue;
00139     if (mfcc->f == 0) {
00140       mfcc->avg_power = 0.0;
00141       if (debug2_flag) jlog("STAT: power_reject: reset\n");
00142     }
00143   }
00144 #endif
00145 
00146 
00147 #ifdef GMM_VAD
00148   if (recog->gmm != NULL) {
00149     /* reset flags */
00150     break_gmm = FALSE;
00151     recog->gc->want_rewind = FALSE;
00152   }
00153 #endif
00154   if (recog->gmm != NULL && recog->gmmmfcc->valid) {
00155     /* GMM 計算を行う */
00156     if (recog->gmmmfcc->f == 0) {
00157       /* GMM 計算の初期化 */
00158       gmm_prepare(recog);
00159     }
00160     /* このフレームに対するGMMの尤度を計算 */
00161     gmm_proceed(recog);
00162 #ifdef GMM_VAD
00163     /* Check for GMM-based VAD */
00164     gmm = recog->gc;
00165     gmm_check_trigger(recog);
00166     if (gmm->after_trigger) {
00167       /* after trigger, in speech area */
00168       if (gmm->down_trigger) {
00169         /* down trigger, end segment */
00170 #ifdef GMM_VAD_DEBUG
00171         printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f);
00172 #endif
00173         recog->gmmmfcc->sparea_start = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00174         if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0;
00175         gmm->after_trigger = FALSE;
00176         recog->gmmmfcc->segmented = TRUE;
00177         break_gmm = TRUE;
00178       } else {
00179         /* keep recognition */
00180       }
00181     } else {
00182       /* before trigger, in noise area */
00183       if (gmm->up_trigger) {
00184         /* start recognition */
00185         /* request caller to rewind to the backstep point and
00186            re-start with normal search */
00187         if (recog->gmmmfcc->f + 1 < recog->jconf->detect.gmm_margin) {
00188           gmm->rewind_frame = 0;
00189         } else {
00190           gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00191         }
00192 #ifdef GMM_VAD_DEBUG
00193         printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame);
00194 #endif
00195         gmm->want_rewind = TRUE;
00196         gmm->want_rewind_reprocess = TRUE;
00197         gmm->after_trigger = TRUE;
00198         return 0;
00199       } else {
00200         /* before trigger, noise continues */
00201 
00202         /* if noise goes more than a certain frame, shrink the noise area
00203            to avoid unlimited memory usage */
00204         if (recog->gmmmfcc->f + 1 > GMM_VAD_AUTOSHRINK_LIMIT) {
00205           gmm->want_rewind = TRUE;
00206           gmm->want_rewind_reprocess = FALSE;
00207           gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00208           if (debug2_flag) {
00209             jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT);
00210           }
00211         }
00212 
00213         /* skip recognition processing */
00214         return 0;
00215       }
00216     }
00217 #endif /* GMM_VAD */
00218   }
00219 
00220   for(p = recog->process_list; p; p = p->next) {
00221     if (!p->live) continue;
00222     mfcc = p->am->mfcc;
00223     if (!mfcc->valid) {
00224       /* このフレームの処理をスキップ */
00225       /* skip processing the frame */
00226       continue;
00227     }
00228 
00229     /* mfcc-f のフレームについて認識処理(フレーム同期ビーム探索)を進める */
00230     /* proceed beam search for mfcc->f */
00231     if (mfcc->f == 0) {
00232       /* 最初のフレーム: 探索処理を初期化 */
00233       /* initial frame: initialize search process */
00234       if (get_back_trellis_init(mfcc->param, p) == FALSE) {
00235         jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name);
00236         return -1;
00237       }
00238     }
00239     if (mfcc->f > 0 || p->am->hmminfo->multipath) {
00240       /* 1フレーム探索を進める */
00241       /* proceed search for 1 frame */
00242       if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) {
00243         mfcc->segmented = TRUE;
00244         break_decode = TRUE;
00245       }
00246       if (p->config->successive.enabled) {
00247         if (detect_end_of_segment(p, mfcc->f - 1)) {
00248           /* セグメント終了検知: 第1パスここで中断 */
00249           mfcc->segmented = TRUE;
00250           break_decode = TRUE;
00251         }
00252       }
00253     }
00254   }
00255 
00256   /* セグメントすべきかどうか最終的な判定を行う.
00257      デコーダベースVADあるいは spsegment の場合,複数インスタンス間で OR
00258      を取る.また,GMMなど複数基準がある場合は基準間で AND を取る.*/
00259   /* determine whether to segment at here
00260      If multiple segmenter exists, take their AND */
00261   break_flag = FALSE;
00262   if (break_decode
00263 #ifdef GMM_VAD
00264       || (recog->gmm != NULL && break_gmm)
00265 #endif
00266       ) {
00267     break_flag = TRUE;
00268   }
00269 
00270   if (break_flag) {
00271     /* 探索処理の終了が発生したのでここで認識を終える. 
00272        最初のフレームから [f-1] 番目までが認識されたことになる
00273     */
00274     /* the recognition process tells us to stop recognition, so
00275        recognition should be terminated here.
00276        the recognized data are [0..f-1] */
00277 
00278     /* 最終フレームを last_time にセット */
00279     /* set the last frame to last_time */
00280     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00281       mfcc->last_time = mfcc->f - 1;
00282     }
00283 
00284     if (! recog->jconf->decodeopt.segment) {
00285       /* ショートポーズ以外で切れた場合,残りのサンプルは認識せずに捨てる */
00286       /* drop rest inputs if segmented by error */
00287       for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00288         mfcc->param->header.samplenum = mfcc->f;
00289         mfcc->param->samplenum = mfcc->f;
00290       }
00291     }
00292 
00293     return 1;
00294   }
00295 
00296   /* call frame-wise callback for the processing results if any */
00297 #ifdef DETERMINE
00298   ok_p = FALSE;
00299   for(p=recog->process_list;p;p=p->next) {
00300     if (!p->live) continue;
00301     if (p->have_determine) {
00302       ok_p = TRUE;
00303     }
00304   }
00305   if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog);
00306 #endif
00307   ok_p = FALSE;
00308   for(p=recog->process_list;p;p=p->next) {
00309     if (!p->live) continue;
00310     if (p->have_interim) {
00311       ok_p = TRUE;
00312     }
00313   }
00314   if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
00315   
00316   return 0;
00317 }
00318 
#ifdef POWER_REJECT
/**
 * Check whether the input should be rejected because of low power.
 *
 * For each MFCC instance with a non-zero accumulated power, the
 * accumulated value divided by the number of samples of its parameter
 * is compared with the configured threshold (reject.powerthres).
 *
 * @param recog [in] engine instance
 *
 * @return TRUE as soon as one instance falls below the threshold,
 * FALSE if none does.
 */
boolean
power_reject(Recog *recog)
{
  MFCCCalc *mfcc;

  for (mfcc = recog->mfcclist; mfcc != NULL; mfcc = mfcc->next) {
    /* an accumulated power of exactly zero means nothing to judge for
       this instance (not realtime and raw file processing) */
    if (mfcc->avg_power != 0.0) {
      if (debug2_flag) {
        jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", mfcc->id, mfcc->avg_power / mfcc->param->samplenum);
      }
      if (mfcc->avg_power / mfcc->param->samplenum < recog->jconf->reject.powerthres) {
        return TRUE;
      }
    }
  }

  return FALSE;
}
#endif
00334 
00371 void
00372 decode_end_segmented(Recog *recog)
00373 {
00374   boolean ok_p;
00375   int mseclen;
00376   RecogProcess *p;
00377   int last_status;
00378 
00379   /* rejectshort 指定時, 入力が短ければここで第1パス結果を出力しない */
00380   /* suppress 1st pass output if -rejectshort and input shorter than specified */
00381   ok_p = TRUE;
00382   if (recog->jconf->reject.rejectshortlen > 0) {
00383     mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00384     if (mseclen < recog->jconf->reject.rejectshortlen) {
00385       last_status = J_RESULT_STATUS_REJECT_SHORT;
00386       ok_p = FALSE;
00387     }
00388   }
00389 
00390 #ifdef POWER_REJECT
00391   if (ok_p) {
00392     if (power_reject(recog)) {
00393       last_status = J_RESULT_STATUS_REJECT_POWER;
00394       ok_p = FALSE;
00395     }
00396   }
00397 #endif
00398 
00399   if (ok_p) {
00400     for(p=recog->process_list;p;p=p->next) {
00401       if (!p->live) continue;
00402       finalize_1st_pass(p, p->am->mfcc->last_time);
00403     }
00404   } else {
00405     for(p=recog->process_list;p;p=p->next) {
00406       if (!p->live) continue;
00407       p->result.status = last_status;
00408     }
00409   }
00410   if (recog->jconf->decodeopt.segment) {
00411     finalize_segment(recog);
00412   }
00413 
00414   if (recog->gmm != NULL) {
00415     /* GMM 計算の終了 */
00416     gmm_end(recog);
00417   }
00418 }
00419 
00449 void
00450 decode_end(Recog *recog)
00451 {
00452   MFCCCalc *mfcc;
00453   int mseclen;
00454   boolean ok_p;
00455   RecogProcess *p;
00456   int last_status;
00457 
00458   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00459     mfcc->segmented = FALSE;
00460   }
00461 
00462   if (recog->gmm != NULL) {
00463     /* GMM 計算の終了 */
00464     gmm_end(recog);
00465   }
00466 
00467 #ifdef GMM_VAD
00468   /* もしトリガがかからないまま入力終了に達したのなら,そのままエラー終了 */
00469   if (recog->jconf->decodeopt.segment) {
00470     if (recog->gmm) {
00471       if (recog->gc->after_trigger == FALSE) {
00472         for(p=recog->process_list;p;p=p->next) {
00473           p->result.status = J_RESULT_STATUS_ONLY_SILENCE;      /* reject by decoding */
00474         }
00475         /* ショートポーズセグメンテーションの場合,
00476            入力パラメータ分割などの最終処理も行なう */
00477         /* When short-pause segmentation enabled */
00478         finalize_segment(recog);
00479         return;
00480       }
00481     }
00482   }
00483 #endif
00484 
00485   /* 第1パスの最後のフレームの認識処理を行う */
00486   /* finalize 1st pass */
00487   for(p=recog->process_list;p;p=p->next) {
00488     if (!p->live) continue;
00489 #ifdef SPSEGMENT_NAIST
00490     if (recog->jconf->decodeopt.segment) {
00491       if (p->pass1.after_trigger == FALSE) continue;
00492     }
00493 #endif
00494     mfcc = p->am->mfcc;
00495     if (mfcc->f > 0) {
00496       get_back_trellis_end(mfcc->param, p);
00497     }
00498   }
00499 
00500   /* 終了処理 */
00501   for(p=recog->process_list;p;p=p->next) {
00502     if (!p->live) continue;
00503 
00504     ok_p = TRUE;
00505 
00506     /* check rejection by no input */
00507     if (ok_p) {
00508       mfcc = p->am->mfcc;
00509       /* 入力長がデルタの計算に十分でない場合,入力無しとする. */
00510       /* if input is short for compute all the delta coeff., terminate here */
00511       if (mfcc->f == 0) {
00512         jlog("STAT: no input frame\n");
00513         last_status = J_RESULT_STATUS_FAIL;
00514         ok_p = FALSE;
00515       }
00516     }
00517 
00518     /* check rejection by input length */
00519     if (ok_p) {
00520       if (recog->jconf->reject.rejectshortlen > 0) {
00521         mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00522         if (mseclen < recog->jconf->reject.rejectshortlen) {
00523           last_status = J_RESULT_STATUS_REJECT_SHORT;
00524           ok_p = FALSE;
00525         }
00526       }
00527     }
00528 
00529 #ifdef POWER_REJECT
00530     /* check rejection by average power */
00531     if (ok_p) {
00532       if (power_reject(recog)) {
00533         last_status = J_RESULT_STATUS_REJECT_POWER;
00534         ok_p = FALSE;
00535       }
00536     }
00537 #endif
00538 
00539 #ifdef SPSEGMENT_NAIST
00540     /* check rejection non-triggered input segment */
00541     if (ok_p) {
00542       if (recog->jconf->decodeopt.segment) {
00543         if (p->pass1.after_trigger == FALSE) {
00544           last_status = J_RESULT_STATUS_ONLY_SILENCE;   /* reject by decoding */
00545           ok_p = FALSE;
00546         }
00547       }
00548     }
00549 #endif
00550 
00551     if (ok_p) {
00552       /* valid input segment, finalize it */
00553       finalize_1st_pass(p, mfcc->param->samplenum);
00554     } else {
00555       /* invalid input segment */
00556       p->result.status = last_status;
00557     }
00558   }
00559   if (recog->jconf->decodeopt.segment) {
00560     /* ショートポーズセグメンテーションの場合,
00561        入力パラメータ分割などの最終処理も行なう */
00562     /* When short-pause segmentation enabled */
00563     finalize_segment(recog);
00564   }
00565 }
00566 
00567 
00601 boolean
00602 get_back_trellis(Recog *recog)
00603 {
00604   boolean ok_p;
00605   MFCCCalc *mfcc;
00606   int rewind_frame;
00607   PROCESS_AM *am;
00608   boolean reprocess;
00609 
00610   /* initialize mfcc instances */
00611   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00612     /* mark all as valid, since all frames are fully prepared beforehand */
00613     if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00614     else mfcc->valid = TRUE;
00615     /* set frame pointers to 0 */
00616     mfcc->f = 0;
00617   }
00618 
00619   /* callback of process start */
00620 #ifdef BACKEND_VAD
00621   if (recog->jconf->decodeopt.segment) {
00622     /* at first time, recognition does not start yet */
00623     /* reset segmentation flags */
00624     spsegment_init(recog);
00625   } else {
00626     /* execute callback for pass1 begin here */
00627     callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00628     callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00629     recog->triggered = TRUE;
00630   }
00631 #else
00632   if (recog->jconf->decodeopt.segment) {
00633     if (!recog->process_segment) {
00634       callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00635     }
00636     callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00637   } else {
00638     callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00639   }
00640   callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00641   recog->triggered = TRUE;
00642 #endif
00643 
00644   while(1) {
00645     ok_p = TRUE;
00646     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00647       if (! mfcc->valid) continue;
00648       if (mfcc->f < mfcc->param->samplenum) {
00649         mfcc->valid = TRUE;
00650         ok_p = FALSE;
00651       } else {
00652         mfcc->valid = FALSE;
00653       }
00654     }
00655     if (ok_p) {
00656       /* すべての MFCC が終わりに達したのでループ終了 */
00657       /* all MFCC has been processed, end of loop  */
00658       break;
00659     }
00660 
00661     switch (decode_proceed(recog)) {
00662     case -1: /* error */
00663       return FALSE;
00664       break;
00665     case 0:                     /* success */
00666       break;
00667     case 1:                     /* segmented */
00668       /* 探索中断: 処理された入力は 0 から t-2 まで */
00669       /* search terminated: processed input = [0..t-2] */
00670       /* この時点で第1パスを終了する */
00671       /* end the 1st pass at this point */
00672       decode_end_segmented(recog);
00673       /* terminate 1st pass here */
00674       return TRUE;
00675     }
00676 
00677 #ifdef BACKEND_VAD
00678     /* check up trigger in case of VAD segmentation */
00679     if (recog->jconf->decodeopt.segment) {
00680       if (recog->triggered == FALSE) {
00681         if (spsegment_trigger_sync(recog)) {
00682           if (!recog->process_segment) {
00683             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00684           }
00685           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00686           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00687           recog->triggered = TRUE;
00688         }
00689       }
00690     }
00691 #endif
00692 
00693     if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00694       /* do rewind for all mfcc here */
00695       spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00696       /* reset outprob cache for all AM */
00697       for(am=recog->amlist;am;am=am->next) {
00698         outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00699       }
00700     }
00701     /* call frame-wise callback */
00702     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00703 
00704     /* 1フレーム処理が進んだのでポインタを進める */
00705     /* proceed frame pointer */
00706     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00707       if (!mfcc->valid) continue;
00708       mfcc->f++;
00709     }
00710 
00711     if (recog->process_want_terminate) {
00712       /* termination requested */
00713       decode_end_segmented(recog);
00714       return TRUE;
00715     }
00716   }
00717 
00718   /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
00719   decode_end(recog);
00720 
00721   return TRUE;
00722 }
00723 
00724 /* end of file */