/* Julius 4.2 */
00001 00019 /* 00020 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00021 * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan 00022 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00023 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00024 * All rights reserved 00025 */ 00161 #define GLOBAL_VARIABLE_DEFINE ///< Actually make global vars in global.h 00162 #include <julius/julius.h> 00163 #include <signal.h> 00164 #if defined(_WIN32) && !defined(__CYGWIN32__) 00165 #include <mbctype.h> 00166 #include <mbstring.h> 00167 #endif 00168 00169 /* ---------- utility functions -----------------------------------------*/ 00170 #ifdef REPORT_MEMORY_USAGE 00171 00181 static void 00182 print_mem() 00183 { 00184 char buf[200]; 00185 sprintf(buf,"ps -o vsz,rss -p %d",getpid()); 00186 system(buf); 00187 fflush(stdout); 00188 fflush(stderr); 00189 } 00190 #endif 00191 00192 00209 SentenceAlign * 00210 result_align_new() 00211 { 00212 SentenceAlign *new; 00213 new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign)); 00214 new->w = NULL; 00215 new->ph = NULL; 00216 new->loc = NULL; 00217 new->begin_frame = NULL; 00218 new->end_frame = NULL; 00219 new->avgscore = NULL; 00220 new->is_iwsp = NULL; 00221 new->next = NULL; 00222 return new; 00223 } 00224 00241 void 00242 result_align_free(SentenceAlign *a) 00243 { 00244 if (a->w) free(a->w); 00245 if (a->ph) free(a->ph); 00246 if (a->loc) free(a->loc); 00247 if (a->begin_frame) free(a->begin_frame); 00248 if (a->end_frame) free(a->end_frame); 00249 if (a->avgscore) free(a->avgscore); 00250 if (a->is_iwsp) free(a->is_iwsp); 00251 free(a); 00252 } 00253 00269 void 00270 result_sentence_malloc(RecogProcess *r, int num) 00271 { 00272 int i; 00273 r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num); 00274 for(i=0;i<num;i++) r->result.sent[i].align = NULL; 00275 r->result.sentnum = 0; 00276 } 00277 00291 void 00292 
result_sentence_free(RecogProcess *r) 00293 { 00294 int i; 00295 SentenceAlign *a, *atmp; 00296 if (r->result.sent) { 00297 for(i=0;i<r->result.sentnum;i++) { 00298 a = r->result.sent[i].align; 00299 while(a) { 00300 atmp = a->next; 00301 result_align_free(a); 00302 a = atmp; 00303 } 00304 } 00305 free(r->result.sent); 00306 r->result.sent = NULL; 00307 } 00308 } 00309 00323 void 00324 clear_result(RecogProcess *r) 00325 { 00326 #ifdef WORD_GRAPH 00327 /* clear 1st pass word graph output */ 00328 wordgraph_clean(&(r->result.wg1)); 00329 #endif 00330 00331 if (r->lmvar == LM_DFA_WORD) { 00332 if (r->result.status == J_RESULT_STATUS_SUCCESS) { 00333 /* clear word recog result of first pass as in final result */ 00334 free(r->result.sent); 00335 } 00336 } else { 00337 if (r->graphout) { 00338 if (r->config->graph.confnet) { 00339 /* free confusion network clusters */ 00340 cn_free_all(&(r->result.confnet)); 00341 } else if (r->config->graph.lattice) { 00342 } 00343 /* clear all wordgraph */ 00344 wordgraph_clean(&(r->result.wg)); 00345 } 00346 result_sentence_free(r); 00347 } 00348 } 00349 00350 /* --------------------- speech buffering ------------------ */ 00351 00384 int 00385 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog) 00386 { 00387 if (recog->speechlen == 0) { /* first part of a segment */ 00388 if (!recog->process_active) { 00389 return(1); 00390 } 00391 } 00392 00393 if (recog->speechlen + len > recog->speechalloclen) { 00394 while (recog->speechlen + len > recog->speechalloclen) { 00395 recog->speechalloclen += MAX_SPEECH_ALLOC_STEP; 00396 } 00397 if (recog->speech == NULL) { 00398 recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen); 00399 } else { 00400 if (debug2_flag) { 00401 jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen); 00402 } 00403 recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen); 00404 } 00405 } 00406 00407 /* store now[0..len] to 
recog->speech[recog->speechlen] */ 00408 memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16)); 00409 recog->speechlen += len; 00410 return(0); /* tell adin_go to continue reading */ 00411 } 00412 00413 00414 /* --------------------- adin check callback --------------- */ 00442 static int 00443 callback_check_in_adin(Recog *recog) 00444 { 00445 /* module: check command and terminate recording when requested */ 00446 callback_exec(CALLBACK_POLL, recog); 00447 /* With audio input via adinnet, TERMINATE command will issue terminate 00448 command to the adinnet client. The client then stops recording 00449 immediately and return end-of-segment ack. Then it will cause this 00450 process to stop recognition as normal. So we need not to 00451 perform immediate termination at this callback, but just ignore the 00452 results in the main.c. */ 00453 #if 1 00454 if (recog->process_want_terminate) { /* TERMINATE ... force termination */ 00455 return(-2); 00456 } 00457 if (recog->process_want_reload) { 00458 return(-1); 00459 } 00460 #else 00461 if (recog->process_want_terminate /* TERMINATE ... force termination */ 00462 && recog->jconf->input.speech_input != SP_ADINNET) { 00463 return(-2); 00464 } 00465 if (recog->process_want_reload) { 00466 return(-1); 00467 } 00468 #endif 00469 return(0); 00470 } 00471 00472 /*********************/ 00473 /* open input stream */ 00474 /*********************/ 00492 int 00493 j_open_stream(Recog *recog, char *file_or_dev_name) 00494 { 00495 Jconf *jconf; 00496 char *p; 00497 00498 jconf = recog->jconf; 00499 00500 if (jconf->input.type == INPUT_WAVEFORM) { 00501 /* begin A/D input */ 00502 if (adin_begin(recog->adin, file_or_dev_name) == FALSE) { 00503 return -2; 00504 } 00505 /* create A/D-in thread here */ 00506 #ifdef HAVE_PTHREAD 00507 if (recog->adin->enable_thread && ! 
recog->adin->input_side_segment) { 00508 if (adin_thread_create(recog) == FALSE) { 00509 return -2; 00510 } 00511 } 00512 #endif 00513 /* when using adin func, input name should be obtained when called */ 00514 } else { 00515 switch(jconf->input.speech_input) { 00516 case SP_MFCMODULE: 00517 param_init_content(recog->mfcclist->param); 00518 if (mfc_module_begin(recog->mfcclist) == FALSE) return -2; 00519 /* when using mfc module func, input name should be obtained when called */ 00520 break; 00521 case SP_MFCFILE: 00522 /* read parameter file */ 00523 param_init_content(recog->mfcclist->param); 00524 if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) { 00525 jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name); 00526 return -1; 00527 } 00528 /* check and strip invalid frames */ 00529 if (jconf->preprocess.strip_zero_sample) { 00530 param_strip_zero(recog->mfcclist->param); 00531 } 00532 /* output frame length */ 00533 callback_exec(CALLBACK_STATUS_PARAM, recog); 00534 /* store the input filename here */ 00535 strncpy(recog->adin->current_input_name, file_or_dev_name, MAXPATHLEN); 00536 break; 00537 default: 00538 jlog("ERROR: j_open_stream: none of SP_MFC_*??\n"); 00539 return -1; 00540 } 00541 } 00542 00543 if (jconf->input.speech_input != SP_MFCFILE) { 00544 /* store current input name using input source specific function */ 00545 p = j_get_current_filename(recog); 00546 if (p) { 00547 strncpy(recog->adin->current_input_name, p, MAXPATHLEN); 00548 } else { 00549 recog->adin->current_input_name[0] = '\0'; 00550 } 00551 } 00552 00553 return 0; 00554 00555 } 00556 00574 int 00575 j_close_stream(Recog *recog) 00576 { 00577 Jconf *jconf; 00578 00579 jconf = recog->jconf; 00580 00581 if (jconf->input.type == INPUT_WAVEFORM) { 00582 #ifdef HAVE_PTHREAD 00583 /* close A/D-in thread here */ 00584 if (! 
recog->adin->input_side_segment) { 00585 if (recog->adin->enable_thread) { 00586 if (adin_thread_cancel(recog) == FALSE) { 00587 return -2; 00588 } 00589 } else { 00590 recog->adin->end_of_stream = TRUE; 00591 } 00592 } 00593 #else 00594 if (! recog->adin->input_side_segment) { 00595 recog->adin->end_of_stream = TRUE; 00596 } 00597 #endif 00598 } else { 00599 switch(jconf->input.speech_input) { 00600 case SP_MFCMODULE: 00601 if (mfc_module_end(recog->mfcclist) == FALSE) return -2; 00602 break; 00603 case SP_MFCFILE: 00604 /* nothing to do */ 00605 break; 00606 default: 00607 jlog("ERROR: j_close_stream: none of SP_MFC_*??\n"); 00608 return -1; 00609 } 00610 } 00611 00612 return 0; 00613 00614 } 00615 00616 /**********************************************************************/ 00617 /**********************************************************************/ 00618 /**********************************************************************/ 00619 00632 static void 00633 result_error(Recog *recog, int status) 00634 { 00635 MFCCCalc *mfcc; 00636 RecogProcess *r; 00637 boolean ok_p; 00638 00639 for(r=recog->process_list;r;r=r->next) r->result.status = status; 00640 00641 ok_p = FALSE; 00642 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00643 if (mfcc->f > 0) { 00644 ok_p = TRUE; 00645 break; 00646 } 00647 } 00648 if (ok_p) { /* had some input */ 00649 /* output as rejected */ 00650 callback_exec(CALLBACK_RESULT, recog); 00651 #ifdef ENABLE_PLUGIN 00652 plugin_exec_process_result(recog); 00653 #endif 00654 } 00655 } 00656 00692 static int 00693 j_recognize_stream_core(Recog *recog) 00694 { 00695 Jconf *jconf; 00696 int ret; 00697 float seclen, mseclen; 00698 RecogProcess *r; 00699 MFCCCalc *mfcc; 00700 PROCESS_AM *am; 00701 PROCESS_LM *lm; 00702 boolean ok_p; 00703 boolean process_segment_last; 00704 boolean on_the_fly; 00705 boolean pass2_p; 00706 00707 jconf = recog->jconf; 00708 00709 /* determine whether on-the-fly decoding should be done */ 00710 on_the_fly = FALSE; 
00711 switch(jconf->input.type) { 00712 case INPUT_VECTOR: 00713 switch(jconf->input.speech_input) { 00714 case SP_MFCFILE: 00715 on_the_fly = FALSE; 00716 break; 00717 case SP_MFCMODULE: 00718 on_the_fly = TRUE; 00719 break; 00720 } 00721 break; 00722 case INPUT_WAVEFORM: 00723 if (jconf->decodeopt.realtime_flag) { 00724 on_the_fly = TRUE; 00725 } else { 00726 on_the_fly = FALSE; 00727 } 00728 break; 00729 } 00730 00731 if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) { 00732 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00733 param_init_content(mfcc->param); 00734 } 00735 } 00736 00737 /* if no process instance exist, start with terminated */ 00738 if (recog->process_list == NULL) { 00739 jlog("STAT: no recog process, engine inactive\n"); 00740 j_request_pause(recog); 00741 } 00742 00743 /* update initial recognition process status */ 00744 for(r=recog->process_list;r;r=r->next) { 00745 if (r->active > 0) { 00746 r->live = TRUE; 00747 } else if (r->active < 0) { 00748 r->live = FALSE; 00749 } 00750 r->active = 0; 00751 } 00752 00753 /******************************************************************/ 00754 /* do recognition for each incoming segment from the input stream */ 00755 /******************************************************************/ 00756 while (1) { 00757 00758 start_recog: 00759 00760 /*************************************/ 00761 /* Update recognition process status */ 00762 /*************************************/ 00763 for(r=recog->process_list;r;r=r->next) { 00764 if (r->active > 0) { 00765 r->live = TRUE; 00766 jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name); 00767 } else if (r->active < 0) { 00768 r->live = FALSE; 00769 jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name); 00770 } 00771 r->active = 0; 00772 } 00773 if (debug2_flag) { 00774 for(r=recog->process_list;r;r=r->next) { 00775 jlog("DEBUG: %s: SR%02d %s\n", r->live ? 
"live" : "dead", r->config->id, r->config->name); 00776 } 00777 } 00778 /* check if any process is live */ 00779 if (recog->process_active) { 00780 ok_p = FALSE; 00781 for(r=recog->process_list;r;r=r->next) { 00782 if (r->live) ok_p = TRUE; 00783 } 00784 if (!ok_p) { /* no process is alive */ 00785 /* make whole process as inactive */ 00786 jlog("STAT: all recog process inactive, pause engine now\n"); 00787 j_request_pause(recog); 00788 } 00789 } 00790 00791 /* Check whether process status was changed while in the last run */ 00792 if (recog->process_online != recog->process_active) { 00793 recog->process_online = recog->process_active; 00794 if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); 00795 else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); 00796 } 00797 /* execute poll callback */ 00798 if (recog->process_active) { 00799 callback_exec(CALLBACK_POLL, recog); 00800 } 00801 /* reset reload flag here */ 00802 j_reset_reload(recog); 00803 00804 if (!recog->process_active) { 00805 /* now sleeping, return */ 00806 /* in the next call, we will resume from here */ 00807 return 1; 00808 } 00809 /* update process status */ 00810 if (recog->process_online != recog->process_active) { 00811 recog->process_online = recog->process_active; 00812 if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); 00813 else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); 00814 } 00815 00816 /*********************************************************/ 00817 /* check for grammar to change, and rebuild if necessary */ 00818 /*********************************************************/ 00819 for(lm=recog->lmlist;lm;lm=lm->next) { 00820 if (lm->lmtype == LM_DFA) { 00821 multigram_update(lm); /* some modification occured if return TRUE*/ 00822 } 00823 } 00824 for(r=recog->process_list;r;r=r->next) { 00825 if (!r->live) continue; 00826 if (r->lmtype == LM_DFA && r->lm->global_modified) { 00827 multigram_build(r); 00828 } 00829 } 
00830 for(lm=recog->lmlist;lm;lm=lm->next) { 00831 if (lm->lmtype == LM_DFA) lm->global_modified = FALSE; 00832 } 00833 00834 ok_p = FALSE; 00835 for(r=recog->process_list;r;r=r->next) { 00836 if (!r->live) continue; 00837 if (r->lmtype == LM_DFA) { 00838 if (r->lm->winfo == NULL || 00839 (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) { 00840 /* make this instance inactive */ 00841 r->active = -1; 00842 ok_p = TRUE; 00843 } 00844 } 00845 } 00846 if (ok_p) { /* at least one instance has no grammar */ 00847 goto start_recog; 00848 } 00849 00850 00851 /******************/ 00852 /* start 1st pass */ 00853 /******************/ 00854 if (on_the_fly) { 00855 00856 /********************************************/ 00857 /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */ 00858 /********************************************/ 00859 /* store, analysis and search in a pipeline */ 00860 /* main function is RealTimePipeLine() at realtime-1stpass.c, and 00861 it will be periodically called for each incoming input segment 00862 from the AD-in function adin_go(). RealTimePipeLine() will be 00863 called as a callback function from adin_go() */ 00864 /* after this part, directly jump to the beginning of the 2nd pass */ 00865 00866 if (recog->process_segment) { 00867 /*****************************************************************/ 00868 /* short-pause segmentation: process last remaining frames first */ 00869 /*****************************************************************/ 00870 /* last was segmented by short pause */ 00871 /* the margin segment in the last input will be re-processed first, 00872 and then the speech input will be processed */ 00873 /* process the last remaining parameters */ 00874 ret = RealTimeResume(recog); 00875 if (ret < 0) { /* error end in the margin */ 00876 jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! 
*/ 00877 return -1; 00878 } 00879 if (ret != 1) { /* if segmented again in the margin, not process the rest */ 00880 /* last parameters has been processed, so continue with the 00881 current input as normal */ 00882 /* process the incoming input */ 00883 if (jconf->input.type == INPUT_WAVEFORM) { 00884 /* get speech and process it on real-time */ 00885 ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); 00886 } else { 00887 /* get feature vector and process it */ 00888 ret = mfcc_go(recog, callback_check_in_adin); 00889 } 00890 if (ret < 0) { /* error end in adin_go */ 00891 if (ret == -2 || recog->process_want_terminate) { 00892 /* terminated by callback */ 00893 RealTimeTerminate(recog); 00894 /* reset param */ 00895 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00896 param_init_content(mfcc->param); 00897 } 00898 /* execute callback at end of pass1 */ 00899 if (recog->triggered) { 00900 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00901 /* output result terminate */ 00902 result_error(recog, J_RESULT_STATUS_TERMINATE); 00903 } 00904 goto end_recog; /* cancel this recognition */ 00905 } 00906 jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! 
*/ 00907 return(-1); 00908 } 00909 } 00910 00911 } else { 00912 00913 /***********************************************************/ 00914 /* last was not segmented, process the new incoming input */ 00915 /***********************************************************/ 00916 /* end of this input will be determined by either end of stream 00917 (in case of file input), or silence detection by adin_go(), or 00918 'TERMINATE' command from module (if module mode) */ 00919 /* prepare work area for on-the-fly processing */ 00920 if (RealTimePipeLinePrepare(recog) == FALSE) { 00921 jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n"); 00922 return (-1); 00923 } 00924 /* process the incoming input */ 00925 if (jconf->input.type == INPUT_WAVEFORM) { 00926 /* get speech and process it on real-time */ 00927 ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); 00928 } else { 00929 /* get feature vector and process it */ 00930 ret = mfcc_go(recog, callback_check_in_adin); 00931 } 00932 00933 if (ret < 0) { /* error end in adin_go */ 00934 if (ret == -2 || recog->process_want_terminate) { 00935 /* terminated by callback */ 00936 RealTimeTerminate(recog); 00937 /* reset param */ 00938 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00939 param_init_content(mfcc->param); 00940 } 00941 /* execute callback at end of pass1 */ 00942 if (recog->triggered) { 00943 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00944 /* output result terminate */ 00945 result_error(recog, J_RESULT_STATUS_TERMINATE); 00946 } 00947 goto end_recog; 00948 } 00949 jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! 
*/ 00950 return(-1); 00951 } 00952 } 00953 /******************************************************************/ 00954 /* speech stream has been processed on-the-fly, and 1st pass ends */ 00955 /******************************************************************/ 00956 if (ret == 1 || ret == 2) { /* segmented */ 00957 #ifdef HAVE_PTHREAD 00958 if (recog->adin->adinthread_buffer_overflowed) { 00959 jlog("Warning: input buffer overflow, disgard the input\n"); 00960 result_error(recog, J_RESULT_STATUS_BUFFER_OVERFLOW); 00961 /* skip 2nd pass */ 00962 goto end_recog; 00963 } 00964 #endif 00965 /* check for audio overflow */ 00966 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { 00967 if (mfcc->f >= recog->real.maxframelen) { 00968 jlog("Warning: input buffer overflow, disgard the input\n"); 00969 result_error(recog, J_RESULT_STATUS_BUFFER_OVERFLOW); 00970 /* skip 2nd pass */ 00971 goto end_recog; 00972 } 00973 } 00974 } 00975 /* last procedure of 1st-pass */ 00976 if (RealTimeParam(recog) == FALSE) { 00977 jlog("ERROR: fatal error occured, program terminates now\n"); 00978 return -1; 00979 } 00980 00981 #ifdef BACKEND_VAD 00982 /* if not triggered, skip this segment */ 00983 if (recog->jconf->decodeopt.segment && ! 
recog->triggered) { 00984 goto end_recog; 00985 } 00986 #endif 00987 00988 /* execute callback for 1st pass result */ 00989 /* result.status <0 must be skipped inside callback */ 00990 callback_exec(CALLBACK_RESULT_PASS1, recog); 00991 #ifdef WORD_GRAPH 00992 /* result.wg1 == NULL should be skipped inside callback */ 00993 callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); 00994 #endif 00995 /* execute callback at end of pass1 */ 00996 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00997 /* output frame length */ 00998 callback_exec(CALLBACK_STATUS_PARAM, recog); 00999 /* if terminate signal has been received, discard this input */ 01000 if (recog->process_want_terminate) { 01001 result_error(recog, J_RESULT_STATUS_TERMINATE); 01002 goto end_recog; 01003 } 01004 01005 /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */ 01006 01007 } else { 01008 01009 /******************/ 01010 /* buffered input */ 01011 /******************/ 01012 01013 if (jconf->input.type == INPUT_VECTOR) { 01014 /***********************/ 01015 /* feature vector input */ 01016 /************************/ 01017 if (jconf->input.speech_input == SP_MFCFILE) { 01018 /************************/ 01019 /* parameter file input */ 01020 /************************/ 01021 /* parameter type check --- compare the type to that of HMM, 01022 and adjust them if necessary */ 01023 if (jconf->input.paramtype_check_flag) { 01024 for(am=recog->amlist;am;am=am->next) { 01025 /* return param itself or new malloced param */ 01026 if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) { /* failed */ 01027 01028 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01029 param_init_content(mfcc->param); 01030 } 01031 /* tell failure */ 01032 result_error(recog, J_RESULT_STATUS_FAIL); 01033 goto end_recog; 01034 } 01035 } 01036 } 01037 /* whole input is already read, so set input status to end of stream */ 01038 /* and jump to the start point of 1st pass */ 01039 ret = 0; 01040 } 01041 } else { 01042 
/*************************/ 01043 /* buffered speech input */ 01044 /*************************/ 01045 if (!recog->process_segment) { /* no segment left */ 01046 01047 /****************************************/ 01048 /* store raw speech samples to speech[] */ 01049 /****************************************/ 01050 recog->speechlen = 0; 01051 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01052 param_init_content(mfcc->param); 01053 } 01054 /* tell module to start recording */ 01055 /* the "adin_cut_callback_store_buffer" simply stores 01056 the input speech to a buffer "speech[]" */ 01057 /* end of this input will be determined by either end of stream 01058 (in case of file input), or silence detection by adin_go(), or 01059 'TERMINATE' command from module (if module mode) */ 01060 ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog); 01061 if (ret < 0) { /* error end in adin_go */ 01062 if (ret == -2 || recog->process_want_terminate) { 01063 /* terminated by module */ 01064 /* output fail */ 01065 result_error(recog, J_RESULT_STATUS_TERMINATE); 01066 goto end_recog; 01067 } 01068 jlog("ERROR: an error occured while recording input\n"); 01069 return -1; 01070 } 01071 01072 /* output recorded length */ 01073 seclen = (float)recog->speechlen / (float)jconf->input.sfreq; 01074 jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen); 01075 01076 /* -rejectshort 指定時, 入力が指定時間以下であれば 01077 ここで入力を棄却する */ 01078 /* when using "-rejectshort", and input was shorter than 01079 specified, reject the input here */ 01080 if (jconf->reject.rejectshortlen > 0) { 01081 if (seclen * 1000.0 < jconf->reject.rejectshortlen) { 01082 result_error(recog, J_RESULT_STATUS_REJECT_SHORT); 01083 goto end_recog; 01084 } 01085 } 01086 01087 /**********************************************/ 01088 /* acoustic analysis and encoding of speech[] */ 01089 /**********************************************/ 01090 jlog("STAT: ### speech analysis (waveform -> MFCC)\n"); 01091 /* 
CMN will be computed for the whole buffered input */ 01092 if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) { 01093 /* error end, end stream */ 01094 ret = -1; 01095 /* tell failure */ 01096 result_error(recog, J_RESULT_STATUS_FAIL); 01097 goto end_recog; 01098 } 01099 01100 /* if terminate signal has been received, cancel this input */ 01101 if (recog->process_want_terminate) { 01102 result_error(recog, J_RESULT_STATUS_TERMINATE); 01103 goto end_recog; 01104 } 01105 01106 /* output frame length */ 01107 callback_exec(CALLBACK_STATUS_PARAM, recog); 01108 } 01109 } 01110 01111 #ifdef ENABLE_PLUGIN 01112 /* call post-process plugin if exist */ 01113 plugin_exec_vector_postprocess_all(recog->mfcclist->param); 01114 #endif 01115 01116 /******************************************************/ 01117 /* 1st-pass --- backward search to compute heuristics */ 01118 /******************************************************/ 01119 if (!jconf->decodeopt.realtime_flag) { 01120 /* prepare for outprob cache for each HMM state and time frame */ 01121 /* assume all MFCCCalc has params of the same sample num */ 01122 for(am=recog->amlist;am;am=am->next) { 01123 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); 01124 } 01125 } 01126 01127 /* if terminate signal has been received, cancel this input */ 01128 if (recog->process_want_terminate) { 01129 result_error(recog, J_RESULT_STATUS_TERMINATE); 01130 goto end_recog; 01131 } 01132 01133 /* execute computation of left-to-right backtrellis */ 01134 if (get_back_trellis(recog) == FALSE) { 01135 jlog("ERROR: fatal error occured, program terminates now\n"); 01136 return -1; 01137 } 01138 #ifdef BACKEND_VAD 01139 /* if not triggered, skip this segment */ 01140 if (recog->jconf->decodeopt.segment && ! 
recog->triggered) { 01141 goto end_recog; 01142 } 01143 #endif 01144 01145 /* execute callback for 1st pass result */ 01146 /* result.status <0 must be skipped inside callback */ 01147 callback_exec(CALLBACK_RESULT_PASS1, recog); 01148 #ifdef WORD_GRAPH 01149 /* result.wg1 == NULL should be skipped inside callback */ 01150 callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); 01151 #endif 01152 01153 /* execute callback at end of pass1 */ 01154 if (recog->triggered) { 01155 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 01156 } 01157 01158 /* END OF BUFFERED 1ST PASS */ 01159 01160 } 01161 01162 /**********************************/ 01163 /* end processing of the 1st-pass */ 01164 /**********************************/ 01165 /* on-the-fly 1st pass processing will join here */ 01166 01167 /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */ 01168 /* 第2パスを実行せずにここで終了する */ 01169 /* when using "-rejectshort", and input was shorter than the specified 01170 length, terminate search here and output recognition failure */ 01171 if (jconf->reject.rejectshortlen > 0) { 01172 mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0; 01173 if (mseclen < jconf->reject.rejectshortlen) { 01174 result_error(recog, J_RESULT_STATUS_REJECT_SHORT); 01175 goto end_recog; 01176 } 01177 } 01178 #ifdef POWER_REJECT 01179 if (power_reject(recog)) { 01180 result_error(recog, J_RESULT_STATUS_REJECT_POWER); 01181 goto end_recog; 01182 } 01183 #endif 01184 01185 /* if terminate signal has been received, cancel this input */ 01186 if (recog->process_want_terminate) { 01187 result_error(recog, J_RESULT_STATUS_TERMINATE); 01188 goto end_recog; 01189 } 01190 01191 /* if GMM is specified and result are to be rejected, terminate search here */ 01192 if (jconf->reject.gmm_reject_cmn_string != NULL) { 01193 if (! 
gmm_valid_input(recog)) { 01194 result_error(recog, J_RESULT_STATUS_REJECT_GMM); 01195 goto end_recog; 01196 } 01197 } 01198 01199 /* for instances with "-1pass", copy 1st pass result as final */ 01200 /* execute stack-decoding search */ 01201 /* they will be skipepd in the next pass */ 01202 for(r=recog->process_list;r;r=r->next) { 01203 if (!r->live) continue; 01204 /* skip if 1st pass was failed */ 01205 if (r->result.status < 0) continue; 01206 /* already stored on word recognition, so skip this */ 01207 if (r->lmvar == LM_DFA_WORD) continue; 01208 if (r->config->compute_only_1pass) { 01209 if (verbose_flag) { 01210 jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name); 01211 } 01212 /* prepare result storage */ 01213 result_sentence_malloc(r, 1); 01214 /* finalize result when no hypothesis was obtained */ 01215 pass2_finalize_on_no_result(r, TRUE); 01216 } 01217 } 01218 01219 /***********************************************/ 01220 /* 2nd-pass --- forward search with heuristics */ 01221 /***********************************************/ 01222 pass2_p = FALSE; 01223 for(r=recog->process_list;r;r=r->next) { 01224 if (!r->live) continue; 01225 /* if [-1pass] is specified, skip 2nd pass */ 01226 if (r->config->compute_only_1pass) continue; 01227 /* if search already failed on 1st pass, skip 2nd pass */ 01228 if (r->result.status < 0) continue; 01229 pass2_p = TRUE; 01230 } 01231 if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog); 01232 01233 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE) 01234 /* adjust trellis score not to contain outprob of the last frames */ 01235 for(r=recog->process_list;r;r=r->next) { 01236 if (!r->live) continue; 01237 /* if [-1pass] is specified, skip 2nd pass */ 01238 if (r->config->compute_only_1pass) continue; 01239 /* if search already failed on 1st pass, skip 2nd pass */ 01240 if (r->result.status < 0) continue; 01241 if (! 
r->am->hmminfo->multipath) { 01242 bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param); 01243 } 01244 #ifdef LM_FIX_DOUBLE_SCORING 01245 if (r->lmtype == LM_PROB) { 01246 bt_discount_lm(r->backtrellis); 01247 } 01248 #endif 01249 } 01250 #endif 01251 01252 /* execute stack-decoding search */ 01253 for(r=recog->process_list;r;r=r->next) { 01254 if (!r->live) continue; 01255 /* if [-1pass] is specified, just copy from 1st pass result */ 01256 if (r->config->compute_only_1pass) continue; 01257 /* if search already failed on 1st pass, skip 2nd pass */ 01258 if (r->result.status < 0) continue; 01259 /* prepare result storage */ 01260 if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) { 01261 result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm)); 01262 } else { 01263 result_sentence_malloc(r, r->config->output.output_hypo_maxnum); 01264 } 01265 /* do 2nd pass */ 01266 if (r->lmtype == LM_PROB) { 01267 wchmm_fbs(r->am->mfcc->param, r, 0, 0); 01268 } else if (r->lmtype == LM_DFA) { 01269 if (r->config->output.multigramout_flag) { 01270 /* execute 2nd pass multiple times for each grammar sequencially */ 01271 /* to output result for each grammar */ 01272 MULTIGRAM *m; 01273 boolean has_success = FALSE; 01274 for(m = r->lm->grammars; m; m = m->next) { 01275 if (m->active) { 01276 jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id); 01277 wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num); 01278 if (r->result.status == J_RESULT_STATUS_SUCCESS) { 01279 has_success = TRUE; 01280 } 01281 } 01282 } 01283 r->result.status = (has_success == TRUE) ? 
J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL; 01284 } else { 01285 /* only the best among all grammar will be output */ 01286 wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num); 01287 } 01288 } 01289 } 01290 01291 /* do forced alignment if needed */ 01292 for(r=recog->process_list;r;r=r->next) { 01293 if (!r->live) continue; 01294 /* if search failed on 2nd pass, skip this */ 01295 if (r->result.status < 0) continue; 01296 /* do needed alignment */ 01297 do_alignment_all(r, r->am->mfcc->param); 01298 } 01299 01300 /* output result */ 01301 callback_exec(CALLBACK_RESULT, recog); 01302 #ifdef ENABLE_PLUGIN 01303 plugin_exec_process_result(recog); 01304 #endif 01305 /* output graph */ 01306 /* r->result.wg == NULL should be skipped inside the callback */ 01307 ok_p = FALSE; 01308 for(r=recog->process_list;r;r=r->next) { 01309 if (!r->live) continue; 01310 if (r->config->compute_only_1pass) continue; 01311 if (r->result.status < 0) continue; 01312 if (r->config->graph.lattice) ok_p = TRUE; 01313 } 01314 if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog); 01315 /* output confnet */ 01316 /* r->result.confnet == NULL should be skipped inside the callback */ 01317 ok_p = FALSE; 01318 for(r=recog->process_list;r;r=r->next) { 01319 if (!r->live) continue; 01320 if (r->config->compute_only_1pass) continue; 01321 if (r->result.status < 0) continue; 01322 if (r->config->graph.confnet) ok_p = TRUE; 01323 } 01324 if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog); 01325 01326 /* clear work area for output */ 01327 for(r=recog->process_list;r;r=r->next) { 01328 if (!r->live) continue; 01329 clear_result(r); 01330 } 01331 01332 /* output end of 2nd pass */ 01333 if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog); 01334 01335 #ifdef DEBUG_VTLN_ALPHA_TEST 01336 if (r->am->mfcc->para->vtln_alpha == 1.0) { 01337 /* if vtln parameter remains default, search for VTLN parameter */ 01338 vtln_alpha(recog, r); 01339 } 01340 #endif 01341 01342 end_recog: 01343 
/**********************/ 01344 /* end of recognition */ 01345 /**********************/ 01346 01347 /* update CMN info for next input (in case of realtime wave input) */ 01348 if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) { 01349 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01350 if (mfcc->param->samplenum > 0) { 01351 RealTimeCMNUpdate(mfcc, recog); 01352 } 01353 } 01354 } 01355 01356 process_segment_last = recog->process_segment; 01357 if (jconf->decodeopt.segment) { /* sp-segment mode */ 01358 /* param is now shrinked to hold only the processed input, and */ 01359 /* the rests are holded in (newly allocated) "rest_param" */ 01360 /* if this is the last segment, rest_param is NULL */ 01361 /* assume all segmentation are synchronized */ 01362 recog->process_segment = FALSE; 01363 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01364 if (mfcc->rest_param != NULL) { 01365 /* process the rest parameters in the next loop */ 01366 recog->process_segment = TRUE; 01367 free_param(mfcc->param); 01368 mfcc->param = mfcc->rest_param; 01369 mfcc->rest_param = NULL; 01370 } 01371 } 01372 } 01373 01374 /* callback of recognition end */ 01375 if (jconf->decodeopt.segment) { 01376 #ifdef BACKEND_VAD 01377 if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); 01378 if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01379 #else 01380 callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); 01381 if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01382 #endif 01383 } else { 01384 callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01385 } 01386 01387 01388 if (verbose_flag) jlog("\n"); 01389 jlog_flush(); 01390 01391 if (jconf->decodeopt.segment) { /* sp-segment mode */ 01392 if (recog->process_segment == TRUE) { 01393 if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n"); 01394 } else { 01395 /* input has reached end of stream, terminate 
program */ 01396 if (ret <= 0 && ret != -2) break; 01397 } 01398 } else { /* not sp-segment mode */ 01399 /* input has reached end of stream, terminate program */ 01400 if (ret <= 0 && ret != -2) break; 01401 } 01402 01403 /* recognition continues for next (silence-aparted) segment */ 01404 01405 } /* END OF STREAM LOOP */ 01406 01407 /* close the stream */ 01408 if (jconf->input.type == INPUT_WAVEFORM) { 01409 if (adin_end(recog->adin) == FALSE) return -1; 01410 } 01411 if (jconf->input.speech_input == SP_MFCMODULE) { 01412 if (mfc_module_end(recog->mfcclist) == FALSE) return -1; 01413 } 01414 01415 /* return to the opening of input stream */ 01416 01417 return(0); 01418 01419 } 01420 01465 int 01466 j_recognize_stream(Recog *recog) 01467 { 01468 int ret; 01469 01470 do { 01471 01472 ret = j_recognize_stream_core(recog); 01473 01474 switch(ret) { 01475 case 1: /* paused by a callback (stream will continue) */ 01476 /* call pause event callbacks */ 01477 callback_exec(CALLBACK_EVENT_PAUSE, recog); 01478 /* call pause functions */ 01479 /* block until all pause functions exits */ 01480 if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) { 01481 jlog("WARNING: pause requested but no pause function specified\n"); 01482 jlog("WARNING: engine will resume now immediately\n"); 01483 } 01484 callback_exec(CALLBACK_PAUSE_FUNCTION, recog); 01485 /* after here, recognition will restart for the rest input */ 01486 /* call resume event callbacks */ 01487 callback_exec(CALLBACK_EVENT_RESUME, recog); 01488 break; 01489 case 0: /* end of stream */ 01490 /* go on to the next input */ 01491 break; 01492 case -1: /* error */ 01493 jlog("ERROR: an error occured while recognition, terminate stream\n"); 01494 return -1; 01495 } 01496 } while (ret == 1); /* loop when paused by callback */ 01497 01498 return 0; 01499 } 01500 01501 /* end of file */