Julius: libjulius/src/recogmain.c ソースファイル

Julius 4.1.5
00001 
00019 /*
00020  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00021  * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan
00022  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00023  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00024  * All rights reserved
00025  */
00161 #define GLOBAL_VARIABLE_DEFINE  ///< Actually make global vars in global.h
00162 #include <julius/julius.h>
00163 #include <signal.h>
00164 #if defined(_WIN32) && !defined(__CYGWIN32__)
00165 #include <mbctype.h>
00166 #include <mbstring.h>
00167 #endif
00168 
00169 /* ---------- utility functions -----------------------------------------*/
00170 #ifdef REPORT_MEMORY_USAGE
00171 
/**
 * Report the memory usage of the running process.
 *
 * Builds a "ps -o vsz,rss -p <pid>" command line for the current process
 * id, runs it through system(), and then flushes stdout and stderr.
 * Only compiled when REPORT_MEMORY_USAGE is defined.
 */
static void
print_mem()
{
  char buf[200];

  /* bounded formatting; pid_t is cast because its width is not
     guaranteed to match the "%d" conversion */
  snprintf(buf, sizeof(buf), "ps -o vsz,rss -p %d", (int)getpid());
  system(buf);
  fflush(stdout);
  fflush(stderr);
}
00190 #endif
00191           
00192 
00209 SentenceAlign *
00210 result_align_new()
00211 {
00212   SentenceAlign *new;
00213   new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign));
00214   new->w = NULL;
00215   new->ph = NULL;
00216   new->loc = NULL;
00217   new->begin_frame = NULL;
00218   new->end_frame = NULL;
00219   new->avgscore = NULL;
00220   new->is_iwsp = NULL;
00221   new->next = NULL;
00222   return new;
00223 }
00224 
00241 void
00242 result_align_free(SentenceAlign *a)
00243 {
00244   if (a->w) free(a->w);
00245   if (a->ph) free(a->ph);
00246   if (a->loc) free(a->loc);
00247   if (a->begin_frame) free(a->begin_frame);
00248   if (a->end_frame) free(a->end_frame);
00249   if (a->avgscore) free(a->avgscore);
00250   if (a->is_iwsp) free(a->is_iwsp);
00251   free(a);
00252 }
00253 
00269 void
00270 result_sentence_malloc(RecogProcess *r, int num)
00271 {
00272   int i;
00273   r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num);
00274   for(i=0;i<num;i++) r->result.sent[i].align = NULL;
00275   r->result.sentnum = 0;
00276 }
00277 
00291 void
00292 result_sentence_free(RecogProcess *r)
00293 {  
00294   int i;
00295   SentenceAlign *a, *atmp;
00296   if (r->result.sent) {
00297     for(i=0;i<r->result.sentnum;i++) {
00298       a = r->result.sent[i].align;
00299       while(a) {
00300         atmp = a->next;
00301         result_align_free(a);
00302         a = atmp;
00303       }
00304     }
00305     free(r->result.sent);
00306     r->result.sent = NULL;
00307   }
00308 }
00309 
00323 void
00324 clear_result(RecogProcess *r)
00325 {
00326 #ifdef WORD_GRAPH
00327   /* clear 1st pass word graph output */
00328   wordgraph_clean(&(r->result.wg1));
00329 #endif
00330 
00331   if (r->lmvar == LM_DFA_WORD) {
00332     if (r->result.status == J_RESULT_STATUS_SUCCESS) {
00333       /* clear word recog result of first pass as in final result */
00334       free(r->result.sent);
00335     }
00336   } else {
00337     if (r->graphout) {
00338       if (r->config->graph.confnet) {
00339         /* free confusion network clusters */
00340         cn_free_all(&(r->result.confnet));
00341       } else if (r->config->graph.lattice) {
00342       }
00343       /* clear all wordgraph */
00344       wordgraph_clean(&(r->result.wg));
00345     }
00346     result_sentence_free(r);
00347   }
00348 }
00349 
00350 /* --------------------- speech buffering ------------------ */
00351 
00384 int
00385 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog)
00386 {
00387   if (recog->speechlen == 0) {          /* first part of a segment */
00388     if (!recog->process_active) {
00389       return(1);
00390     }
00391   }
00392 
00393   if (recog->speechlen + len > recog->speechalloclen) {
00394     while (recog->speechlen + len > recog->speechalloclen) {
00395       recog->speechalloclen += MAX_SPEECH_ALLOC_STEP;
00396     }
00397     if (recog->speech == NULL) {
00398       recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen);
00399     } else {
00400       if (debug2_flag) {
00401         jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen);
00402       }
00403       recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen);
00404     }
00405   }
00406 
00407   /* store now[0..len] to recog->speech[recog->speechlen] */
00408   memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16));
00409   recog->speechlen += len;
00410   return(0);                    /* tell adin_go to continue reading */
00411 }
00412 
00413 
00414 /* --------------------- adin check callback --------------- */
00442 static int
00443 callback_check_in_adin(Recog *recog)
00444 {
00445   /* module: check command and terminate recording when requested */
00446   callback_exec(CALLBACK_POLL, recog);
00447   /* With audio input via adinnet, TERMINATE command will issue terminate
00448      command to the adinnet client.  The client then stops recording
00449      immediately and return end-of-segment ack.  Then it will cause this
00450      process to stop recognition as normal.  So we need not to
00451      perform immediate termination at this callback, but just ignore the
00452      results in the main.c.  */
00453 #if 1
00454   if (recog->process_want_terminate) { /* TERMINATE ... force termination */
00455     return(-2);
00456   }
00457   if (recog->process_want_reload) {
00458     return(-1);
00459   }
00460 #else
00461   if (recog->process_want_terminate /* TERMINATE ... force termination */
00462       && recog->jconf->input.speech_input != SP_ADINNET) {
00463     return(-2);
00464   }
00465   if (recog->process_want_reload) {
00466     return(-1);
00467   }
00468 #endif
00469   return(0);
00470 }
00471 
00472 /*********************/
00473 /* open input stream */
00474 /*********************/
00492 int
00493 j_open_stream(Recog *recog, char *file_or_dev_name)
00494 {
00495   Jconf *jconf;
00496   char *p;
00497 
00498   jconf = recog->jconf;
00499 
00500   if (jconf->input.type == INPUT_WAVEFORM) {
00501     /* begin A/D input */
00502     if (adin_begin(recog->adin, file_or_dev_name) == FALSE) {
00503       return -2;
00504     }
00505     /* create A/D-in thread here */
00506 #ifdef HAVE_PTHREAD
00507     if (recog->adin->enable_thread && ! recog->adin->input_side_segment) {
00508       if (adin_thread_create(recog) == FALSE) {
00509         return -2;
00510       }
00511     }
00512 #endif
00513     /* when using adin func, input name should be obtained when called */
00514   } else {
00515     switch(jconf->input.speech_input) {
00516     case SP_MFCMODULE:
00517       param_init_content(recog->mfcclist->param);
00518       if (mfc_module_begin(recog->mfcclist) == FALSE) return -2;
00519       /* when using mfc module func, input name should be obtained when called */
00520       break;
00521     case SP_MFCFILE:
00522       /* read parameter file */
00523       param_init_content(recog->mfcclist->param);
00524       if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) {
00525         jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name);
00526         return -1;
00527       }
00528       /* check and strip invalid frames */
00529       if (jconf->preprocess.strip_zero_sample) {
00530         param_strip_zero(recog->mfcclist->param);
00531       }
00532       /* output frame length */
00533       callback_exec(CALLBACK_STATUS_PARAM, recog);
00534       /* store the input filename here */
00535       strncpy(recog->adin->current_input_name, file_or_dev_name, MAXPATHLEN);
00536       break;
00537     default:
00538       jlog("ERROR: j_open_stream: none of SP_MFC_*??\n");
00539       return -1;
00540     }
00541   }
00542 
00543   if (jconf->input.speech_input != SP_MFCFILE) {
00544     /* store current input name using input source specific function */
00545     p = j_get_current_filename(recog);
00546     if (p) {
00547       strncpy(recog->adin->current_input_name, p, MAXPATHLEN);
00548     } else {
00549       recog->adin->current_input_name[0] = '\0';
00550     }
00551   }
00552       
00553   return 0;
00554 
00555 }
00556 
00574 int
00575 j_close_stream(Recog *recog)
00576 {
00577   Jconf *jconf;
00578 
00579   jconf = recog->jconf;
00580 
00581   if (jconf->input.type == INPUT_WAVEFORM) {
00582 #ifdef HAVE_PTHREAD
00583     /* close A/D-in thread here */
00584     if (! recog->adin->input_side_segment) {
00585       if (recog->adin->enable_thread) {
00586         if (adin_thread_cancel(recog) == FALSE) {
00587           return -2;
00588         }
00589       } else {
00590         recog->adin->end_of_stream = TRUE;
00591       }
00592     }
00593 #else
00594     if (! recog->adin->input_side_segment) {
00595       recog->adin->end_of_stream = TRUE;
00596     }
00597 #endif
00598     /* end A/D input */
00599     if (adin_end(recog->adin) == FALSE) {
00600       return -2;
00601     }
00602   } else {
00603     switch(jconf->input.speech_input) {
00604     case SP_MFCMODULE:
00605       if (mfc_module_end(recog->mfcclist) == FALSE) return -2;
00606       break;
00607     case SP_MFCFILE:
00608       /* nothing to do */
00609       break;
00610     default:
00611       jlog("ERROR: j_close_stream: none of SP_MFC_*??\n");
00612       return -1;
00613     }
00614   }
00615       
00616   return 0;
00617 
00618 }
00619 
00620 /**********************************************************************/
00621 /**********************************************************************/
00622 /**********************************************************************/
00623 
00636 static void
00637 result_error(Recog *recog, int status)
00638 {
00639   MFCCCalc *mfcc;
00640   RecogProcess *r;
00641   boolean ok_p;
00642 
00643   for(r=recog->process_list;r;r=r->next) r->result.status = status;
00644 
00645   ok_p = FALSE;
00646   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00647     if (mfcc->f > 0) {
00648       ok_p = TRUE;
00649       break;
00650     }
00651   }
00652   if (ok_p) {                   /* had some input */
00653     /* output as rejected */
00654     callback_exec(CALLBACK_RESULT, recog);
00655 #ifdef ENABLE_PLUGIN
00656     plugin_exec_process_result(recog);
00657 #endif
00658   }
00659 }
00660 
00696 static int
00697 j_recognize_stream_core(Recog *recog)
00698 {
00699   Jconf *jconf;
00700   int ret;
00701   float seclen, mseclen;
00702   RecogProcess *r;
00703   MFCCCalc *mfcc;
00704   PROCESS_AM *am;
00705   PROCESS_LM *lm;
00706   boolean ok_p;
00707   boolean process_segment_last;
00708   boolean on_the_fly;
00709   boolean pass2_p;
00710 
00711   jconf = recog->jconf;
00712 
00713   /* determine whether on-the-fly decoding should be done */
00714   on_the_fly = FALSE;
00715   switch(jconf->input.type) {
00716   case INPUT_VECTOR:
00717     switch(jconf->input.speech_input) {
00718     case SP_MFCFILE: 
00719       on_the_fly = FALSE;
00720       break;
00721     case SP_MFCMODULE:
00722       on_the_fly = TRUE;
00723       break;
00724     }
00725     break;
00726   case INPUT_WAVEFORM:
00727     if (jconf->decodeopt.realtime_flag) {
00728       on_the_fly = TRUE;
00729     } else {
00730       on_the_fly = FALSE;
00731     }
00732     break;
00733   }
00734 
00735   if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) {
00736     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00737       param_init_content(mfcc->param);
00738     }
00739   }
00740 
00741   /* if no process instance exist, start with terminated */
00742   if (recog->process_list == NULL) {
00743     jlog("STAT: no recog process, engine inactive\n");
00744     j_request_pause(recog);
00745   }
00746 
00747   /* update initial recognition process status */
00748   for(r=recog->process_list;r;r=r->next) {
00749     if (r->active > 0) {
00750       r->live = TRUE;
00751     } else if (r->active < 0) {
00752       r->live = FALSE;
00753     }
00754     r->active = 0;
00755   }
00756 
00757   /******************************************************************/
00758   /* do recognition for each incoming segment from the input stream */
00759   /******************************************************************/
00760   while (1) {
00761     
00762   start_recog:
00763 
00764     /*************************************/
00765     /* Update recognition process status */
00766     /*************************************/
00767     for(r=recog->process_list;r;r=r->next) {
00768       if (r->active > 0) {
00769         r->live = TRUE;
00770         jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name);
00771       } else if (r->active < 0) {
00772         r->live = FALSE;
00773         jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name);
00774       }
00775       r->active = 0;
00776     }
00777     if (debug2_flag) {
00778       for(r=recog->process_list;r;r=r->next) {
00779         jlog("DEBUG: %s: SR%02d %s\n", r->live ? "live" : "dead", r->config->id, r->config->name);
00780       }
00781     }
00782     /* check if any process is live */
00783     if (recog->process_active) {
00784       ok_p = FALSE;
00785       for(r=recog->process_list;r;r=r->next) {
00786         if (r->live) ok_p = TRUE;
00787       }
00788       if (!ok_p) {              /* no process is alive */
00789         /* make whole process as inactive */
00790         jlog("STAT: all recog process inactive, pause engine now\n");
00791         j_request_pause(recog);
00792       }
00793     }
00794 
00795     /* Check whether process status was changed while in the last run */
00796     if (recog->process_online != recog->process_active) {
00797       recog->process_online = recog->process_active;
00798       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00799       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00800     }
00801     /* execute poll callback */
00802     if (recog->process_active) {
00803       callback_exec(CALLBACK_POLL, recog);
00804     }
00805     /* reset reload flag here */
00806     j_reset_reload(recog);
00807 
00808     if (!recog->process_active) {
00809       /* now sleeping, return */
00810       /* in the next call, we will resume from here */
00811       return 1;
00812     }
00813     /* update process status */
00814     if (recog->process_online != recog->process_active) {
00815       recog->process_online = recog->process_active;
00816       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00817       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00818     }
00819 
00820     /*********************************************************/
00821     /* check for grammar to change, and rebuild if necessary */
00822     /*********************************************************/
00823     for(lm=recog->lmlist;lm;lm=lm->next) {
00824       if (lm->lmtype == LM_DFA) {
00825         multigram_update(lm); /* some modification occured if return TRUE*/
00826       }
00827     }
00828     for(r=recog->process_list;r;r=r->next) {
00829       if (!r->live) continue;
00830       if (r->lmtype == LM_DFA && r->lm->global_modified) {
00831         multigram_build(r);
00832       }
00833     }
00834     for(lm=recog->lmlist;lm;lm=lm->next) {
00835       if (lm->lmtype == LM_DFA) lm->global_modified = FALSE;
00836     }
00837 
00838     ok_p = FALSE;
00839     for(r=recog->process_list;r;r=r->next) {
00840       if (!r->live) continue;
00841       if (r->lmtype == LM_DFA) {
00842         if (r->lm->winfo == NULL ||
00843             (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) {
00844           /* make this instance inactive */
00845           r->active = -1;
00846           ok_p = TRUE;
00847         }
00848       }
00849     }
00850     if (ok_p) {                 /* at least one instance has no grammar */
00851       goto start_recog;
00852     }
00853 
00854 
00855     /******************/
00856     /* start 1st pass */
00857     /******************/
00858     if (on_the_fly) {
00859 
00860       /********************************************/
00861       /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */
00862       /********************************************/
00863       /* store, analysis and search in a pipeline  */
00864       /* main function is RealTimePipeLine() at realtime-1stpass.c, and
00865          it will be periodically called for each incoming input segment
00866          from the AD-in function adin_go().  RealTimePipeLine() will be
00867          called as a callback function from adin_go() */
00868       /* after this part, directly jump to the beginning of the 2nd pass */
00869       
00870       if (recog->process_segment) {
00871         /*****************************************************************/
00872         /* short-pause segmentation: process last remaining frames first */
00873         /*****************************************************************/
00874         /* last was segmented by short pause */
00875         /* the margin segment in the last input will be re-processed first,
00876            and then the speech input will be processed */
00877         /* process the last remaining parameters */
00878         ret = RealTimeResume(recog);
00879         if (ret < 0) {          /* error end in the margin */
00880           jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! */
00881           return -1;
00882         }
00883         if (ret != 1) { /* if segmented again in the margin, not process the rest */
00884           /* last parameters has been processed, so continue with the
00885              current input as normal */
00886           /* process the incoming input */
00887           if (jconf->input.type == INPUT_WAVEFORM) {
00888             /* get speech and process it on real-time */
00889             ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00890           } else {
00891             /* get feature vector and process it */
00892             ret = mfcc_go(recog, callback_check_in_adin);
00893           }
00894           if (ret < 0) {                /* error end in adin_go */
00895             if (ret == -2 || recog->process_want_terminate) {
00896               /* terminated by callback */
00897               RealTimeTerminate(recog);
00898               /* reset param */
00899               for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00900                 param_init_content(mfcc->param);
00901               }
00902               /* execute callback at end of pass1 */
00903               if (recog->triggered) {
00904                 callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00905                 /* output result terminate */
00906                 result_error(recog, J_RESULT_STATUS_TERMINATE);
00907               }
00908               goto end_recog; /* cancel this recognition */
00909             }
00910             jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00911             return(-1);
00912           }
00913         }
00914         
00915       } else {
00916 
00917         /***********************************************************/
00918         /* last was not segmented, process the new incoming input  */
00919         /***********************************************************/
00920         /* end of this input will be determined by either end of stream
00921            (in case of file input), or silence detection by adin_go(), or
00922            'TERMINATE' command from module (if module mode) */
00923         /* prepare work area for on-the-fly processing */
00924         if (RealTimePipeLinePrepare(recog) == FALSE) {
00925           jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n");
00926           return (-1);
00927         }
00928         /* process the incoming input */
00929         if (jconf->input.type == INPUT_WAVEFORM) {
00930           /* get speech and process it on real-time */
00931           ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00932         } else {
00933           /* get feature vector and process it */
00934           ret = mfcc_go(recog, callback_check_in_adin);
00935         }
00936         
00937         if (ret < 0) {          /* error end in adin_go */
00938           if (ret == -2 || recog->process_want_terminate) {     
00939             /* terminated by callback */
00940             RealTimeTerminate(recog);
00941             /* reset param */
00942             for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00943               param_init_content(mfcc->param);
00944             }
00945             /* execute callback at end of pass1 */
00946             if (recog->triggered) {
00947               callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00948               /* output result terminate */
00949               result_error(recog, J_RESULT_STATUS_TERMINATE);
00950             }
00951             goto end_recog;
00952           }
00953           jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00954           return(-1);
00955         }
00956       }
00957       /******************************************************************/
00958       /* speech stream has been processed on-the-fly, and 1st pass ends */
00959       /******************************************************************/
00960       /* last procedure of 1st-pass */
00961       if (RealTimeParam(recog) == FALSE) {
00962         jlog("ERROR: fatal error occured, program terminates now\n");
00963         return -1;
00964       }
00965       
00966 #ifdef BACKEND_VAD
00967       /* if not triggered, skip this segment */
00968       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
00969         goto end_recog;
00970       }
00971 #endif
00972 
00973       /* execute callback for 1st pass result */
00974       /* result.status <0 must be skipped inside callback */
00975       callback_exec(CALLBACK_RESULT_PASS1, recog);
00976 #ifdef WORD_GRAPH
00977       /* result.wg1 == NULL should be skipped inside callback */
00978       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
00979 #endif
00980       /* execute callback at end of pass1 */
00981       callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00982       /* output frame length */
00983       callback_exec(CALLBACK_STATUS_PARAM, recog);
00984       /* if terminate signal has been received, discard this input */
00985       if (recog->process_want_terminate) {
00986         result_error(recog, J_RESULT_STATUS_TERMINATE);
00987         goto end_recog;
00988       }
00989 
00990       /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */
00991 
00992     } else {
00993 
00994       /******************/
00995       /* buffered input */
00996       /******************/
00997 
00998       if (jconf->input.type == INPUT_VECTOR) {
00999         /***********************/
01000         /* feature vector input */
01001         /************************/
01002         if (jconf->input.speech_input == SP_MFCFILE) {
01003           /************************/
01004           /* parameter file input */
01005           /************************/
01006           /* parameter type check --- compare the type to that of HMM,
01007              and adjust them if necessary */
01008           if (jconf->input.paramtype_check_flag) {
01009             for(am=recog->amlist;am;am=am->next) {
01010               /* return param itself or new malloced param */
01011               if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) {   /* failed */
01012                 
01013                 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01014                   param_init_content(mfcc->param);
01015                 }
01016                 /* tell failure */
01017                 result_error(recog, J_RESULT_STATUS_FAIL);
01018                 goto end_recog;
01019               }
01020             }
01021           }
01022           /* whole input is already read, so set input status to end of stream */
01023           /* and jump to the start point of 1st pass */
01024           ret = 0;
01025         }
01026       } else {
01027         /*************************/
01028         /* buffered speech input */
01029         /*************************/
01030         if (!recog->process_segment) { /* no segment left */
01031 
01032           /****************************************/
01033           /* store raw speech samples to speech[] */
01034           /****************************************/
01035           recog->speechlen = 0;
01036           for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01037             param_init_content(mfcc->param);
01038           }
01039           /* tell module to start recording */
01040           /* the "adin_cut_callback_store_buffer" simply stores
01041              the input speech to a buffer "speech[]" */
01042           /* end of this input will be determined by either end of stream
01043              (in case of file input), or silence detection by adin_go(), or
01044              'TERMINATE' command from module (if module mode) */
01045           ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog);
01046           if (ret < 0) {                /* error end in adin_go */
01047             if (ret == -2 || recog->process_want_terminate) {
01048               /* terminated by module */
01049               /* output fail */
01050               result_error(recog, J_RESULT_STATUS_TERMINATE);
01051               goto end_recog;
01052             }
01053             jlog("ERROR: an error occured while recording input\n");
01054             return -1;
01055           }
01056           
01057           /* output recorded length */
01058           seclen = (float)recog->speechlen / (float)jconf->input.sfreq;
01059           jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen);
01060           
01061           /* -rejectshort 指定時, 入力が指定時間以下であれば
01062              ここで入力を棄却する */
01063           /* when using "-rejectshort", and input was shorter than
01064              specified, reject the input here */
01065           if (jconf->reject.rejectshortlen > 0) {
01066             if (seclen * 1000.0 < jconf->reject.rejectshortlen) {
01067               result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01068               goto end_recog;
01069             }
01070           }
01071         
01072           /**********************************************/
01073           /* acoustic analysis and encoding of speech[] */
01074           /**********************************************/
01075           jlog("STAT: ### speech analysis (waveform -> MFCC)\n");
01076           /* CMN will be computed for the whole buffered input */
01077           if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) {
01078             /* error end, end stream */
01079             ret = -1;
01080             /* tell failure */
01081             result_error(recog, J_RESULT_STATUS_FAIL);
01082             goto end_recog;
01083           }
01084           
01085           /* if terminate signal has been received, cancel this input */
01086           if (recog->process_want_terminate) {
01087             result_error(recog, J_RESULT_STATUS_TERMINATE);
01088             goto end_recog;
01089           }
01090           
01091           /* output frame length */
01092           callback_exec(CALLBACK_STATUS_PARAM, recog);
01093         }
01094       }
01095 
01096 #ifdef ENABLE_PLUGIN
01097       /* call post-process plugin if exist */
01098       plugin_exec_vector_postprocess_all(recog->mfcclist->param);
01099 #endif
01100 
01101       /******************************************************/
01102       /* 1st-pass --- backward search to compute heuristics */
01103       /******************************************************/
01104       if (!jconf->decodeopt.realtime_flag) {
01105         /* prepare for outprob cache for each HMM state and time frame */
01106         /* assume all MFCCCalc has params of the same sample num */
01107         for(am=recog->amlist;am;am=am->next) {
01108           outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
01109         }
01110       }
01111       
01112       /* if terminate signal has been received, cancel this input */
01113       if (recog->process_want_terminate) {
01114         result_error(recog, J_RESULT_STATUS_TERMINATE);
01115         goto end_recog;
01116       }
01117     
01118       /* execute computation of left-to-right backtrellis */
01119       if (get_back_trellis(recog) == FALSE) {
01120         jlog("ERROR: fatal error occured, program terminates now\n");
01121         return -1;
01122       }
01123 #ifdef BACKEND_VAD
01124       /* if not triggered, skip this segment */
01125       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
01126         goto end_recog;
01127       }
01128 #endif
01129       
01130       /* execute callback for 1st pass result */
01131       /* result.status <0 must be skipped inside callback */
01132       callback_exec(CALLBACK_RESULT_PASS1, recog);
01133 #ifdef WORD_GRAPH
01134       /* result.wg1 == NULL should be skipped inside callback */
01135       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
01136 #endif
01137       
01138       /* execute callback at end of pass1 */
01139       if (recog->triggered) {
01140         callback_exec(CALLBACK_EVENT_PASS1_END, recog);
01141       }
01142 
01143       /* END OF BUFFERED 1ST PASS */
01144 
01145     }
01146 
01147     /**********************************/
01148     /* end processing of the 1st-pass */
01149     /**********************************/
01150     /* on-the-fly 1st pass processing will join here */
01151     
01152     /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */
01153     /* 第２パスを実行せずにここで終了する */
01154     /* when using "-rejectshort", and input was shorter than the specified
01155        length, terminate search here and output recognition failure */
01156     if (jconf->reject.rejectshortlen > 0) {
01157       mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0;
01158       if (mseclen < jconf->reject.rejectshortlen) {
01159         result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01160         goto end_recog;
01161       }
01162     }
01163 #ifdef POWER_REJECT
01164     if (power_reject(recog)) {
01165       result_error(recog, J_RESULT_STATUS_REJECT_POWER);
01166       goto end_recog;
01167     }
01168 #endif
01169     
01170     /* if terminate signal has been received, cancel this input */
01171     if (recog->process_want_terminate) {
01172       result_error(recog, J_RESULT_STATUS_TERMINATE);
01173       goto end_recog;
01174     }
01175     
01176     /* if GMM is specified and result are to be rejected, terminate search here */
01177     if (jconf->reject.gmm_reject_cmn_string != NULL) {
01178       if (! gmm_valid_input(recog)) {
01179         result_error(recog, J_RESULT_STATUS_REJECT_GMM);
01180         goto end_recog;
01181       }
01182     }
01183 
01184     /* for instances with "-1pass", copy 1st pass result as final */
01185     /* execute stack-decoding search */
01186     /* they will be skipped in the next pass */
01187     for(r=recog->process_list;r;r=r->next) {
01188       if (!r->live) continue;
01189       /* skip if 1st pass was failed */
01190       if (r->result.status < 0) continue;
01191       /* already stored on word recognition, so skip this */
01192       if (r->lmvar == LM_DFA_WORD) continue;
01193       if (r->config->compute_only_1pass) {
01194         if (verbose_flag) {
01195           jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name);
01196         }
01197         /* prepare result storage */
01198         result_sentence_malloc(r, 1);
01199         /* finalize result when no hypothesis was obtained */
01200         pass2_finalize_on_no_result(r, TRUE);
01201       }
01202     }
01203 
01204     /***********************************************/
01205     /* 2nd-pass --- forward search with heuristics */
01206     /***********************************************/
01207     pass2_p = FALSE;
01208     for(r=recog->process_list;r;r=r->next) {
01209       if (!r->live) continue;
01210       /* if [-1pass] is specified, skip 2nd pass */
01211       if (r->config->compute_only_1pass) continue;
01212       /* if search already failed on 1st pass, skip 2nd pass */
01213       if (r->result.status < 0) continue;
01214       pass2_p = TRUE;
01215     }
01216     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog);
01217 
01218 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
01219     /* adjust trellis score not to contain outprob of the last frames */
01220     for(r=recog->process_list;r;r=r->next) {
01221       if (!r->live) continue;
01222       /* if [-1pass] is specified, skip 2nd pass */
01223       if (r->config->compute_only_1pass) continue;
01224       /* if search already failed on 1st pass, skip 2nd pass */
01225       if (r->result.status < 0) continue;
01226       if (! r->am->hmminfo->multipath) {
01227         bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param);
01228       }
01229 #ifdef LM_FIX_DOUBLE_SCORING
01230       if (r->lmtype == LM_PROB) {
01231         bt_discount_lm(r->backtrellis);
01232       }
01233 #endif
01234     }
01235 #endif
01236     
01237     /* execute stack-decoding search */
01238     for(r=recog->process_list;r;r=r->next) {
01239       if (!r->live) continue;
01240       /* if [-1pass] is specified, just copy from 1st pass result */
01241       if (r->config->compute_only_1pass) continue;
01242       /* if search already failed on 1st pass, skip 2nd pass */
01243       if (r->result.status < 0) continue;
01244       /* prepare result storage */
01245       if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) {
01246         result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm));
01247       } else {
01248         result_sentence_malloc(r, r->config->output.output_hypo_maxnum);
01249       }
01250       /* do 2nd pass */
01251       if (r->lmtype == LM_PROB) {
01252         wchmm_fbs(r->am->mfcc->param, r, 0, 0);
01253       } else if (r->lmtype == LM_DFA) {
01254         if (r->config->output.multigramout_flag) {
01255           /* execute 2nd pass multiple times for each grammar sequentially */
01256           /* to output result for each grammar */
01257           MULTIGRAM *m;
01258           boolean has_success = FALSE;
01259           for(m = r->lm->grammars; m; m = m->next) {
01260             if (m->active) {
01261               jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id);
01262               wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num);
01263               if (r->result.status == J_RESULT_STATUS_SUCCESS) {
01264                 has_success = TRUE;
01265               }
01266             }
01267           }
01268           r->result.status = (has_success == TRUE) ? J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL;
01269         } else {
01270           /* only the best among all grammar will be output */
01271           wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num);
01272         }
01273       }
01274     }
01275 
01276     /* do forced alignment if needed */
01277     for(r=recog->process_list;r;r=r->next) {
01278       if (!r->live) continue;
01279       /* if search failed on 2nd pass, skip this */
01280       if (r->result.status < 0) continue;
01281       /* do needed alignment */
01282       do_alignment_all(r, r->am->mfcc->param);
01283     }
01284 
01285     /* output result */
01286     callback_exec(CALLBACK_RESULT, recog);
01287 #ifdef ENABLE_PLUGIN
01288     plugin_exec_process_result(recog);
01289 #endif
01290     /* output graph */
01291     /* r->result.wg == NULL should be skipped inside the callback */
01292     ok_p = FALSE;
01293     for(r=recog->process_list;r;r=r->next) {
01294       if (!r->live) continue;
01295       if (r->config->compute_only_1pass) continue;
01296       if (r->result.status < 0) continue;
01297       if (r->config->graph.lattice) ok_p = TRUE;
01298     }
01299     if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog);
01300     /* output confnet */
01301     /* r->result.confnet == NULL should be skipped inside the callback */
01302     ok_p = FALSE;
01303     for(r=recog->process_list;r;r=r->next) {
01304       if (!r->live) continue;
01305       if (r->config->compute_only_1pass) continue;
01306       if (r->result.status < 0) continue;
01307       if (r->config->graph.confnet) ok_p = TRUE;
01308     }
01309     if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog);
01310 
01311     /* clear work area for output */
01312     for(r=recog->process_list;r;r=r->next) {
01313       if (!r->live) continue;
01314       clear_result(r);
01315     }
01316     
01317     /* output end of 2nd pass */
01318     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog);
01319 
01320 #ifdef DEBUG_VTLN_ALPHA_TEST
01321     if (r->am->mfcc->para->vtln_alpha == 1.0) {
01322       /* if vtln parameter remains default, search for VTLN parameter */
01323       vtln_alpha(recog, r);
01324     }
01325 #endif
01326 
01327   end_recog:
01328     /**********************/
01329     /* end of recognition */
01330     /**********************/
01331 
01332     /* update CMN info for next input (in case of realtime wave input) */
01333     if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) {
01334       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01335         if (mfcc->param->samplenum > 0) {
01336           RealTimeCMNUpdate(mfcc, recog);
01337         }
01338       }
01339     }
01340     
01341     process_segment_last = recog->process_segment;
01342     if (jconf->decodeopt.segment) { /* sp-segment mode */
01343       /* param is now shrunk to hold only the processed input, and */
01344       /* the rest is held in (newly allocated) "rest_param" */
01345       /* if this is the last segment, rest_param is NULL */
01346       /* assume all segmentation are synchronized */
01347       recog->process_segment = FALSE;
01348       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01349         if (mfcc->rest_param != NULL) {
01350           /* process the rest parameters in the next loop */
01351           recog->process_segment = TRUE;
01352           free_param(mfcc->param);
01353           mfcc->param = mfcc->rest_param;
01354           mfcc->rest_param = NULL;
01355         }
01356       }
01357     }
01358 
01359     /* callback of recognition end */
01360     if (jconf->decodeopt.segment) {
01361 #ifdef BACKEND_VAD
01362       if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01363       if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01364 #else
01365       callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01366       if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01367 #endif
01368     } else {
01369       callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01370     }
01371 
01372 
01373     if (verbose_flag) jlog("\n");
01374     jlog_flush();
01375 
01376     if (jconf->decodeopt.segment) { /* sp-segment mode */
01377       if (recog->process_segment == TRUE) {
01378         if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n");
01379       } else {
01380         /* input has reached end of stream, terminate program */
01381         if (ret <= 0 && ret != -2) break;
01382       }
01383     } else {                    /* not sp-segment mode */
01384       /* input has reached end of stream, terminate program */
01385       if (ret <= 0 && ret != -2) break;
01386     }
01387 
01388     /* recognition continues for next (silence-separated) segment */
01389       
01390   } /* END OF STREAM LOOP */
01391     
01392   /* close the stream */
01393   if (jconf->input.type == INPUT_WAVEFORM) {
01394     if (adin_end(recog->adin) == FALSE) return -1;
01395   }
01396   if (jconf->input.speech_input == SP_MFCMODULE) {
01397     if (mfc_module_end(recog->mfcclist) == FALSE) return -1;
01398   }
01399 
01400   /* return to the opening of input stream */
01401 
01402   return(0);
01403 
01404 }
01405 
01450 int
01451 j_recognize_stream(Recog *recog)
01452 {
01453   int ret;
01454 
01455   do {
01456     
01457     ret = j_recognize_stream_core(recog);
01458 
01459     switch(ret) {
01460     case 1:           /* paused by a callback (stream will continue) */
01461       /* call pause event callbacks */
01462       callback_exec(CALLBACK_EVENT_PAUSE, recog);
01463       /* call pause functions */
01464       /* block until all pause functions exits */
01465       if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) {
01466         jlog("WARNING: pause requested but no pause function specified\n");
01467         jlog("WARNING: engine will resume now immediately\n");
01468       }
01469       callback_exec(CALLBACK_PAUSE_FUNCTION, recog);
01470       /* after here, recognition will restart for the rest input */
01471       /* call resume event callbacks */
01472       callback_exec(CALLBACK_EVENT_RESUME, recog);
01473       break;
01474     case 0:                     /* end of stream */
01475       /* go on to the next input */
01476       break;
01477     case -1:            /* error */
01478       jlog("ERROR: an error occured while recognition, terminate stream\n");
01479       return -1;
01480     }
01481   } while (ret == 1);           /* loop when paused by callback */
01482 
01483   return 0;
01484 }
01485 
01486 /* end of file */