/* Julius 4.2 */
00001 00046 /* 00047 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00048 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00049 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00050 * All rights reserved 00051 */ 00052 00053 #include <julius/julius.h> 00054 00055 #include <sys/stat.h> 00056 00084 boolean 00085 wav2mfcc(SP16 speech[], int speechlen, Recog *recog) 00086 { 00087 int framenum; 00088 int len; 00089 Value *para; 00090 MFCCCalc *mfcc; 00091 00092 /* calculate frame length from speech length, frame size and frame shift */ 00093 framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1; 00094 if (framenum < 1) { 00095 jlog("WARNING: input too short (%d samples), ignored\n", speechlen); 00096 return FALSE; 00097 } 00098 00099 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00100 00101 if (mfcc->frontend.ssload_filename) { 00102 /* setup for spectral subtraction using file */ 00103 if (mfcc->frontend.ssbuf == NULL) { 00104 /* load noise spectrum for spectral subtraction from file (once) */ 00105 if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) { 00106 jlog("ERROR: wav2mfcc: failed to read noise spectrum from file \"%s\"\n", mfcc->frontend.ssload_filename); 00107 return FALSE; 00108 } 00109 } 00110 } 00111 00112 if (mfcc->frontend.sscalc) { 00113 /* compute noise spectrum from head silence for each input */ 00114 len = mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000; 00115 if (len > speechlen) len = speechlen; 00116 #ifdef SSDEBUG 00117 jlog("DEBUG: [%d]\n", len); 00118 #endif 00119 mfcc->frontend.ssbuf = new_SS_calculate(speech, len, &(mfcc->frontend.sslen), mfcc->frontend.mfccwrk_ss, mfcc->para); 00120 } 00121 00122 } 00123 00124 /* compute mfcc from speech file for each mfcc instances */ 00125 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00126 00127 para = 
mfcc->para; 00128 00129 /* malloc new param */ 00130 param_init_content(mfcc->param); 00131 if (param_alloc(mfcc->param, framenum, para->veclen) == FALSE) { 00132 jlog("ERROR: failed to allocate memory for converted parameter vectors\n"); 00133 return FALSE; 00134 } 00135 00136 if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) { 00137 /* make link from mfccs to this buffer */ 00138 mfcc->wrk->ssbuf = mfcc->frontend.ssbuf; 00139 mfcc->wrk->ssbuflen = mfcc->frontend.sslen; 00140 mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha; 00141 mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; 00142 } 00143 00144 /* make MFCC from speech data */ 00145 if (Wav2MFCC(speech, mfcc->param->parvec, para, speechlen, mfcc->wrk) == FALSE) { 00146 jlog("ERROR: failed to compute MFCC from input speech\n"); 00147 if (mfcc->frontend.sscalc) { 00148 free(mfcc->frontend.ssbuf); 00149 mfcc->frontend.ssbuf = NULL; 00150 } 00151 return FALSE; 00152 } 00153 00154 /* set miscellaneous parameters */ 00155 mfcc->param->header.samplenum = framenum; 00156 mfcc->param->header.wshift = para->smp_period * para->frameshift; 00157 mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ 00158 mfcc->param->header.samptype = F_MFCC; 00159 if (para->delta) mfcc->param->header.samptype |= F_DELTA; 00160 if (para->acc) mfcc->param->header.samptype |= F_ACCL; 00161 if (para->energy) mfcc->param->header.samptype |= F_ENERGY; 00162 if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; 00163 if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; 00164 if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; 00165 mfcc->param->veclen = para->veclen; 00166 mfcc->param->samplenum = framenum; 00167 00168 if (mfcc->frontend.sscalc) { 00169 free(mfcc->frontend.ssbuf); 00170 mfcc->frontend.ssbuf = NULL; 00171 } 00172 } 00173 00174 return TRUE; 00175 } 00176 00177 /* end of file */