/* Julius 4.2 */
00001 00035 /************************************************************************/ 00036 /* mfcc.h */ 00037 /* */ 00038 /* Author : Yuichiro Nakano */ 00039 /************************************************************************/ 00040 00041 #ifndef __MFCC_H__ 00042 #define __MFCC_H__ 00043 00045 #undef MFCC_TABLE_DEBUG 00046 00047 #define CPMAX 500 ///< Maximum number of frames to store ceptral mean for realtime CMN update 00048 #define CPSTEP 5 ///< allocate step of cmean list per sentence 00049 00050 #include <sent/stddefs.h> 00051 #include <sent/htk_defs.h> 00052 #include <sent/htk_param.h> 00053 #include <ctype.h> 00054 00055 #define DEF_SMPPERIOD 625 ///< Default sampling period in 100ns (625 = 16kHz) 00056 #define DEF_FRAMESIZE 400 ///< Default Window size in samples, similar to WINDOWSIZE in HTK (unit is different) 00057 #define DEF_FFTNUM 512 ///< Number of FFT steps 00058 #define DEF_FRAMESHIFT 160 ///< Default frame shift length in samples 00059 #define DEF_PREENPH 0.97 ///< Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK 00060 #define DEF_MFCCDIM 12 ///< Default number of MFCC dimension, corresponds to NUMCEPS in HTK 00061 #define DEF_CEPLIF 22 ///< Default cepstral Liftering coefficient, corresponds to CEPLIFTER in HTK 00062 #define DEF_FBANK 24 ///< Default number of filterbank channels, corresponds to NUMCHANS in HTK 00063 #define DEF_DELWIN 2 ///< Default delta window size, corresponds to DELTAWINDOW in HTK 00064 #define DEF_ACCWIN 2 ///< Default acceleration window size, corresponds to ACCWINDOW in HTK 00065 #define DEF_SILFLOOR 50.0 ///< Default energy silence floor in dBs, corresponds to SILFLOOR in HTK 00066 #define DEF_ESCALE 1.0 ///< Default scaling coefficient of log energy, corresponds to ESCALE in HTK 00067 00068 #define DEF_SSALPHA 2.0 ///< Default alpha coefficient for spectral subtraction 00069 #define DEF_SSFLOOR 0.5 ///< Default flooring coefficient for spectral subtraction 00070 00071 /* version 2 ... 
ss_floor and ss_alpha removed */ 00072 /* version 3 add usepower */ 00073 #define VALUE_VERSION 3 ///< Integer version number of Value, for embedding 00074 00076 typedef struct { 00077 long smp_period; 00078 long smp_freq; 00079 int framesize; 00080 int frameshift; 00081 float preEmph; 00082 int lifter; 00083 int fbank_num; 00084 int delWin; 00085 int accWin; 00086 float silFloor; 00087 float escale; 00088 int hipass; 00089 int lopass; 00090 int enormal; 00091 int raw_e; 00092 int zmeanframe; 00093 int usepower; 00094 float vtln_alpha; 00095 float vtln_upper; 00096 float vtln_lower; 00097 00098 /* items below does not need to be embedded, because they can be 00099 detemined from the acoustic model header, or should be computed 00100 from run-time variables */ 00101 int delta; 00102 int acc; 00103 int energy; 00104 int c0; 00105 int absesup; 00106 int cmn; 00107 int cvn; 00108 int mfcc_dim; 00109 int baselen; 00110 int vecbuflen; 00111 int veclen; 00112 00113 int loaded; 00114 }Value; 00115 00117 typedef struct { 00118 int fftN; 00119 int n; 00120 int klo; 00121 int khi; 00122 float fres; 00123 float *cf; 00124 short *loChan; 00125 float *loWt; 00126 float *Re; 00127 float *Im; 00128 } FBankInfo; 00129 00131 typedef struct { 00132 float **mfcc; 00133 int veclen; 00134 float *vec; 00135 int win; 00136 int len; 00137 int store; 00138 boolean *is_on; 00139 int B; 00140 } DeltaBuf; 00141 00143 typedef struct { 00144 float *bf; 00145 double *fbank; 00146 FBankInfo fb; 00147 int bflen; 00148 #ifdef MFCC_SINCOS_TABLE 00149 double *costbl_hamming; 00150 int costbl_hamming_len; 00151 /* cos/-sin table for FFT */ 00152 double *costbl_fft; 00153 double *sintbl_fft; 00154 int tbllen; 00155 /* cos table for MakeMFCC */ 00156 double *costbl_makemfcc; 00157 int costbl_makemfcc_len; 00158 /* sin table for WeightCepstrum */ 00159 double *sintbl_wcep; 00160 int sintbl_wcep_len; 00161 #endif /* MFCC_SINCOS_TABLE */ 00162 float sqrt2var; 00163 float *ssbuf; 00164 int ssbuflen; 00165 
float ss_floor; 00166 float ss_alpha; 00167 } MFCCWork; 00168 00173 typedef struct { 00174 float *mfcc_sum; 00175 float *mfcc_var; 00176 int framenum; 00177 } CMEAN; 00178 00183 typedef struct { 00184 CMEAN *clist; 00185 int clist_max; 00186 int clist_num; 00187 float cweight; 00188 float *cmean_init; 00189 float *cvar_init; 00190 int mfcc_dim; 00191 int veclen; 00192 boolean mean; 00193 boolean var; 00194 boolean cmean_init_set; 00195 CMEAN now; 00196 } CMNWork; 00197 00202 typedef struct { 00203 LOGPROB max_last; 00204 LOGPROB min_last; 00205 LOGPROB max; 00206 } ENERGYWork; 00207 00208 00209 #ifdef __cplusplus 00210 extern "C" { 00211 #endif 00212 00213 /**** mfcc-core.c ****/ 00214 MFCCWork *WMP_work_new(Value *para); 00215 void WMP_calc(MFCCWork *w, float *mfcc, Value *para); 00216 void WMP_free(MFCCWork *w); 00217 /* Get filterbank information */ 00218 boolean InitFBank(MFCCWork *w, Value *para); 00219 void FreeFBank(FBankInfo *fb); 00220 /* Apply hamming window */ 00221 void Hamming (float *wave, int framesize, MFCCWork *w); 00222 /* Apply pre-emphasis filter */ 00223 void PreEmphasise (float *wave, int framesize, float preEmph); 00224 /* Return mel-frequency */ 00225 float Mel(int k, float fres); 00226 /* Apply FFT */ 00227 void FFT(float *xRe, float *xIm, int p, MFCCWork *w); 00228 /* Convert wave -> mel-frequency filterbank */ 00229 void MakeFBank(float *wave, MFCCWork *w, Value *para); 00230 /* Apply the DCT to filterbank */ 00231 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w); 00232 /* Calculate 0'th Cepstral parameter*/ 00233 float CalcC0(MFCCWork *w, Value *para); 00234 /* Calculate Log Raw Energy */ 00235 float CalcLogRawE(float *wave, int framesize); 00236 /* Zero Mean Souce by frame */ 00237 void ZMeanFrame(float *wave, int framesize); 00238 /* Re-scale cepstral coefficients */ 00239 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w); 00240 00241 /**** wav2mfcc-buffer.c ****/ 00242 /* Convert wave -> MFCC_E_D_(Z) (batch) */ 00243 int 
Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w); 00244 /* Calculate delta coefficients (batch) */ 00245 void Delta(float **c, int frame, Value *para); 00246 /* Calculate acceleration coefficients (batch) */ 00247 void Accel(float **c, int frame, Value *para); 00248 /* Normalise log energy (batch) */ 00249 void NormaliseLogE(float **c, int frame_num, Value *para); 00250 /* Cepstrum Mean Normalization (batch) */ 00251 void CMN(float **mfcc, int frame_num, int dim); 00252 void MVN(float **mfcc, int frame_num, Value *para); 00253 00254 /**** wav2mfcc-pipe.c ****/ 00255 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen); 00256 void WMP_deltabuf_free(DeltaBuf *db); 00257 void WMP_deltabuf_prepare(DeltaBuf *db); 00258 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc); 00259 boolean WMP_deltabuf_flush(DeltaBuf *db); 00260 00261 CMNWork *CMN_realtime_new(Value *para, float weight); 00262 void CMN_realtime_free(CMNWork *c); 00263 void CMN_realtime_prepare(CMNWork *c); 00264 void CMN_realtime(CMNWork *c, float *mfcc); 00265 void CMN_realtime_update(CMNWork *c, HTK_Param *param); 00266 boolean CMN_load_from_file(CMNWork *c, char *filename); 00267 boolean CMN_save_to_file(CMNWork *c, char *filename); 00268 00269 void energy_max_init(ENERGYWork *energy); 00270 void energy_max_prepare(ENERGYWork *energy, Value *para); 00271 LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para); 00272 00273 /**** ss.c ****/ 00274 /* spectral subtraction */ 00275 float *new_SS_load_from_file(char *filename, int *slen); 00276 float *new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para); 00277 00278 /**** para.c *****/ 00279 void undef_para(Value *para); 00280 void make_default_para(Value *para); 00281 void make_default_para_htk(Value *para); 00282 void apply_para(Value *dst, Value *src); 00283 boolean htk_config_file_parse(char *HTKconffile, Value *para); 00284 void calc_para_from_header(Value *para, short 
param_type, short vec_size); 00285 void put_para(FILE *fp, Value *para); 00286 00287 #ifdef __cplusplus 00288 } 00289 #endif 00290 00291 #endif /* __MFCC_H__ */