Julius 4.2
libsent/include/sent/mfcc.h
説明を見る。
00001 
00035 /************************************************************************/
00036 /*    mfcc.h                                                            */
00037 /*                                                                      */
00038 /*    Author    : Yuichiro Nakano                                       */
00039 /************************************************************************/
00040 
00041 #ifndef __MFCC_H__
00042 #define __MFCC_H__
00043 
00045 #undef MFCC_TABLE_DEBUG         /* left undefined in this header */
00046 
00047 #define CPMAX 500               ///< Maximum number of frames to store cepstral mean for realtime CMN update
00048 #define CPSTEP 5                ///< Allocation step of the cmean list per sentence
00049 
00050 #include <sent/stddefs.h>
00051 #include <sent/htk_defs.h>
00052 #include <sent/htk_param.h>
00053 #include <ctype.h>
00054 
00055 #define DEF_SMPPERIOD   625     ///< Default sampling period in 100ns (625 = 16kHz)
00056 #define DEF_FRAMESIZE   400     ///< Default window size in samples, similar to WINDOWSIZE in HTK (unit is different)
00057 #define DEF_FFTNUM      512     ///< Number of FFT steps
00058 #define DEF_FRAMESHIFT  160     ///< Default frame shift length in samples
00059 #define DEF_PREENPH     0.97    ///< Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK
00060 #define DEF_MFCCDIM     12      ///< Default number of MFCC dimensions, corresponds to NUMCEPS in HTK
00061 #define DEF_CEPLIF      22      ///< Default cepstral liftering coefficient, corresponds to CEPLIFTER in HTK
00062 #define DEF_FBANK       24      ///< Default number of filterbank channels, corresponds to NUMCHANS in HTK
00063 #define DEF_DELWIN      2       ///< Default delta window size, corresponds to DELTAWINDOW in HTK
00064 #define DEF_ACCWIN      2       ///< Default acceleration window size, corresponds to ACCWINDOW in HTK
00065 #define DEF_SILFLOOR    50.0    ///< Default energy silence floor in dB, corresponds to SILFLOOR in HTK
00066 #define DEF_ESCALE      1.0     ///< Default scaling coefficient of log energy, corresponds to ESCALE in HTK
00067 
00068 #define DEF_SSALPHA     2.0     ///< Default alpha coefficient for spectral subtraction
00069 #define DEF_SSFLOOR     0.5     ///< Default flooring coefficient for spectral subtraction
00070 
00071 /* version 2: ss_floor and ss_alpha removed */
00072 /* version 3: usepower added */
00073 #define VALUE_VERSION 3 ///< Integer version number of Value, for embedding
00074 
00076 typedef struct {      /* Analysis parameter set, handed to the functions below as `Value *para`.
                               NOTE(review): field meanings are inferred from the DEF_* macro comments
                               and from the field names - confirm against para.c before relying on them. */
00077   long smp_period;      ///< Sampling period; presumably in 100ns units like DEF_SMPPERIOD - TODO confirm
00078   long smp_freq;        ///< Sampling frequency; presumably in Hz - TODO confirm
00079   int framesize;        ///< Window size; presumably in samples like DEF_FRAMESIZE - TODO confirm
00080   int frameshift;       ///< Frame shift length; presumably in samples like DEF_FRAMESHIFT - TODO confirm
00081   float preEmph;        ///< Pre-emphasis coefficient (cf. DEF_PREENPH)
00082   int lifter;           ///< Cepstral liftering coefficient (cf. DEF_CEPLIF)
00083   int fbank_num;        ///< Number of filterbank channels (cf. DEF_FBANK)
00084   int delWin;           ///< Delta window size (cf. DEF_DELWIN)
00085   int accWin;           ///< Acceleration window size (cf. DEF_ACCWIN)
00086   float silFloor;       ///< Energy silence floor; presumably in dB like DEF_SILFLOOR - TODO confirm
00087   float escale;         ///< Scaling coefficient of log energy (cf. DEF_ESCALE)
00088   int hipass;           ///< presumably a high-side cut-off setting for the filterbank - TODO confirm meaning and unit
00089   int lopass;           ///< presumably a low-side cut-off setting for the filterbank - TODO confirm meaning and unit
00090   int enormal;          ///< presumably a switch for energy normalization - TODO confirm
00091   int raw_e;            ///< presumably a switch for using raw energy (cf. CalcLogRawE) - TODO confirm
00092   int zmeanframe;       ///< presumably a switch for frame-wise zero-mean processing (cf. ZMeanFrame) - TODO confirm
00093   int usepower;         ///< Added in VALUE_VERSION 3; presumably selects power instead of magnitude - TODO confirm
00094   float vtln_alpha;     ///< presumably the VTLN warping factor - TODO confirm
00095   float vtln_upper;     ///< presumably the VTLN upper frequency bound - TODO confirm
00096   float vtln_lower;     ///< presumably the VTLN lower frequency bound - TODO confirm
00097 
00098   /* items below do not need to be embedded, because they can be
00099      determined from the acoustic model header, or should be computed
00100      from run-time variables */
00101   int delta;            ///< presumably a flag: delta coefficients are used - TODO confirm
00102   int acc;              ///< presumably a flag: acceleration coefficients are used - TODO confirm
00103   int energy;           ///< presumably a flag: log energy is used - TODO confirm
00104   int c0;               ///< presumably a flag: 0'th cepstral parameter is used (cf. CalcC0) - TODO confirm
00105   int absesup;          ///< presumably a flag: absolute energy is suppressed - TODO confirm
00106   int cmn;              ///< presumably a flag: cepstral mean normalization is applied - TODO confirm
00107   int cvn;              ///< presumably a flag: cepstral variance normalization is applied - TODO confirm
00108   int mfcc_dim;         ///< Number of MFCC dimensions (cf. DEF_MFCCDIM)
00109   int baselen;          ///< presumably the length of the base coefficients in a vector - TODO confirm
00110   int vecbuflen;        ///< presumably the vector buffer length needed during computation - TODO confirm
00111   int veclen;           ///< presumably the final feature vector length - TODO confirm
00112 
00113   int loaded;           ///< presumably non-zero once the parameters have been loaded - TODO confirm
00114 }Value;
00115 
00117 typedef struct {      /* Filterbank work data; embedded in MFCCWork as `fb`, and FreeFBank() takes a pointer to it.
                               NOTE(review): field meanings below are inferred from names only - confirm in mfcc-core.c. */
00118    int fftN;            ///< presumably the FFT size - TODO confirm
00119    int n;               ///< presumably log2 of the FFT size - TODO confirm
00120    int klo;             ///< presumably the lowest FFT index in use - TODO confirm
00121    int khi;             ///< presumably the highest FFT index in use - TODO confirm
00122    float fres;          ///< presumably the frequency resolution handed to Mel() - TODO confirm
00123    float *cf;           ///< presumably the channel centre frequencies - TODO confirm
00124    short *loChan;       ///< presumably the lower channel index for each FFT bin - TODO confirm
00125    float *loWt;         ///< presumably the lower-channel weight for each FFT bin - TODO confirm
00126    float *Re;           ///< presumably the real-part buffer for FFT() - TODO confirm
00127    float *Im;           ///< presumably the imaginary-part buffer for FFT() - TODO confirm
00128 } FBankInfo;
00129 
00131 typedef struct {              /* Buffer type operated on by the WMP_deltabuf_*() functions declared below.
                                       NOTE(review): field meanings are inferred from names only - confirm in wav2mfcc-pipe.c. */
00132   float **mfcc;                 ///< presumably the buffered MFCC vectors - TODO confirm
00133   int veclen;                   ///< presumably the vector length (cf. `veclen` argument of WMP_deltabuf_new) - TODO confirm
00134   float *vec;                   ///< presumably the most recently produced output vector - TODO confirm
00135   int win;                      ///< presumably the delta window length (cf. `windowlen` argument of WMP_deltabuf_new) - TODO confirm
00136   int len;                      ///< presumably the buffer length in vectors - TODO confirm
00137   int store;                    ///< presumably the current storing position - TODO confirm
00138   boolean *is_on;               ///< presumably per-slot validity flags - TODO confirm
00139   int B;                        ///< presumably a precomputed constant for the delta computation - TODO confirm
00140 } DeltaBuf;
00141 
00143 typedef struct {              /* Work area passed as `MFCCWork *w` to the MFCC computation functions declared below.
                                       NOTE(review): field meanings are inferred from names only - confirm in mfcc-core.c. */
00144   float *bf;                    ///< presumably the per-frame working buffer - TODO confirm
00145   double *fbank;   ///< presumably the filterbank output values - TODO confirm
00146   FBankInfo fb; ///< Filterbank information (see FBankInfo)
00147   int bflen;                    ///< presumably the length of `bf` - TODO confirm
00148 #ifdef MFCC_SINCOS_TABLE
00149   double *costbl_hamming; ///< presumably a cos table for Hamming() - TODO confirm
00150   int costbl_hamming_len; ///< presumably the length of costbl_hamming - TODO confirm
00151   /* cos/-sin table for FFT */
00152   double *costbl_fft; 
00153   double *sintbl_fft; 
00154   int tbllen; ///< presumably the length of the FFT tables above - TODO confirm
00155   /* cos table for MakeMFCC */
00156   double *costbl_makemfcc; 
00157   int costbl_makemfcc_len; 
00158   /* sin table for WeightCepstrum */
00159   double *sintbl_wcep; 
00160   int sintbl_wcep_len; 
00161 #endif /* MFCC_SINCOS_TABLE */
00162   float sqrt2var; ///< presumably a cached square-root constant used by the DCT - TODO confirm
00163   float *ssbuf;                 ///< presumably the noise spectrum for spectral subtraction - TODO confirm
00164   int ssbuflen;                 ///< presumably the length of `ssbuf` - TODO confirm
00165   float ss_floor;               ///< presumably the flooring coefficient for spectral subtraction (cf. DEF_SSFLOOR) - TODO confirm
00166   float ss_alpha;               ///< presumably the alpha coefficient for spectral subtraction (cf. DEF_SSALPHA) - TODO confirm
00167 } MFCCWork;
00168 
00173 typedef struct {              /* Cepstral statistics record; used as the element type of CMNWork.clist and as CMNWork.now. */
00174   float *mfcc_sum;              ///< presumably the per-dimension sum of MFCC values - TODO confirm
00175   float *mfcc_var;              ///< presumably the per-dimension variance accumulator - TODO confirm
00176   int framenum;                 ///< presumably the number of frames accumulated - TODO confirm
00177 } CMEAN;
00178 
00183 typedef struct {      /* Work area operated on by the CMN_realtime*() and CMN_load/save functions declared below.
                               NOTE(review): field meanings are inferred from names only - confirm in wav2mfcc-pipe.c. */
00184   CMEAN *clist;         ///< List of CMEAN records; presumably past per-utterance statistics - TODO confirm
00185   int clist_max;                ///< presumably the allocated size of `clist` (cf. CPSTEP) - TODO confirm
00186   int clist_num;                ///< presumably the number of `clist` entries in use - TODO confirm
00187   float cweight;                ///< presumably the weight given to CMN_realtime_new() - TODO confirm
00188   float *cmean_init;    ///< presumably the initial cepstral mean - TODO confirm
00189   float *cvar_init;             ///< presumably the initial cepstral variance - TODO confirm
00190   int mfcc_dim;                 ///< presumably the base MFCC dimension - TODO confirm
00191   int veclen;                   ///< presumably the full vector length - TODO confirm
00192   boolean mean;                 ///< presumably TRUE when mean normalization is applied - TODO confirm
00193   boolean var;                  ///< presumably TRUE when variance normalization is applied - TODO confirm
00194   boolean cmean_init_set;       ///< presumably TRUE once the initial values are available - TODO confirm
00195   CMEAN now;            ///< CMEAN record; presumably the statistics of the current input - TODO confirm
00196 } CMNWork;
00197 
00202 typedef struct {      /* Work area operated on by the energy_max_*() functions declared below. */
00203   LOGPROB max_last;     ///< presumably the maximum energy of the previous input - TODO confirm
00204   LOGPROB min_last;     ///< presumably the minimum energy of the previous input - TODO confirm
00205   LOGPROB max;  ///< presumably the maximum energy of the current input - TODO confirm
00206 } ENERGYWork;
00207 
00208 
00209 #ifdef __cplusplus
00210 extern "C" {
00211 #endif
00212 
00213 /**** mfcc-core.c ****/
00214 MFCCWork *WMP_work_new(Value *para);    /* presumably allocates a work area for the given parameters - TODO confirm */
00215 void WMP_calc(MFCCWork *w, float *mfcc, Value *para);   /* presumably computes one frame of MFCC into `mfcc` - TODO confirm */
00216 void WMP_free(MFCCWork *w);     /* presumably releases a work area from WMP_work_new() - TODO confirm */
00217 /* Get filterbank information */
00218 boolean InitFBank(MFCCWork *w, Value *para);
00219 void FreeFBank(FBankInfo *fb);
00220 /* Apply hamming window */
00221 void Hamming (float *wave, int framesize, MFCCWork *w);
00222 /* Apply pre-emphasis filter */
00223 void PreEmphasise (float *wave, int framesize, float preEmph);
00224 /* Return mel-frequency */
00225 float Mel(int k, float fres);
00226 /* Apply FFT */
00227 void FFT(float *xRe, float *xIm, int p, MFCCWork *w);
00228 /* Convert wave -> mel-frequency filterbank */
00229 void MakeFBank(float *wave, MFCCWork *w, Value *para);
00230 /* Apply the DCT to filterbank */ 
00231 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w);
00232 /* Calculate 0'th cepstral parameter */
00233 float CalcC0(MFCCWork *w, Value *para);
00234 /* Calculate log raw energy */
00235 float CalcLogRawE(float *wave, int framesize);
00236 /* Zero-mean the source by frame */
00237 void ZMeanFrame(float *wave, int framesize);
00238 /* Re-scale cepstral coefficients */
00239 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w);
00240 
00241 /**** wav2mfcc-buffer.c ****/
00242 /* Convert wave -> MFCC_E_D_(Z) (batch) */
00243 int Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w);
00244 /* Calculate delta coefficients (batch) */
00245 void Delta(float **c, int frame, Value *para);
00246 /* Calculate acceleration coefficients (batch) */
00247 void Accel(float **c, int frame, Value *para);
00248 /* Normalise log energy (batch) */
00249 void NormaliseLogE(float **c, int frame_num, Value *para);
00250 /* Cepstrum Mean Normalization (batch) */
00251 void CMN(float **mfcc, int frame_num, int dim);
00252 void MVN(float **mfcc, int frame_num, Value *para);     /* presumably mean and variance normalization (batch) - TODO confirm */
00253 
00254 /**** wav2mfcc-pipe.c ****/
00255 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen);  /* DeltaBuf handling; presumably allocate / free / reset / feed one vector / flush - TODO confirm */
00256 void WMP_deltabuf_free(DeltaBuf *db);
00257 void WMP_deltabuf_prepare(DeltaBuf *db);
00258 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc);
00259 boolean WMP_deltabuf_flush(DeltaBuf *db);
00260 
00261 CMNWork *CMN_realtime_new(Value *para, float weight);   /* CMNWork handling; presumably real-time cepstral normalization with load/save of the statistics - TODO confirm */
00262 void CMN_realtime_free(CMNWork *c);
00263 void CMN_realtime_prepare(CMNWork *c);
00264 void CMN_realtime(CMNWork *c, float *mfcc);
00265 void CMN_realtime_update(CMNWork *c, HTK_Param *param);
00266 boolean CMN_load_from_file(CMNWork *c, char *filename);
00267 boolean CMN_save_to_file(CMNWork *c, char *filename);
00268 
00269 void energy_max_init(ENERGYWork *energy);       /* ENERGYWork handling; presumably energy normalization based on a tracked maximum - TODO confirm */
00270 void energy_max_prepare(ENERGYWork *energy, Value *para);
00271 LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para);
00272 
00273 /**** ss.c ****/
00274 /* Spectral subtraction: both functions return a float buffer; presumably the
         buffer is newly allocated and its length is stored in *slen - TODO confirm */
00275 float *new_SS_load_from_file(char *filename, int *slen);
00276 float *new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para);
00277 
00278 /**** para.c *****/
00279 void undef_para(Value *para);   /* presumably marks every parameter as unset - TODO confirm */
00280 void make_default_para(Value *para);    /* presumably fills in the default values - TODO confirm */
00281 void make_default_para_htk(Value *para);        /* presumably fills in HTK-compatible default values - TODO confirm */
00282 void apply_para(Value *dst, Value *src);        /* presumably copies parameters from src into dst - TODO confirm which ones */
00283 boolean htk_config_file_parse(char *HTKconffile, Value *para);  /* presumably reads parameters from an HTK config file - TODO confirm */
00284 void calc_para_from_header(Value *para, short param_type, short vec_size);      /* presumably derives the non-embedded Value fields from a model header - TODO confirm */
00285 void put_para(FILE *fp, Value *para);   /* presumably prints the parameters to fp - TODO confirm */
00286 
00287 #ifdef __cplusplus
00288 }
00289 #endif
00290 
00291 #endif /* __MFCC_H__ */