MFCC計算のための定義. [詳細]
#include <sent/stddefs.h>
#include <sent/htk_defs.h>
#include <ctype.h>
データ構造 | |
struct | Value |
mfcc configuration parameter values [詳細] | |
struct | FBankInfo |
Workspace for filterbank analysis. [詳細] | |
struct | DeltaBuf |
Cycle buffer for delta computation. [詳細] | |
マクロ定義 | |
#define | DEF_SMPPERIOD 625 |
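Default sampling period, presumably in units of 100 ns as in HTK (625 = 16 kHz). Note: the generated description for this entry, "DEBUG: define if you want to enable debug messages for sin/cos table operation.", appears to belong to a separate debug switch rather than to this macro. | |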
#define | DEF_FRAMESIZE 400 |
Default Window size in samples, similar to WINDOWSIZE in HTK (unit is different). | |
#define | DEF_FFTNUM 512 |
Number of FFT steps. | |
#define | DEF_FRAMESHIFT 160 |
Default frame shift length in samples. | |
#define | DEF_PREENPH 0.97 |
Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK. | |
#define | DEF_MFCCDIM 12 |
Default number of MFCC dimension, corresponds to NUMCEPS in HTK. | |
#define | DEF_CEPLIF 22 |
Default cepstral Liftering coefficient, corresponds to CEPLIFTER in HTK. | |
#define | DEF_FBANK 24 |
Default number of filterbank channels, corresponds to NUMCHANS in HTK. | |
#define | DEF_DELWIN 2 |
Default delta window size, corresponds to DELTAWINDOW in HTK. | |
#define | DEF_ACCWIN 2 |
Default acceleration window size, corresponds to ACCWINDOW in HTK. | |
#define | DEF_SILFLOOR 50.0 |
Default energy silence floor in dBs, corresponds to SILFLOOR in HTK. | |
#define | DEF_ESCALE 1.0 |
Default scaling coefficient of log energy, corresponds to ESCALE in HTK. | |
#define | DEF_SSALPHA 2.0 |
Default alpha coefficient for spectral subtraction. | |
#define | DEF_SSFLOOR 0.5 |
Default flooring coefficient for spectral subtraction. | |
#define | VALUE_VERSION 1 |
Integer version number of Value, for embedding. | |
関数 | |
void | WMP_calc_init (Value para, float **bf, int *bflen) |
void | WMP_calc (float *mfcc, float *bf, Value para, float *ssbuf) |
void | WMP_calc_fin (float *bf) |
void | make_costbl_hamming (int framesize) |
void | make_fft_table (int n) |
void | make_costbl_makemfcc (int fbank_num, int mfcc_dim) |
void | make_sintbl_wcep (int lifter, int mfcc_dim) |
FBankInfo | InitFBank (Value para) |
void | FreeFBank (FBankInfo fb) |
void | Hamming (float *wave, int framesize) |
void | PreEmphasise (float *wave, Value para) |
float | Mel (int k, float fres) |
void | FFT (float *xRe, float *xIm, int p) |
void | MakeFBank (float *wave, double *fbank, FBankInfo info, Value para, float *ssbuf) |
void | MakeMFCC (double *fbank, float *mfcc, Value para) |
float | CalcC0 (double *fbank, Value para) |
float | CalcLogRawE (float *wave, int framesize) |
void | ZMeanFrame (float *wave, int framesize) |
void | WeightCepstrum (float *mfcc, Value para) |
int | Wav2MFCC (SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen) |
void | Delta (float **c, int frame, Value para) |
void | Accel (float **c, int frame, Value para) |
void | NormaliseLogE (float **c, int frame_num, Value para) |
void | CMN (float **mfcc, int frame_num, int dim) |
void | WMP_init (Value para, float **bf, float *ssbuf, int ssbuflen) |
DeltaBuf * | WMP_deltabuf_new (int veclen, int windowlen) |
void | WMP_deltabuf_free (DeltaBuf *db) |
void | WMP_deltabuf_prepare (DeltaBuf *db) |
boolean | WMP_deltabuf_proceed (DeltaBuf *db, float *new_mfcc) |
boolean | WMP_deltabuf_flush (DeltaBuf *db) |
void | CMN_realtime_init (int dimension, float weight) |
void | CMN_realtime_prepare () |
void | CMN_realtime (float *mfcc, int dim) |
void | CMN_realtime_update () |
boolean | CMN_load_from_file (char *filename, int dim) |
boolean | CMN_save_to_file (char *filename) |
void | energy_max_init () |
void | energy_max_prepare (Value *para) |
LOGPROB | energy_max_normalize (LOGPROB f, Value *para) |
float * | new_SS_load_from_file (char *filename, int *slen) |
float * | new_SS_calculate (SP16 *wave, int wavelen, Value para, int *slen) |
void | undef_para (Value *para) |
void | make_default_para (Value *para) |
void | make_default_para_htk (Value *para) |
void | apply_para (Value *dst, Value *src) |
boolean | htk_config_file_parse (char *HTKconffile, Value *para) |
void | calc_para_from_header (Value *para, short param_type, short vec_size) |
void | put_para (Value *para) |
MFCC計算のための定義.
このファイルには,音声波形データからMFCC形式の特徴量ベクトル系列を 計算するための構造体の定義およびデフォルト値が含まれています. デフォルト値は Julius とともに配布されている音響モデルで使用している 値であり,HTKのデフォルトとは値が異なる部分がありますので注意して下さい.
mfcc.h で定義されています。
#define DEF_SMPPERIOD 625 |
void WMP_calc_init | ( | Value | para, | |
float ** | bf, | |||
int * | bflen | |||
) |
Initialize calculation functions and work areas.
para | [in] configuration parameters | |
bf | [out] returns pointer to newly allocated window buffer | |
bflen | [out] length of bf |
mfcc-core.c の 605 行で定義されています。
参照元 WMP_init().
void WMP_calc | ( | float * | mfcc, | |
float * | bf, | |||
Value | para, | |||
float * | ssbuf | |||
) |
Calculate MFCC and log energy for one frame. Perform spectral subtraction if ssbuf is specified.
mfcc | [out] buffer to hold the resulting MFCC vector | |
bf | [i/o] work area for FFT | |
para | [in] configuration parameters | |
ssbuf | [in] noise spectrum, or NULL if not using spectral subtraction |
mfcc-core.c の 632 行で定義されています。
参照元 RealTimePipeLine(), と Wav2MFCC().
void WMP_calc_fin | ( | float * | bf | ) |
Free work area for MFCC computation
bf | [in] window buffer previously allocated by WMP_calc_init() |
mfcc-core.c の 674 行で定義されています。
参照元 Wav2MFCC().
void make_costbl_hamming | ( | int | framesize | ) |
Generate table for hamming window.
framesize | [in] window size |
mfcc-core.c の 63 行で定義されています。
参照元 InitFBank(), と new_SS_calculate().
void make_fft_table | ( | int | n | ) |
Build tables for FFT.
n | [in] 2^n = FFT point |
mfcc-core.c の 94 行で定義されています。
参照元 InitFBank(), と new_SS_calculate().
void make_costbl_makemfcc | ( | int | fbank_num, | |
int | mfcc_dim | |||
) |
Generate table for DCT operation to make mfcc from fbank.
fbank_num | [in] number of filter banks | |
mfcc_dim | [in] number of dimensions in MFCC |
mfcc-core.c の 127 行で定義されています。
参照元 InitFBank().
void make_sintbl_wcep | ( | int | lifter, | |
int | mfcc_dim | |||
) |
Generate table for weighting cepstrum.
lifter | [in] cepstral liftering coefficient | |
mfcc_dim | [in] number of dimensions in MFCC |
mfcc-core.c の 165 行で定義されています。
参照元 InitFBank().
Build filterbank information and generate tables for MFCC computation.
para | [in] configuration parameters |
mfcc-core.c の 211 行で定義されています。
参照元 WMP_calc_init().
void FreeFBank | ( | FBankInfo | fb | ) |
Free FBankInfo.
fb | [in] filterbank information |
mfcc-core.c の 305 行で定義されています。
参照元 WMP_calc_fin().
void Hamming | ( | float * | wave, | |
int | framesize | |||
) |
Apply hamming window.
wave | [i/o] waveform data in the current frame | |
framesize | [in] frame size |
mfcc-core.c の 375 行で定義されています。
参照元 new_SS_calculate(), と WMP_calc().
void PreEmphasise | ( | float * | wave, | |
Value | para | |||
) |
Apply pre-emphasis filter.
wave | [i/o] waveform data in the current frame | |
para | [in] configuration parameters |
mfcc-core.c の 360 行で定義されています。
参照元 new_SS_calculate(), と WMP_calc().
float Mel | ( | int | k, | |
float | fres | |||
) |
Return mel-frequency.
k | [in] channel number of filter bank | |
fres | [in] constant value computed by "1.0E7 / (para.smp_period * fb.fftN * 700.0)" |
mfcc-core.c の 199 行で定義されています。
参照元 InitFBank().
void FFT | ( | float * | xRe, | |
float * | xIm, | |||
int | p | |||
) |
Apply FFT
xRe | [i/o] real part of waveform | |
xIm | [i/o] imaginary part of waveform | |
p | [in] 2^p = FFT point |
mfcc-core.c の 396 行で定義されています。
参照元 MakeFBank(), と new_SS_calculate().
Convert wave -> (spectral subtraction) -> mel-frequency filterbank
wave | [in] waveform data in the current frame | |
fbank | [out] the resulting mel-frequency filterbank | |
fb | [in] filterbank information | |
para | [in] configuration parameters | |
ssbuf | [in] noise spectrum, or NULL if spectral subtraction is not applied |
mfcc-core.c の 450 行で定義されています。
参照元 WMP_calc().
void MakeMFCC | ( | double * | fbank, | |
float * | mfcc, | |||
Value | para | |||
) |
Apply DCT to filterbank to make MFCC.
fbank | [in] filterbank | |
mfcc | [out] output MFCC vector | |
para | [in] configuration parameters |
mfcc-core.c の 528 行で定義されています。
参照元 WMP_calc().
float CalcC0 | ( | double * | fbank, | |
Value | para | |||
) |
Calculate 0'th cepstral coefficient.
fbank | [in] filterbank | |
para | [in] configuration parameters |
mfcc-core.c の 510 行で定義されています。
参照元 WMP_calc().
float CalcLogRawE | ( | float * | wave, | |
int | framesize | |||
) |
Calculate Log Raw Energy.
wave | [in] waveform data in the current frame | |
framesize | [in] frame size |
mfcc-core.c の 341 行で定義されています。
参照元 WMP_calc().
void ZMeanFrame | ( | float * | wave, | |
int | framesize | |||
) |
Remove DC offset per frame
wave | [i/o] waveform data in the current frame | |
framesize | [in] frame size |
mfcc-core.c の 322 行で定義されています。
参照元 new_SS_calculate(), と WMP_calc().
void WeightCepstrum | ( | float * | mfcc, | |
Value | para | |||
) |
Re-scale cepstral coefficients.
mfcc | [i/o] a MFCC vector | |
para | [in] configuration parameters |
mfcc-core.c の 562 行で定義されています。
参照元 WMP_calc().
Convert wave data to MFCC. Also does spectral subtraction if ssbuf specified.
wave | [in] waveform data | |
mfcc | [out] buffer to store the resulting MFCC parameter vector [t][0..veclen-1], should be already allocated | |
para | [in] configuration parameters | |
nSamples | [in] length of waveform data | |
ssbuf | [in] buffer that holds noise spectrum to be subtracted from input, or NULL if spectral subtraction is not used | |
ssbuflen | [in] length of above, ignored when ssbuf is NULL |
wav2mfcc-buffer.c の 56 行で定義されています。
参照元 new_wav2mfcc().
void Delta | ( | float ** | c, | |
int | frame, | |||
Value | para | |||
) |
Calculate delta coefficients
c | [i/o] MFCC vectors, in which the delta coeff. will be appended. | |
frame | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 140 行で定義されています。
参照元 Wav2MFCC().
void Accel | ( | float ** | c, | |
int | frame, | |||
Value | para | |||
) |
Calculate acceleration coefficients.
c | [i/o] MFCC vectors, in which the acceleration coeff. will be appended. | |
frame | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 188 行で定義されています。
参照元 Wav2MFCC().
void NormaliseLogE | ( | float ** | mfcc, | |
int | frame_num, | |||
Value | para | |||
) |
Normalise log energy
mfcc | [i/o] array of MFCC vectors | |
frame_num | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 108 行で定義されています。
参照元 Wav2MFCC().
void CMN | ( | float ** | mfcc, | |
int | frame_num, | |||
int | dim | |||
) |
Cepstrum Mean Normalization (buffered). The cepstral mean will be computed within the given MFCC vectors.
mfcc | [i/o] array of MFCC vectors | |
frame_num | [in] number of frames | |
dim | [in] total dimension of MFCC vectors |
wav2mfcc-buffer.c の 227 行で定義されています。
参照元 Wav2MFCC().
void WMP_init | ( | Value | para, | |
float ** | bf, | |||
float * | ssbuf, | |||
int | ssbuflen | |||
) |
Initialize and set up buffers for MFCC computation.
para | [in] configuration parameters | |
bf | [out] pointer to the entry point of workspace for FFT | |
ssbuf | [in] noise spectrum, or NULL if not using spectral subtraction | |
ssbuflen | [in] length of above, ignored when ssbuf is NULL |
wav2mfcc-pipe.c の 57 行で定義されています。
参照元 RealTimeInit(), と Wav2MFCC().
DeltaBuf* WMP_deltabuf_new | ( | int | veclen, | |
int | windowlen | |||
) |
Allocate a new delta cycle buffer.
veclen | [in] length of a vector | |
windowlen | [in] window width for computing delta |
wav2mfcc-pipe.c の 83 行で定義されています。
参照元 RealTimeInit().
void WMP_deltabuf_free | ( | DeltaBuf * | db | ) |
void WMP_deltabuf_prepare | ( | DeltaBuf * | db | ) |
Reset and clear the delta cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 128 行で定義されています。
Store the given MFCC vector into the delta cycle buffer, and compute the latest delta coefficients.
db | [i/o] delta cycle buffer | |
new_mfcc | [in] MFCC vector |
wav2mfcc-pipe.c の 187 行で定義されています。
参照元 RealTimeParam(), と RealTimePipeLine().
Flush the delta coefficients left in the delta cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 229 行で定義されています。
参照元 RealTimeParam().
void CMN_realtime_init | ( | int | dimension, | |
float | weight | |||
) |
Initialize MAP-CMN at startup.
dimension | [in] vector dimension | |
weight | [in] initial cepstral mean weight |
wav2mfcc-pipe.c の 290 行で定義されています。
参照元 RealTimeInit().
void CMN_realtime_prepare | ( | ) |
Prepare for MAP-CMN at start of each input
wav2mfcc-pipe.c の 317 行で定義されています。
void CMN_realtime | ( | float * | mfcc, | |
int | dim | |||
) |
Perform MAP-CMN for incoming MFCC vectors
mfcc | [in] MFCC vector | |
dim | [in] dimension |
wav2mfcc-pipe.c の 332 行で定義されています。
参照元 RealTimeParam(), と RealTimePipeLine().
void CMN_realtime_update | ( | ) |
Update initial cepstral mean from previous utterances for next input.
wav2mfcc-pipe.c の 360 行で定義されています。
参照元 RealTimeCMNUpdate().
boolean CMN_load_from_file | ( | char * | filename, | |
int | dim | |||
) |
Load CMN parameter from file. If the number of MFCC dimensions in the file does not match the specified one, an error will occur.
filename | [in] file name | |
dim | [in] required number of MFCC dimensions |
wav2mfcc-pipe.c の 460 行で定義されています。
参照元 RealTimeInit().
boolean CMN_save_to_file | ( | char * | filename | ) |
Save the current CMN vector to a file.
filename | [in] filename to save the data. |
wav2mfcc-pipe.c の 504 行で定義されています。
参照元 RealTimeCMNUpdate().
void energy_max_init | ( | ) |
Initialize work area for energy normalization on live input. This should be called once on startup.
wav2mfcc-pipe.c の 543 行で定義されています。
参照元 RealTimeInit().
void energy_max_prepare | ( | Value * | para | ) |
Prepare values for energy normalization on live input. This should be called before each input segment.
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 555 行で定義されています。
Perform energy normalization using maximum of last input.
f | [in] raw energy | |
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 571 行で定義されています。
参照元 RealTimePipeLine().
float* new_SS_load_from_file | ( | char * | filename, | |
int * | slen | |||
) |
Load a noise spectrum from file.
filename | [in] path name of noise spectrum file | |
slen | [out] length of the returned buffer |
参照元 new_wav2mfcc(), と RealTimeInit().
Compute average spectrum of audio input. This is used to estimate a noise spectrum from input samples.
wave | [in] input audio data sequence | |
wavelen | [in] length of above | |
para | [in] parameter | |
slen | [out] length of returned buffer |
参照元 new_wav2mfcc().
void undef_para | ( | Value * | para | ) |
Reset configuration parameters for MFCC computation.
para | [out] feature extraction parameters |
参照元 initialize_GMM(), initialize_GSHMM(), と system_bootup().
void make_default_para | ( | Value * | para | ) |
Set Julius default parameters for MFCC computation.
para | [out] feature extraction parameters |
参照元 system_bootup().
void make_default_para_htk | ( | Value * | para | ) |
Set HTK default configuration parameters for MFCC computation. This will be referred to when parameters are given as an HTK Config file.
para | [out] feature extraction parameters |
参照元 system_bootup().
Merge two configuration parameters for MFCC computation.
dst | [out] feature extraction parameters to set to | |
src | [in] feature extraction parameters to set from |
参照元 final_fusion().
Read and parse an HTK Config file, and set the specified option values.
HTKconffile | [in] HTK Config file path name | |
para | [out] MFCC parameter to set |
参照元 opt_parse().
void calc_para_from_header | ( | Value * | para, | |
short | param_type, | |||
short | vec_size | |||
) |
void put_para | ( | Value * | para | ) |
Output acoustic analysis configuration parameters to stdout.
para | [in] configuration parameter |
参照元 print_info().