音声波形から MFCC 特徴量へ変換する (フレーム単位) [詳細]
#include <sent/stddefs.h>
#include <sent/mfcc.h>
データ構造 | |
struct | CMEAN |
マクロ定義 | |
#define | CPMAX 500 |
Maximum number of frames to store cepstral mean for CMN update. | |
#define | CPSTEP 5 |
clist allocate step | |
関数 | |
void | WMP_init (Value para, float **bf, float *ssbuf, int ssbuflen) |
DeltaBuf * | WMP_deltabuf_new (int veclen, int windowlen) |
void | WMP_deltabuf_free (DeltaBuf *db) |
void | WMP_deltabuf_prepare (DeltaBuf *db) |
static void | WMP_deltabuf_calc (DeltaBuf *db, int cur) |
boolean | WMP_deltabuf_proceed (DeltaBuf *db, float *new_mfcc) |
boolean | WMP_deltabuf_flush (DeltaBuf *db) |
void | CMN_realtime_init (int dimension, float weight) |
void | CMN_realtime_prepare () |
void | CMN_realtime (float *mfcc, int dim) |
void | CMN_realtime_update () |
static boolean | myread (void *buf, size_t unitbyte, int unitnum, FILE *fp) |
static boolean | mywrite (void *buf, size_t unitbyte, int unitnum, int fd) |
boolean | CMN_load_from_file (char *filename, int dim) |
boolean | CMN_save_to_file (char *filename) |
void | energy_max_init () |
void | energy_max_prepare (Value *para) |
LOGPROB | energy_max_normalize (LOGPROB f, Value *para) |
変数 | |
static CMEAN * | clist |
List of MFCC sum for previous inputs. | |
static int | clist_max |
Allocated number of CMEAN in clist. | |
static int | clist_num |
Currently filled number of CMEAN in clist. | |
static int | dim |
Local workarea to store the number of MFCC dimension. | |
static float | cweight |
Weight of initial cepstral mean. | |
static float * | cmean_init |
Initial cepstral mean for each input. | |
static boolean | cmean_init_set |
TRUE if cmean_init was set. | |
static CMEAN | now |
Work area to hold current cepstral mean. | |
static LOGPROB | energy_max_last |
Maximum energy value of last input. | |
static LOGPROB | energy_min_last |
Minimum floored energy value of last input. | |
static LOGPROB | energy_max |
Maximum energy value of current input. |
音声波形から MFCC 特徴量へ変換する (フレーム単位)
ここでは wav2mfcc.c の関数をフレーム同期に処理するために変換した関数が納められています.認識処理を音声入力と並行して行う場合,こちらの関数が用いられます.
wav2mfcc-pipe.c で定義されています。
void WMP_init | ( | Value | para, | |
float ** | bf, | |||
float * | ssbuf, | |||
int | ssbuflen | |||
) |
Initialize and set up buffers for MFCC computation.
para | [in] configuration parameters | |
bf | [out] pointer to the entry point of workspace for FFT | |
ssbuf | [in] noise spectrum, or NULL if not using spectral subtraction | |
ssbuflen | [in] length of above, ignored when ssbuf is NULL |
wav2mfcc-pipe.c の 57 行で定義されています。
参照元 RealTimeInit(), と Wav2MFCC().
DeltaBuf* WMP_deltabuf_new | ( | int | veclen, | |
int | windowlen | |||
) |
Allocate a new delta cycle buffer.
veclen | [in] length of a vector | |
windowlen | [in] window width for computing delta |
wav2mfcc-pipe.c の 83 行で定義されています。
参照元 RealTimeInit().
void WMP_deltabuf_free | ( | DeltaBuf * | db | ) |
void WMP_deltabuf_prepare | ( | DeltaBuf * | db | ) |
Reset and clear the delta cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 128 行で定義されています。
static void WMP_deltabuf_calc | ( | DeltaBuf * | db, | |
int | cur | |||
) | [static] |
Calculate delta coefficients of the specified point in the cycle buffer.
db | [i/o] delta cycle buffer | |
cur | [in] target point to calculate the delta coefficients |
wav2mfcc-pipe.c の 144 行で定義されています。
Store the given MFCC vector into the delta cycle buffer, and compute the latest delta coefficients.
db | [i/o] delta cycle buffer | |
new_mfcc | [in] MFCC vector |
wav2mfcc-pipe.c の 187 行で定義されています。
参照元 RealTimeParam(), と RealTimePipeLine().
Flush the delta cycle buffer by computing the delta coefficients left in the cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 229 行で定義されています。
参照元 RealTimeParam().
void CMN_realtime_init | ( | int | dimension, | |
float | weight | |||
) |
Initialize MAP-CMN at startup.
dimension | [in] vector dimension | |
weight | [in] initial cepstral mean weight |
wav2mfcc-pipe.c の 290 行で定義されています。
参照元 RealTimeInit().
void CMN_realtime_prepare | ( | ) |
Prepare for MAP-CMN at start of each input
wav2mfcc-pipe.c の 317 行で定義されています。
void CMN_realtime | ( | float * | mfcc, | |
int | dim | |||
) |
Perform MAP-CMN for incoming MFCC vectors
mfcc | [in] MFCC vector | |
dim | [in] dimension |
wav2mfcc-pipe.c の 332 行で定義されています。
参照元 RealTimeParam(), と RealTimePipeLine().
void CMN_realtime_update | ( | ) |
Update initial cepstral mean from previous utterances for next input.
wav2mfcc-pipe.c の 360 行で定義されています。
参照元 RealTimeCMNUpdate().
static boolean myread | ( | void * | buf, | |
size_t | unitbyte, | |||
int | unitnum, | |||
FILE * | fp | |||
) | [static] |
Read binary with byte swap (assume file is Big Endian)
buf | [out] data buffer | |
unitbyte | [in] size of unit in bytes | |
unitnum | [in] number of units to be read | |
fp | [in] file pointer |
wav2mfcc-pipe.c の 414 行で定義されています。
参照元 CMN_load_from_file().
static boolean mywrite | ( | void * | buf, | |
size_t | unitbyte, | |||
int | unitnum, | |||
int | fd | |||
) | [static] |
Write binary with byte swap (assume data is Big Endian)
buf | [in] data buffer | |
unitbyte | [in] size of unit in bytes | |
unitnum | [in] number of units to write | |
fd | [in] file descriptor |
wav2mfcc-pipe.c の 436 行で定義されています。
参照元 CMN_save_to_file().
boolean CMN_load_from_file | ( | char * | filename, | |
int | dim | |||
) |
Load CMN parameter from file. If the number of MFCC dimension in the file does not match the specified one, an error will occur.
filename | [in] file name | |
dim | [in] required number of MFCC dimensions |
wav2mfcc-pipe.c の 460 行で定義されています。
参照元 RealTimeInit().
boolean CMN_save_to_file | ( | char * | filename | ) |
Save the current CMN vector to a file.
filename | [in] filename to save the data. |
wav2mfcc-pipe.c の 504 行で定義されています。
参照元 RealTimeCMNUpdate().
void energy_max_init | ( | ) |
Initialize work area for energy normalization on live input. This should be called once on startup.
wav2mfcc-pipe.c の 543 行で定義されています。
参照元 RealTimeInit().
void energy_max_prepare | ( | Value * | para | ) |
Prepare values for energy normalization on live input. This should be called before each input segment.
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 555 行で定義されています。
Perform energy normalization using maximum of last input.
f | [in] raw energy | |
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 571 行で定義されています。
参照元 RealTimePipeLine().