Julius 4.2
libsent/src/wav2mfcc/ss.c
説明を見る。
00001 
00025 /*
00026  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00027  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00028  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00029  * All rights reserved
00030  */
00031 
#include <sent/stddefs.h>
#include <sent/mfcc.h>
#include <stdlib.h>
00034 
00035 
00044 static boolean
00045 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
00046 {
00047   size_t tmp;
00048   if ((tmp = myfread(buf, unitbyte, unitnum, fp)) < (size_t)unitnum) {
00049     return(FALSE);
00050   }
00051 #ifndef WORDS_BIGENDIAN
00052   swap_bytes(buf, unitbyte, unitnum);
00053 #endif
00054   return(TRUE);
00055 }
00056 
00065 float *
00066 new_SS_load_from_file(char *filename, int *slen)
00067 {
00068   FILE *fp;
00069   int num;
00070   float *sbuf;
00071 
00072   /* open file */
00073   jlog("Stat: ss: reading Noise Spectrum for SS\n");
00074   if ((fp = fopen_readfile(filename)) == NULL) {
00075     jlog("Error: ss: failed to open \"%s\"\n", filename);
00076     return(NULL);
00077   }
00078   /* read length */
00079   if (myread(&num, sizeof(int), 1, fp) == FALSE) {
00080     jlog("Error: ss: failed to read \"%s\"\n", filename);
00081     return(NULL);
00082   }
00083   /* allocate */
00084   sbuf = (float *)mymalloc(sizeof(float) * num);
00085   /* read data */
00086   if (myread(sbuf, sizeof(float), num, fp) == FALSE) {
00087     jlog("Error: ss: failed to read \"%s\"\n", filename);
00088     return(NULL);
00089   }
00090   /* close file */
00091   fclose_readfile(fp);
00092 
00093   *slen = num;
00094   jlog("Stat: ss: done\n");
00095   return(sbuf);
00096 }
00097 
00110 float *
00111 new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para)
00112 {
00113   float *spec;
00114   int t, framenum, start, end, k, i;
00115   double x, y;
00116   
00117   /* allocate work area */
00118   spec = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));
00119   for(i=0;i<w->fb.fftN;i++) spec[i] = 0.0;
00120   
00121   /* Caluculate sum of noise power spectrum */
00122   framenum = (int)((wavelen - para->framesize) / para->frameshift) + 1;
00123   if (framenum < 1) {
00124     jlog("Error: too short to get noise spectrum: length < 1 frame\n");
00125     jlog("Error: no SS will be performed\n");
00126     *slen = w->fb.fftN;
00127     return spec;
00128   }
00129     
00130   start = 1;
00131   end = 0;
00132   for (t = 0; t < framenum; t++) {
00133     if (end != 0) start = end - (para->framesize - para->frameshift) - 1;
00134     k = 1;
00135     for (i = start; i <= start + para->framesize; i++) {
00136       w->bf[k] = (float)wave[i-1];
00137       k++;
00138     }
00139     end = i;
00140 
00141     if (para->zmeanframe) {
00142       ZMeanFrame(w->bf, para->framesize);
00143     }
00144 
00145     /* Pre-emphasis */
00146     PreEmphasise(w->bf, para->framesize, para->preEmph);
00147     /* Hamming Window */
00148     Hamming(w->bf, para->framesize, w);
00149     /* FFT Spectrum */
00150     for (i = 1; i <= para->framesize; i++) {
00151       w->fb.Re[i-1] = w->bf[i]; w->fb.Im[i-1] = 0.0;
00152     }
00153     for (i = para->framesize + 1; i <= w->fb.fftN; i++) {
00154       w->fb.Re[i-1] = 0.0;   w->fb.Im[i-1] = 0.0;
00155     }
00156     FFT(w->fb.Re, w->fb.Im, w->fb.n, w);
00157     /* Sum noise spectrum */
00158     for(i = 1; i <= w->fb.fftN; i++){
00159       x = w->fb.Re[i - 1];  y = w->fb.Im[i - 1];
00160       spec[i - 1] += sqrt(x * x + y * y);
00161     }
00162   }
00163 
00164   /* Calculate average noise spectrum */
00165   for(t=0;t<w->fb.fftN;t++) {
00166     spec[t] /= (float)framenum;
00167   }
00168 
00169   /* return the new spec[] */
00170   *slen = w->fb.fftN;
00171   return(spec);
00172 }