Julius 4.2
libsent/src/voca/voca_lookup.c
説明を見る。
00001 
00024 /*
00025  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00026  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00027  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00028  * All rights reserved
00029  */
00030 
00031 #include <sent/stddefs.h>
00032 #include <sent/vocabulary.h>
00033 
00042 WORD_ID
00043 voca_lookup_wid(char *keyword, WORD_INFO *winfo)
00044 {
00045   WORD_ID i, found;
00046   int plen,totallen;
00047   boolean numflag = TRUE;
00048   int wid;
00049   char *c;
00050 
00051   if (keyword == NULL) return WORD_INVALID;
00052   
00053   if (keyword[0] == '#') {
00054     
00055     for(i=1;i<strlen(keyword);i++) {
00056       if (keyword[i] < '0' || keyword[i] > '9') {
00057         numflag = FALSE;
00058         break;
00059       }
00060     }
00061     if (numflag) {
00062       wid = atoi(&(keyword[1]));
00063       if (wid < 0 || wid >= winfo->num) {
00064         return(WORD_INVALID);
00065       } else {
00066         return(wid);
00067       }
00068     } else {
00069       return(WORD_INVALID);
00070     }
00071   }
00072       
00073   found = WORD_INVALID;
00074   totallen = strlen(keyword);
00075   if ((c = strchr(keyword, '[')) != NULL) {
00076     plen = c - keyword;
00077     for (i=0;i<winfo->num;i++) {
00078       if (strnmatch(keyword,winfo->wname[i], plen)
00079           && strnmatch(c+1, winfo->woutput[i], totallen-plen-2)) {
00080         if (found == WORD_INVALID) {
00081           found = i;
00082         } else {
00083           jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n");
00084           break;
00085         }
00086       }
00087     }
00088   } else {
00089     for (i=0;i<winfo->num;i++) {
00090       if (strmatch(keyword,winfo->wname[i])) {
00091         if (found == WORD_INVALID) {
00092           found = i;
00093         } else {
00094           jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n");
00095           break;
00096         }
00097       }
00098     }
00099   }
00100   return found;
00101 }
00102 
00103 /* convert space-separated words string -> array of wid */
00104 /* return malloced array */
00105 #define WSSTEP 10 ///< Allocation step 
00106 
00116 WORD_ID *
00117 new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
00118 {
00119   char *p;
00120   int num;
00121   int maxnum;
00122   WORD_ID *wseq;
00123 
00124   maxnum = WSSTEP;
00125   wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID)*maxnum);
00126   num = 0;
00127   for (p = strtok(s, " "); p != NULL; p = strtok(NULL, " ")) {
00128     if (num >= maxnum) {
00129       maxnum += WSSTEP;
00130       wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
00131     }
00132     if ((wseq[num] = voca_lookup_wid(p, winfo)) == WORD_INVALID) {
00133       /* not found */
00134       jlog("Error: voca_lookup: word \"%s\" not found in dict\n", p);
00135       free(wseq);
00136       return NULL;
00137     }
00138     num++;
00139   }
00140 
00141   *len_return = num;
00142   return(wseq);
00143 }