Julius 4.2
libsent/src/ngram/ngram_lookup.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027 #include <sent/ptree.h>
00028 
00034 void
00035 ngram_make_lookup_tree(NGRAM_INFO *ndata)
00036 {
00037   int i;
00038   int *windex;
00039   char **wnameindex;
00040   
00041   windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num);
00042   for (i=0;i<ndata->max_word_num;i++) {
00043     windex[i] = i;
00044   }
00045   wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num);
00046   for (i=0;i<ndata->max_word_num;i++) {
00047     wnameindex[i] = ndata->wname[i];
00048   }
00049 
00050   ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0, &(ndata->mroot));
00051 
00052   free(windex);
00053   free(wnameindex);
00054 }
00055 
00064 WORD_ID
00065 ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr)
00066 {
00067   int data;
00068   data = ptree_search_data(wordstr, ndata->root);
00069   if (data == -1 || strcmp(wordstr, ndata->wname[data]) != 0) {
00070     return WORD_INVALID;
00071   } else {
00072     return(data);
00073   }
00074 }
00075 
00084 WORD_ID
00085 make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
00086 {
00087   WORD_ID nw;
00088 
00089   nw = ngram_lookup_word(ndata, wstr);
00090   if (nw == WORD_INVALID) {     /* not found */
00091     if (ndata->isopen) {
00092       jlog("Warning: ngram_lookup: \"%s\" not exist in N-gram, treat as unknown\n", wstr);
00093       return(ndata->unk_id);
00094     } else {
00095       jlog("Error: ngram_lookup: \"%s\" not exist in N-gram\n", wstr);
00096       return WORD_INVALID;
00097     }
00098   } else {
00099     return(nw);
00100   }
00101 }