Julius 4.2
|
00001 00018 /* 00019 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00020 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00021 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00022 * All rights reserved 00023 */ 00024 00025 #include <sent/stddefs.h> 00026 #include <sent/ngram2.h> 00027 #include <sent/ptree.h> 00028 00034 void 00035 ngram_make_lookup_tree(NGRAM_INFO *ndata) 00036 { 00037 int i; 00038 int *windex; 00039 char **wnameindex; 00040 00041 windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num); 00042 for (i=0;i<ndata->max_word_num;i++) { 00043 windex[i] = i; 00044 } 00045 wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num); 00046 for (i=0;i<ndata->max_word_num;i++) { 00047 wnameindex[i] = ndata->wname[i]; 00048 } 00049 00050 ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0, &(ndata->mroot)); 00051 00052 free(windex); 00053 free(wnameindex); 00054 } 00055 00064 WORD_ID 00065 ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr) 00066 { 00067 int data; 00068 data = ptree_search_data(wordstr, ndata->root); 00069 if (data == -1 || strcmp(wordstr, ndata->wname[data]) != 0) { 00070 return WORD_INVALID; 00071 } else { 00072 return(data); 00073 } 00074 } 00075 00084 WORD_ID 00085 make_ngram_ref(NGRAM_INFO *ndata, char *wstr) 00086 { 00087 WORD_ID nw; 00088 00089 nw = ngram_lookup_word(ndata, wstr); 00090 if (nw == WORD_INVALID) { /* not found */ 00091 if (ndata->isopen) { 00092 jlog("Warning: ngram_lookup: \"%s\" not exist in N-gram, treat as unknown\n", wstr); 00093 return(ndata->unk_id); 00094 } else { 00095 jlog("Error: ngram_lookup: \"%s\" not exist in N-gram\n", wstr); 00096 return WORD_INVALID; 00097 } 00098 } else { 00099 return(nw); 00100 } 00101 }