00001
00023
00024
00025
00026
00027
00028
00029
00030 #include <sent/stddefs.h>
00031 #include <sent/vocabulary.h>
00032
00041 WORD_ID
00042 voca_lookup_wid(char *keyword, WORD_INFO *winfo)
00043 {
00044 WORD_ID i, found;
00045 int plen,totallen;
00046 boolean numflag = TRUE;
00047 int wid;
00048 char *c;
00049
00050 if (keyword == NULL) return WORD_INVALID;
00051
00052 if (keyword[0] == '#') {
00053
00054 for(i=1;i<strlen(keyword);i++) {
00055 if (keyword[i] < '0' || keyword[i] > '9') {
00056 numflag = FALSE;
00057 break;
00058 }
00059 }
00060 if (numflag) {
00061 wid = atoi(&(keyword[1]));
00062 if (wid < 0 || wid >= winfo->num) {
00063 return(WORD_INVALID);
00064 } else {
00065 return(wid);
00066 }
00067 } else {
00068 return(WORD_INVALID);
00069 }
00070 }
00071
00072 found = WORD_INVALID;
00073 totallen = strlen(keyword);
00074 if ((c = strchr(keyword, '[')) != NULL) {
00075 plen = c - keyword;
00076 for (i=0;i<winfo->num;i++) {
00077 if (strnmatch(keyword,winfo->wname[i], plen)
00078 && strnmatch(c+1, winfo->woutput[i], totallen-plen-2)) {
00079 if (found == WORD_INVALID) {
00080 found = i;
00081 } else {
00082 j_printerr("Warning: several \"%s\" found in dictionary, use the first one..\n");
00083 break;
00084 }
00085 }
00086 }
00087 } else {
00088 for (i=0;i<winfo->num;i++) {
00089 if (strmatch(keyword,winfo->wname[i])) {
00090 if (found == WORD_INVALID) {
00091 found = i;
00092 } else {
00093 j_printerr("Warning: several \"%s\" found in dictionary, use the first one..\n");
00094 break;
00095 }
00096 }
00097 }
00098 }
00099 return found;
00100 }
00101
00102
00103
00104 #define WSSTEP 10
00105
00106
00115 WORD_ID *
00116 new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
00117 {
00118 char *p;
00119 int num;
00120 int maxnum;
00121 WORD_ID *wseq;
00122
00123 maxnum = WSSTEP;
00124 wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID)*maxnum);
00125 num = 0;
00126 for (p = strtok(s, " "); p != NULL; p = strtok(NULL, " ")) {
00127 if (num >= maxnum) {
00128 maxnum += WSSTEP;
00129 wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
00130 }
00131 if ((wseq[num] = voca_lookup_wid(p, winfo)) == WORD_INVALID) {
00132
00133 j_printerr("word \"%s\" not found in dict\n", p);
00134 free(wseq);
00135 return NULL;
00136 }
00137 num++;
00138 }
00139
00140 *len_return = num;
00141 return(wseq);
00142 }