Julius 4.2
|
00001 00024 /* 00025 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00026 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00027 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00028 * All rights reserved 00029 */ 00030 00031 #include <sent/stddefs.h> 00032 #include <sent/dfa.h> 00033 00046 static int 00047 cp_find(int *list, int len, int key, int *loc) 00048 { 00049 int left, right, mid; 00050 int ret; 00051 00052 if (len == 0) { 00053 *loc = 0; 00054 return -1; 00055 } 00056 00057 left = 0; 00058 right = len - 1; 00059 while (left < right) { 00060 mid = (left + right) / 2; 00061 if (list[mid] < key) { 00062 left = mid + 1; 00063 } else { 00064 right = mid; 00065 } 00066 } 00067 if (list[left] == key) { 00068 *loc = left; 00069 ret = left; 00070 } else { 00071 ret = -1; 00072 if (list[left] > key) { 00073 *loc = left; 00074 } else { 00075 *loc = left + 1; 00076 } 00077 } 00078 return ret; 00079 } 00080 00090 boolean 00091 dfa_cp(DFA_INFO *dfa, int i, int j) 00092 { 00093 int loc; 00094 00095 /*return(dfa->cp[i][j]);*/ 00096 //return((dfa->cp[i][j>>3] & cp_table[j&7]) ? TRUE : FALSE); 00097 return(cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1 ? TRUE : FALSE); 00098 } 00099 00108 boolean 00109 dfa_cp_begin(DFA_INFO *dfa, int i) 00110 { 00111 int loc; 00112 /*return(dfa->cp_begin[i]);*/ 00113 //return((dfa->cp_begin[i>>3] & cp_table[i&7]) ? TRUE : FALSE); 00114 return(cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1 ? TRUE : FALSE); 00115 } 00116 00125 boolean 00126 dfa_cp_end(DFA_INFO *dfa, int i) 00127 { 00128 int loc; 00129 /*return(dfa->cp_end[i]);*/ 00130 //return((dfa->cp_end[i>>3] & cp_table[i&7]) ? TRUE : FALSE); 00131 return(cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1 ? TRUE : FALSE); 00132 } 00133 00146 static boolean 00147 cp_add(int **list, int *len, int *alloclen, int val, int loc) 00148 { 00149 if (loc > *len) { 00150 jlog("InternalError: skip?\n"); 00151 return FALSE; 00152 } 00153 if (*len + 1 > *alloclen) { 00154 /* expand cplist if necessary */ 00155 *alloclen *= 2; 00156 *list = (int *)myrealloc(*list, sizeof(int) * (*alloclen)); 00157 } 00158 if (loc < *len) { 00159 memmove(&((*list)[loc+1]), &((*list)[loc]), sizeof(int) * (*len - loc)); 00160 } 00161 (*list)[loc] = val; 00162 (*len)++; 00163 return TRUE; 00164 } 00165 static boolean 00176 cp_remove(int **list, int *len, int loc) 00177 { 00178 if (*len == 0) return TRUE; 00179 if (loc >= *len) { 00180 jlog("InternalError: skip?\n"); 00181 return FALSE; 00182 } 00183 if (loc < *len - 1) { 00184 memmove(&((*list)[loc]), &((*list)[loc+1]), sizeof(int) * (*len - loc - 1)); 00185 } 00186 (*len) --; 00187 return TRUE; 00188 } 00189 00190 00199 void 00200 set_dfa_cp(DFA_INFO *dfa, int i, int j, boolean value) 00201 { 00202 int loc; 00203 if (value) { 00204 /* add j to cp list of i */ 00205 if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) == -1) { /* not exist */ 00206 cp_add(&(dfa->cp[i]), &(dfa->cplen[i]), &(dfa->cpalloclen[i]), j, loc); 00207 } 00208 } else { 00209 /* remove j from cp list of i */ 00210 if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1) { /* already exist */ 00211 cp_remove(&(dfa->cp[i]), &(dfa->cplen[i]), loc); 00212 } 00213 } 00214 } 00215 00224 void 00225 set_dfa_cp_begin(DFA_INFO *dfa, int i, boolean value) 00226 { 00227 int loc; 00228 00229 if (value) { 00230 /* add j to cp list of i */ 00231 if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) == -1) { /* not exist */ 00232 cp_add(&(dfa->cp_begin), &(dfa->cp_begin_len), &(dfa->cp_begin_alloclen), i, loc); 00233 } 00234 } else { 00235 /* remove j from cp list of i */ 00236 if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1) { /* already exist */ 00237 cp_remove(&(dfa->cp_begin), &(dfa->cp_begin_len), loc); 00238 } 00239 } 00240 } 00241 00250 void 00251 set_dfa_cp_end(DFA_INFO *dfa, int i, boolean value) 00252 { 00253 int loc; 00254 00255 if (value) { 00256 /* add j to cp list of i */ 00257 if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) == -1) { /* not exist */ 00258 cp_add(&(dfa->cp_end), &(dfa->cp_end_len), &(dfa->cp_end_alloclen), i, loc); 00259 } 00260 } else { 00261 /* remove j from cp list of i */ 00262 if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1) { /* already exist */ 00263 cp_remove(&(dfa->cp_end), &(dfa->cp_end_len), loc); 00264 } 00265 } 00266 } 00267 00273 void 00274 init_dfa_cp(DFA_INFO *dfa) 00275 { 00276 dfa->cp = NULL; 00277 } 00278 00286 void 00287 malloc_dfa_cp(DFA_INFO *dfa, int term_num, int size) 00288 { 00289 int i; 00290 00291 dfa->cp = (int **)mymalloc(sizeof(int *) * term_num); 00292 dfa->cplen = (int *)mymalloc(sizeof(int) * term_num); 00293 dfa->cpalloclen = (int *)mymalloc(sizeof(int) * term_num); 00294 for(i=0;i<term_num;i++) { 00295 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size); 00296 dfa->cpalloclen[i] = size; 00297 dfa->cplen[i] = 0; 00298 } 00299 dfa->cp_begin = (int *)mymalloc(sizeof(int) * size); 00300 dfa->cp_begin_alloclen = size; 00301 dfa->cp_begin_len = 0; 00302 dfa->cp_end = (int *)mymalloc(sizeof(int) * size); 00303 dfa->cp_end_alloclen = size; 00304 dfa->cp_end_len = 0; 00305 } 00306 00318 boolean 00319 dfa_cp_append(DFA_INFO *dfa, DFA_INFO *src, int offset) 00320 { 00321 int i, j, size; 00322 00323 if (dfa->cp == NULL) { 00324 /* no category pair information exist on target */ 00325 if (dfa->term_num != src->term_num) { 00326 jlog("InternalError: dfa_cp_append\n"); 00327 return FALSE; 00328 } 00329 /* just malloc and copy */ 00330 dfa->cp = (int **)mymalloc(sizeof(int *) * dfa->term_num); 00331 dfa->cplen = (int *)mymalloc(sizeof(int) * dfa->term_num); 00332 dfa->cpalloclen = (int *)mymalloc(sizeof(int) * dfa->term_num); 00333 for(i=0;i<dfa->term_num;i++) { 00334 size = src->cplen[i]; 00335 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP; 00336 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size); 00337 dfa->cpalloclen[i] = size; 00338 memcpy(dfa->cp[i], src->cp[i], sizeof(int) * src->cplen[i]); 00339 dfa->cplen[i] = src->cplen[i]; 00340 } 00341 size = src->cp_begin_len; 00342 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP; 00343 dfa->cp_begin = (int *)mymalloc(sizeof(int) * size); 00344 dfa->cp_begin_alloclen = size; 00345 memcpy(dfa->cp_begin, src->cp_begin, sizeof(int) * src->cp_begin_len); 00346 dfa->cp_begin_len = src->cp_begin_len; 00347 size = src->cp_end_len; 00348 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP; 00349 dfa->cp_end = (int *)mymalloc(sizeof(int) * size); 00350 dfa->cp_end_alloclen = size; 00351 memcpy(dfa->cp_end, src->cp_end, sizeof(int) * src->cp_end_len); 00352 dfa->cp_end_len = src->cp_end_len; 00353 return TRUE; 00354 } 00355 /* expand index */ 00356 dfa->cp = (int **)myrealloc(dfa->cp, sizeof(int *) * dfa->term_num); 00357 dfa->cplen = (int *)myrealloc(dfa->cplen, sizeof(int) * dfa->term_num); 00358 dfa->cpalloclen = (int *)myrealloc(dfa->cpalloclen, sizeof(int) * dfa->term_num); 00359 /* copy src->cp[i][j] to target->cp[i+offset][j+offset] */ 00360 for(i=offset;i<dfa->term_num;i++) { 00361 size = src->cplen[i-offset]; 00362 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP; 00363 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size); 00364 dfa->cpalloclen[i] = size; 00365 for(j=0;j<src->cplen[i-offset];j++) { 00366 dfa->cp[i][j] = src->cp[i-offset][j] + offset; 00367 } 00368 dfa->cplen[i] = src->cplen[i-offset]; 00369 } 00370 if (dfa->cp_begin_alloclen < dfa->cp_begin_len + src->cp_begin_len) { 00371 dfa->cp_begin_alloclen = dfa->cp_begin_len + src->cp_begin_len; 00372 dfa->cp_begin = (int *)myrealloc(dfa->cp_begin, sizeof(int) * dfa->cp_begin_alloclen); 00373 } 00374 for(j=0;j<src->cp_begin_len;j++) { 00375 dfa->cp_begin[dfa->cp_begin_len + j] = src->cp_begin[j] + offset; 00376 } 00377 dfa->cp_begin_len += src->cp_begin_len; 00378 if (dfa->cp_end_alloclen < dfa->cp_end_len + src->cp_end_len) { 00379 dfa->cp_end_alloclen = dfa->cp_end_len + src->cp_end_len; 00380 dfa->cp_end = (int *)myrealloc(dfa->cp_end, sizeof(int) * dfa->cp_end_alloclen); 00381 } 00382 for(j=0;j<src->cp_end_len;j++) { 00383 dfa->cp_end[dfa->cp_end_len + j] = src->cp_end[j] + offset; 00384 } 00385 dfa->cp_end_len += src->cp_end_len; 00386 00387 return TRUE; 00388 } 00389 00395 void 00396 free_dfa_cp(DFA_INFO *dfa) 00397 { 00398 int i; 00399 00400 if (dfa->cp != NULL) { 00401 free(dfa->cp_end); 00402 free(dfa->cp_begin); 00403 for(i=0;i<dfa->term_num;i++) free(dfa->cp[i]); 00404 free(dfa->cpalloclen); 00405 free(dfa->cplen); 00406 free(dfa->cp); 00407 dfa->cp = NULL; 00408 } 00409 } 00410 00411 void 00412 dfa_cp_output_rawdata(FILE *fp, DFA_INFO *dfa) 00413 { 00414 int i, j; 00415 00416 for(i=0;i<dfa->term_num;i++) { 00417 fprintf(fp, "%d:", i); 00418 for(j=0;j<dfa->cplen[i];j++) { 00419 fprintf(fp, " %d", dfa->cp[i][j]); 00420 } 00421 fprintf(fp, "\n"); 00422 } 00423 fprintf(fp, "bgn:"); 00424 for(j=0;j<dfa->cp_begin_len;j++) { 00425 fprintf(fp, " %d", dfa->cp_begin[j]); 00426 } 00427 fprintf(fp, "\n"); 00428 fprintf(fp, "end:"); 00429 for(j=0;j<dfa->cp_end_len;j++) { 00430 fprintf(fp, " %d", dfa->cp_end[j]); 00431 } 00432 fprintf(fp, "\n"); 00433 } 00434 00435 void 00436 dfa_cp_count_size(DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret) 00437 { 00438 int i; 00439 unsigned long size, allocsize; 00440 00441 size = 0; 00442 allocsize = 0; 00443 for(i=0;i<dfa->term_num;i++) { 00444 size += sizeof(int) * dfa->cplen[i]; 00445 allocsize += sizeof(int) * dfa->cpalloclen[i]; 00446 } 00447 size += sizeof(int) * dfa->cp_begin_len; 00448 allocsize += sizeof(int) * dfa->cp_begin_alloclen; 00449 size += sizeof(int) * dfa->cp_end_len; 00450 allocsize += sizeof(int) * dfa->cp_end_alloclen; 00451 00452 allocsize += (sizeof(int *) + sizeof(int) + sizeof(int)) * dfa->term_num; 00453 00454 *size_ret = size; 00455 *allocsize_ret = allocsize; 00456 }