Julius 4.2
libsent/src/dfa/rddfa.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/dfa.h>
00027 
00028 static char buf[MAXLINELEN];    
00029 
00035 void
00036 dfa_state_init(DFA_INFO *dinfo)
00037 {
00038   int i;
00039   dinfo->maxstatenum = DFA_STATESTEP;
00040   dinfo->st = (DFA_STATE *)mymalloc(sizeof(DFA_STATE) * dinfo->maxstatenum);
00041   for (i=0;i<dinfo->maxstatenum;i++) {
00042     dinfo->st[i].number = i;
00043     dinfo->st[i].status = 0;
00044     dinfo->st[i].arc = NULL;
00045   }
00046   dinfo->state_num = dinfo->arc_num = dinfo->term_num = 0;
00047   dinfo->sp_id = WORD_INVALID;
00048 }
00049 
00056 void
00057 dfa_state_expand(DFA_INFO *dinfo, int needed)
00058 {
00059   int oldnum, i;
00060   oldnum = dinfo->maxstatenum;
00061   dinfo->maxstatenum += DFA_STATESTEP;
00062   if (dinfo->maxstatenum < needed) dinfo->maxstatenum = needed;
00063   dinfo->st = (DFA_STATE *)myrealloc(dinfo->st, sizeof(DFA_STATE) * dinfo->maxstatenum);
00064   for (i=oldnum;i<dinfo->maxstatenum;i++) {
00065     dinfo->st[i].number = i;
00066     dinfo->st[i].status = 0;
00067     dinfo->st[i].arc = NULL;
00068   }
00069 }
00070 
00079 boolean
00080 rddfa(FILE *fp, DFA_INFO *dinfo)
00081 {
00082   int state_max, arc_num, terminal_max;
00083 
00084   /* initialize */
00085   dfa_state_init(dinfo);
00086   state_max = 0;
00087   arc_num = 0;
00088   terminal_max = 0;
00089 
00090   while (getl(buf, MAXLINELEN, fp) != NULL) {
00091     if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00092       break;
00093     }
00094   }
00095   dinfo->state_num = state_max + 1;
00096   dinfo->arc_num = arc_num;
00097   dinfo->term_num = terminal_max + 1;
00098   return(TRUE);
00099 }
00100 
00109 boolean
00110 rddfa_fp(FILE *fp, DFA_INFO *dinfo)
00111 {
00112   int state_max, arc_num, terminal_max;
00113 
00114   /* initialize */
00115   dfa_state_init(dinfo);
00116   state_max = 0;
00117   arc_num = 0;
00118   terminal_max = 0;
00119 
00120   while(getl_fp(buf, MAXLINELEN, fp) != NULL) {
00121     if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00122       break;
00123     }
00124   }
00125   dinfo->state_num = state_max + 1;
00126   dinfo->arc_num = arc_num;
00127   dinfo->term_num = terminal_max + 1;
00128   return(TRUE);
00129 }
00130 
00142 boolean
00143 rddfa_line(char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max)
00144 {
00145   DFA_ARC *newarc;
00146   int state, terminal, next_state;
00147   unsigned int status;
00148   char *p;
00149 
00150   if (strmatch(buf, "DFAEND")) return(FALSE);
00151   /* format: state terminalID nextstate statuscode_of_state */
00152   if ((p = strtok(line, DELM)) == NULL) {
00153     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00154     return FALSE;
00155   }
00156   state = atoi(p);
00157   if ((p = strtok(NULL, DELM)) == NULL) {
00158     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00159     return FALSE;
00160   }
00161   terminal = atoi(p);
00162   if ((p = strtok(NULL, DELM)) == NULL) {
00163     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00164     return FALSE;
00165   }
00166   next_state = atoi(p);
00167   if ((p = strtok(NULL, DELM)) == NULL) {
00168     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00169     return FALSE;
00170   }
00171   sscanf(p, "%x", &status);
00172 
00173   if (state >= dinfo->maxstatenum) {      /* expand */
00174     dfa_state_expand(dinfo, state+1);
00175   }
00176   if (next_state >= dinfo->maxstatenum) { /* expand */
00177     dfa_state_expand(dinfo, next_state+1);
00178   }
00179 
00180   /* set state status (accept / initial) */
00181   if (status & ACCEPT_S) {
00182     dinfo->st[state].status |= ACCEPT_S;
00183   }
00184   /* the state #0 is an initial state */
00185   if (state == 0) {
00186     dinfo->st[state].status |= INITIAL_S;
00187   }
00188   
00189   /* skip line with negative terminalID/nextstate */
00190   if (terminal > 0 || next_state > 0) {
00191     /* add new arc to the state */
00192     newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00193     newarc->label    = terminal;
00194     newarc->to_state = next_state;
00195     newarc->next     = dinfo->st[state].arc;
00196     dinfo->st[state].arc = newarc;
00197     (*arc_num)++;
00198   }
00199 
00200   if (*state_max < state) *state_max = state;
00201   if (*terminal_max < terminal) *terminal_max = terminal;
00202 
00203   return(TRUE);
00204 }
00205 
00206 /* append dfa info to other */
00207 /* soffset: state offset  coffset: category(terminal) offset */
00208 
00217 void
00218 dfa_append(DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset)
00219 {
00220   DFA_ARC *arc, *newarc;
00221   int s, state, terminal, next_state;
00222   unsigned int status;
00223 
00224   for (s = 0; s < src->state_num; s++) {
00225     state = s + soffset;
00226     status = src->st[s].status;
00227     if (state >= dst->maxstatenum) {      /* expand */
00228       dfa_state_expand(dst, state+1);
00229     }
00230     /* set state status (accept / initial) */
00231     if (status & ACCEPT_S) {
00232       dst->st[state].status |= ACCEPT_S;
00233     }
00234     /* the state #0 is an initial state */
00235     if (s == 0) {
00236       dst->st[state].status |= INITIAL_S;
00237     }
00238     for (arc = src->st[s].arc; arc; arc = arc->next) {
00239       terminal = arc->label + coffset;
00240       next_state = arc->to_state + soffset;
00241 
00242       if (next_state >= dst->maxstatenum) { /* expand */
00243         dfa_state_expand(dst, next_state+1);
00244       }
00245       /* add new arc to the state */
00246       newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00247       newarc->label    = terminal;
00248       newarc->to_state = next_state;
00249       newarc->next     = dst->st[state].arc;
00250       dst->st[state].arc = newarc;
00251 
00252       dst->arc_num++;
00253       if (dst->term_num < terminal + 1) dst->term_num = terminal + 1;
00254     }
00255     if (dst->state_num < state + 1) dst->state_num = state + 1;
00256   }
00257 }