46#include "EST_Pathname.h"
47#include "EST_cutils.h"
49#include "EST_FileType.h"
52#include "EST_TVector.h"
58#if defined(INSTANTIATE_TEMPLATES)
59#include "../base_class/EST_TList.cc"
63#include "../base_class/EST_TVector.cc"
70int EST_WFST::traverse_tag = 0;
72EST_WFST_State::EST_WFST_State(
int name)
83 p_name = state.p_name;
84 p_type = state.p_type;
86 for (p=state.transitions.head(); p != 0; p=p->next())
90EST_WFST_State::~EST_WFST_State()
94 for (p=transitions.head(); p != 0; p=p->next())
95 delete transitions(p);
120 for (
int i=0; i < p_num_states; ++i)
135 p_in_symbols = wfst.p_in_symbols;
136 p_out_symbols = wfst.p_out_symbols;
137 p_start_state = wfst.p_start_state;
138 current_tag = wfst.current_tag;
139 p_num_states = wfst.p_num_states;
140 p_states.
resize(p_num_states);
141 for (
int i=0; i < p_num_states; ++i)
152 for (i=0; i < p_states.
length(); i++)
163 in.append(
"__epsilon__");
166 if ((!streq(get_c_string(car(
iin)),
"__epsilon__")) &&
167 (!streq(get_c_string(car(
iin)),
"=")))
168 in.append(get_c_string(car(
iin)));
170 out.append(
"__epsilon__");
173 if ((!streq(get_c_string(car(
oout)),
"__epsilon__")) &&
174 (!streq(get_c_string(car(
oout)),
"=")))
175 out.append(get_c_string(car(
oout)));
190 cerr <<
"WFST transduce: \"" <<
in <<
"\" not in alphabet" <<
endl;
191 return WFST_ERROR_STATE;
206 for (i=s->transitions.head(); i != 0; i=i->next())
208 if (
in == s->transitions(i)->in_symbol())
211 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
212 out.append(s->transitions(i));
224 for (i=s->transitions.head(); i != 0; i=i->next())
226 if (
in == s->transitions(i)->in_symbol())
228 out = s->transitions(i)->out_symbol();
229 return s->transitions(i)->state();
233 return WFST_ERROR_STATE;
238 if (
inout.contains(
"/"))
252 cerr <<
"WFST: one of " <<
in <<
"/" <<
out <<
" not in alphabet"
254 return WFST_ERROR_STATE;
273 for (i=s->transitions.head(); i != 0; i=i->next())
275 if ((
in == s->transitions(i)->in_symbol()) &&
276 (
out == s->transitions(i)->out_symbol()))
279 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
280 return s->transitions(i);
295 return WFST_ERROR_STATE;
299 prob = trans->weight();
300 return trans->state();
304EST_write_status EST_WFST::save_binary(
FILE *fd)
311 for (i=0; i<p_num_states; i++)
313 num_transitions = p_states[i]->num_transitions();
314 fwrite(&num_transitions,4,1,fd);
315 if (p_states[i]->type() == wfst_final)
317 else if (p_states[i]->type() == wfst_nonfinal)
318 type = WFST_NONFINAL;
319 else if (p_states[i]->type() == wfst_licence)
324 for (
j=p_states[i]->transitions.head();
j != 0;
j=
j->next())
326 in = p_states[i]->transitions(
j)->in_symbol();
327 out = p_states[i]->transitions(
j)->out_symbol();
329 weight = p_states[i]->transitions(
j)->weight();
359 else if ((
ofd =
fopen(filename,
"wb")) == NULL)
361 cerr <<
"WFST: cannot write to file \"" << filename <<
"\"" <<
endl;
362 return misc_write_error;
366 fprintf(
ofd,
"DataType %s\n",(
const char *)type);
369 p_in_symbols.print_to_string(TRUE)+
")",
373 p_out_symbols.print_to_string(TRUE)+
")",
376 fprintf(
ofd,
"ByteOrder %s\n", ((EST_NATIVE_BO == bo_big) ?
"10" :
"01"));
379 if (type ==
"binary")
383 for (i=0; i < p_num_states; i++)
402 for (
j=s->transitions.head();
j != 0;
j=
j->next())
407 fprintf(
ofd,
" (%s ",(
const char *)quote_string(
in,
"\"",
"\\",1));
411 fprintf(
ofd,
" %s ",(
const char *)quote_string(
out,
"\"",
"\\",1));
415 s->transitions(
j)->state(),
416 s->transitions(
j)->weight());
427static float get_float(
FILE *fd,
int swap)
431 if (
swap) swapfloat(&f);
435static int get_int(
FILE *fd,
int swap)
445EST_read_status EST_WFST::load_binary(
FILE *fd,
458 for (i=0; i < num_states; i++)
473 cerr <<
"WFST load: unknown state type \"" <<
475 r = read_format_error;
481 cerr <<
"WFST load: internal error: unexpected state misalignment"
483 r = read_format_error;
520 if ((fd=
fopen(filename,
"r")) == NULL)
522 cerr <<
"WFST load: unable to open \"" << filename
523 <<
"\" for reading" <<
endl;
527 ts.set_quotes(
'"',
'\\');
529 if (((r = read_est_header(
ts,
hinfo,
ascii, t)) != format_ok) ||
532 cerr <<
"WFST load: not a WFST file \"" << filename <<
"\"" <<
endl;
533 return misc_read_error;
539 read_from_string(get_c_string(read_from_string(
hinfo.val(
"in"))));
541 read_from_string(get_c_string(read_from_string(
hinfo.val(
"out"))));
547 int num_states =
hinfo.ival(
"NumStates");
552 if (!
hinfo.present(
"ByteOrder"))
554 else if (((
hinfo.val(
"ByteOrder") ==
"01") ? bo_little : bo_big)
559 r = load_binary(fd,
hinfo,num_states,
swap);
563 for (i=0; i < num_states; i++)
566 if (i != get_c_int(car(car(
sd))))
568 cerr <<
"WFST load: expected description of state " << i <<
569 " but found \"" << siod_sprint(
sd) <<
"\"" <<
endl;
570 r = read_format_error;
573 if (streq(
"final",get_c_string(car(cdr(car(
sd))))))
575 else if (streq(
"nonfinal",get_c_string(car(cdr(car(
sd))))))
577 else if (streq(
"licence",get_c_string(car(cdr(car(
sd))))))
581 cerr <<
"WFST load: unknown state type \"" <<
582 siod_sprint(car(cdr(car(
sd)))) <<
"\"" <<
endl;
583 r = read_format_error;
589 cerr <<
"WFST load: internal error: unexpected state misalignment"
591 r = read_format_error;
594 if (load_transitions_from_lisp(s,cdr(
sd)) != format_ok)
596 r = read_format_error;
607EST_read_status EST_WFST::load_transitions_from_lisp(
int s,
LISP trans)
611 for (t=trans; t != NIL; t=cdr(t))
613 float w = get_c_float(siod_nth(3,car(t)));
614 int end = get_c_int(siod_nth(2,car(t)));
615 int in = p_in_symbols.
name(get_c_string(siod_nth(0,car(t))));
616 int out = p_out_symbols.
name(get_c_string(siod_nth(1,car(t))));
618 if ((
in == -1) || (
out == -1))
620 cerr <<
"WFST load: unknown vocabulary in state transition"
622 cerr <<
"WFST load: " << siod_sprint(car(t)) <<
endl;
623 return read_format_error;
625 p_states[s]->add_transition(w,end,
in,
out);
635 for (i=0; i < p_num_states; i++)
636 tt += p_states(i)->transitions.
length();
638 return EST_String(
"WFST ")+itoString(p_num_states)+
" states "+
639 itoString(
tt)+
" transitions ";
643void EST_WFST::more_states(
int new_max)
648 for (i=p_num_states; i <
new_max; i++)
657 if (p_num_states >= p_states.
length())
660 more_states((
int)((
float)(p_states.
length()+1)*1.5));
663 p_states[p_num_states] = s;
677 for (i=0; i < p_num_states; i++)
680 for (
j=s->transitions.head();
j !=0;
j=
j->next())
681 s->transitions(
j)->set_weight(0);
692 for (i=0; i < p_num_states; i++)
695 for (t=0,
j=s->transitions.head();
j !=0;
j=
j->next())
696 t += s->transitions(
j)->weight();
698 for (
j=s->transitions.head();
j !=0;
j=
j->next())
699 s->transitions(
j)->set_weight(s->transitions(
j)->weight()/t);
bool init(const EST_StrList &vocab)
(re-)initialise
const EST_String & name(const int n) const
The name given the index.
void append(const T &item)
add item onto end of list
void resize(int n, int set=1)
INLINE int length() const
number of items in vector.
void start_cumulate()
Clear and start cumulation.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
void init(int init_num_states=10)
Clear with (estimation of number of states required)
void clear()
clear removing existing states if any
void copy(const EST_WFST &wfst)
Copy from existing wfst.
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
int cumulate() const
Cumulation condition.
const EST_WFST_State * state(int i) const
Return internal state information.
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
int transduce(int state, int in, int &out) const
Transduce in to out from state.
EST_read_status load(const EST_String &filename)
?
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.