// Storage layouts the n-gram model can use. The class declares one
// init_*_representation() member per enumerator (sparse, dense, backoff).
214 enum representation_t {sparse, dense, backoff};
// Kind of value held in each entry: plain frequencies or log frequencies
// (as the enumerator names state). Reported by entry_type().
219 enum entry_t {frequencies, log_frequencies};
// Sentence count, stored as a double.
// NOTE(review): where this is updated is not visible in this section — confirm.
228 double p_number_of_sentences;
// Representation currently in use; returned by representation().
235 representation_t p_representation;
// Entry type currently in use; returned by entry_type().
236 entry_t p_entry_type;
// Set up the sparse representation.
// NOTE(review): bool result presumably signals success — confirm in the definition.
241 bool init_sparse_representation();
// Set up the dense representation.
// NOTE(review): bool result presumably signals success — confirm in the definition.
244 bool init_dense_representation();
// Threshold parameter for the backoff representation.
// NOTE(review): exact meaning and units are not visible in this section.
252 double backoff_threshold;
// Floor frequency applied to unigrams in backoff mode (per the name).
// NOTE(review): confirm how and where it is applied.
255 double backoff_unigram_floor_freq;
// Return the backoff discount for an n-gram of the given order and frequency.
// Const member: does not modify the grammar.
// NOTE(review): the discounting scheme is defined elsewhere — confirm before
// relying on any particular value range.
263 const double get_backoff_discount(
const int order,
const double freq)
const;
// Set up the backoff representation.
// NOTE(review): bool result presumably signals success — confirm in the definition.
265 bool init_backoff_representation();
// Restore unigram states for the backoff representation (per the name).
// NOTE(review): what "restore" resets is not visible in this section.
267 void backoff_restore_unigram_states();
// Build the n-gram (as a vector of strings) that corresponds to index i.
// Returns a const reference; const member.
// NOTE(review): the lifetime of the referenced vector is not visible here —
// presumably internal storage, so copy it if it must outlive later calls.
273 const EST_StrVector &make_ngram_from_index(
const int i)
const;
288 {
return words(p_order-1); }
290 {
return words(p_order-1); }
// Convert between the sparse and dense representations.
// NOTE(review): bool results presumably signal success — confirm in the definitions.
294 bool sparse_to_dense();
295 bool dense_to_sparse();
// Convert stored frequencies to probabilities (per the name).
// NOTE(review): whether p_entry_type is updated as part of this is not visible here.
300 void freqs_to_probs();
// Internal initialisation helper taking an int `o` and a representation `r`.
// NOTE(review): `o` is presumably the n-gram order (see order()) — confirm.
327 bool p_init(
int o, representation_t r);
331 bool oov_preprocess(
const EST_String &filename,
346 const bool trace=
false)
const;
349 double *prob = NULL)
const;
363 void *
params,
const int level);
384 default_values(); init(
o,r,v);
// Reset members to their defaults. A visible inline body in this class calls
// it immediately before init(o,r,v).
// NOTE(review): which members are reset is defined elsewhere.
388 void default_values();
390 bool init(
int o, representation_t r,
392 bool init(
int o, representation_t r,
396 bool init(
int o, representation_t r,
// Accessor: returns p_num_states.
400 int num_states(
void)
const {
return p_num_states;}
// Accessor: returns p_num_samples as a double.
401 double samples(
void)
const {
return p_num_samples;}
// Accessor: returns p_order.
402 int order()
const {
return p_order; }
// Returns vocab->length(), or 0 when vocab tests false (no vocabulary set).
403 int get_vocab_length()
const {
return vocab?vocab->
length():0; }
// Look up the word `s` and return an int.
// NOTE(review): presumably the word's index in the vocabulary; the value
// returned for unknown words is not visible in this section — confirm.
405 int get_vocab_word(
const EST_String &s)
const;
// Returns pred_vocab->length(), or 0 when pred_vocab tests false.
// The guard mirrors get_vocab_length(), which already returns 0 for an unset
// vocab; without it this accessor dereferenced pred_vocab unconditionally.
// NOTE(review): assumes pred_vocab is testable in a boolean context the same
// way vocab is (e.g. a raw pointer) — confirm against its declaration.
406 int get_pred_vocab_length()
const {
return pred_vocab ? pred_vocab->
length() : 0; }
// Returns pred_vocab->name(i) as an EST_String.
// pred_vocab is dereferenced without a check, so it must be set before calling.
407 EST_String get_pred_vocab_word(
int i)
const {
return pred_vocab->
name(i); }
// Returns pred_vocab->name(s) as an int for the word `s`.
// pred_vocab is dereferenced without a check, so it must be set before calling.
// NOTE(review): the value returned for a word not in pred_vocab is decided by
// name(), which is not visible here.
408 int get_pred_vocab_word(
const EST_String &s)
const
409 {
return pred_vocab->
name(s); }
// Returns the negation of allow_oov (as an int): non-zero when allow_oov is false.
410 int closed_vocab()
const {
return !allow_oov; }
// Accessor: returns p_entry_type.
411 entry_t entry_type()
const {
return p_entry_type;}
// Accessor: returns p_representation.
412 representation_t representation()
const
413 {
return p_representation;}
417 const EST_String &prev = SENTENCE_START_MARKER,
427 const double count=1);
430 const double count=1);
// Adjust the grammar for HTK compatibility (per the name).
// NOTE(review): what is changed is defined elsewhere — confirm before use.
434 void make_htk_compatible();
// Load a grammar from `filename`; the outcome is reported as an EST_read_status.
// NOTE(review): which file formats are tried is not visible in this declaration.
437 EST_read_status load(
const EST_String &filename);
439 EST_write_status save(
const EST_String &filename,
441 const bool trace=
false,
// Return, by const reference, the string for index i of the word list.
// NOTE(review): behaviour for an out-of-range i is not visible here.
445 const EST_String &wordlist_index(
int i)
const;
// Return, by const reference, the string for index i of the predictee list.
// NOTE(review): behaviour for an out-of-range i is not visible here.
447 const EST_String &predlist_index(
int i)
const;
// Request a change of entry type to `new_type`.
// NOTE(review): bool result presumably signals success; whether stored values
// are converted is defined elsewhere — confirm.
450 bool set_entry_type(entry_t
new_type);
457 const bool trace=
false)
const;
459 const bool trace=
false)
const;
462 double *prob,
int *state)
const;
464 {
double p;
int state;
return predict(
words,&p,&state); }
466 {
int state;
return predict(
words,prob,&state); }
470 {
double p;
int state;
return predict(
words,&p,&state); }
472 {
int state;
return predict(
words,prob,&state); }
// Given a state id and a word id, return the id of the following state.
// Const member: does not modify the grammar.
// NOTE(review): the value returned when no transition exists is not visible here.
476 int find_next_state_id(
int state,
int word)
const;
485 bool force=
false)
const;
487 bool force=
false)
const;
523 friend EST_read_status load_ngram_htk_ascii(
const EST_String filename,
525 friend EST_read_status load_ngram_htk_binary(
const EST_String filename,
527 friend EST_read_status load_ngram_arpa(
const EST_String filename,
530 friend EST_read_status load_ngram_cstr_ascii(
const EST_String filename,
532 friend EST_read_status load_ngram_cstr_bin(
const EST_String filename,
535 friend EST_write_status save_ngram_htk_ascii_sub(
const EST_String &
word,
539 friend EST_write_status save_ngram_htk_ascii(
const EST_String filename,
545 friend EST_write_status save_ngram_cstr_ascii(
const EST_String filename,
549 friend EST_write_status save_ngram_cstr_bin(
const EST_String filename,
553 friend EST_write_status save_ngram_arpa(
const EST_String filename,
555 friend EST_write_status save_ngram_arpa_sub(
ostream *
ost,
558 friend EST_write_status save_ngram_wfst(
const EST_String filename,
577 bool compute_backoff_weights(
const int mincount=1,
// Grants EST_BackoffNgrammar access to this class's private and protected members.
583friend class EST_BackoffNgrammar;