42#include "EST_String.h"
43#include "EST_Ngrammar.h"
46ExponentialFit(
EST_DVector &
N,
double &a,
double &b,
int first,
int last)
62 cerr <<
"ExponentialFit : first must be >= 0" <<
endl;
68 cerr <<
"ExponentialFit : last must be < N.n()-1 = " <<
N.n()-1 <<
endl;
82 for(
int r=first;r<=last;r++)
102smooth_ExponentialFit(
EST_DVector &
N,
int first,
int last)
106 if (!ExponentialFit(
N,a,b,first,last))
108 cerr <<
"smooth_ExponentialFit : ExponentialFit failed !" <<
endl;
112 for(
int r=first;r<=last;r++)
113 N[r] =
exp(a)*
pow((
double)r, b);
126 for (k=s->pdf_const().item_start();
127 !s->pdf_const().item_end(k);
128 k = s->pdf_const().item_next(k))
130 s->pdf_const().item_freq(k,name,freq);
132 (*ff)[(int)(freq+0.5)] += 1;
145 double *max = (
double*)
params;
146 for (k=s->pdf_const().item_start();
147 !s->pdf_const().item_end(k);
148 k = s->pdf_const().item_next(k))
150 s->pdf_const().item_freq(k,name,freq);
169 for (k=s->pdf_const().item_start();
170 !s->pdf_const().item_end(k);
171 k = s->pdf_const().item_next(k))
173 s->pdf_const().item_freq(k,name,freq);
176 double nfreq = (*map)((int)(freq+0.5));
191 double *min = (
double*)
params;
192 for (k=s->pdf_const().item_start();
193 !s->pdf_const().item_end(k);
194 k = s->pdf_const().item_next(k))
196 s->pdf_const().item_freq(k,name,freq);
216 switch(n.representation())
219 case EST_Ngrammar::sparse:
220 case EST_Ngrammar::dense:
222 size = n.num_states();
227 if (n.p_states[i].pdf().
samples() > max)
228 max = n.p_states[i].pdf().
samples();
230 ff.resize((
int)(max+1.5));
243 n.p_states[i].pdf().
item_freq(k,name,freq);
244 ff[(int)(freq+0.5)] += 1;
253 for (i=1;i<
ff.n();i++)
256 ff[0] =
pow(
float(n.get_vocab_length()),
float(n.order())) -
total;
262 case EST_Ngrammar::backoff:
266 n.backoff_traverse(n.backoff_representation,
267 &get_max_f,(
void*)(&max),
269 ff.resize((
int)(max+1.5));
276 for (i=0;i<
ff.n();i++)
279 n.backoff_traverse(n.backoff_representation,
280 &make_f_of_f,(
void*)(&
ff),
287 for (i=1;i<
ff.n();i++)
298 cerr <<
"unknown representation for EST_Ngrammar" <<
endl;
309 switch(n.representation())
312 case EST_Ngrammar::sparse:
313 case EST_Ngrammar::dense:
315 int size = n.p_num_states;
325 n.p_states[i].pdf().
item_freq(k,name,freq);
326 nfreq = map((
int)(freq+0.5));
335 case EST_Ngrammar::backoff:
341 n.backoff_traverse(n.backoff_representation,
342 &map_f_of_f,(
void*)(&map),
349 cerr <<
"unknown representation for EST_Ngrammar" <<
endl;
367 cerr <<
"adjusted_frequencies_BasicGoodTuring :";
376 if( (
N(r+1) == 0) || (
N(r) == 0) )
379 M[r] = (r + 1) *
N(r+1) /
N(r);
395 cerr <<
"smoothed_frequency_distribution_ExponentialFit :"
396 <<
" maxcount too big, reducing it to " <<
maxcount <<
endl;
401 if (!smooth_ExponentialFit(
N,1,
maxcount+1))
402 cerr <<
"smooth_ExponentialFit failed !" <<
endl;
417 if (
ngrammar.entry_type() != EST_Ngrammar::frequencies)
419 cerr <<
"EST_Ngram: cannot Good-Turing smooth ngram:" <<
420 " entries are not frequencies" <<
endl;
427 case EST_Ngrammar::sparse:
428 case EST_Ngrammar::dense:
435 smoothed_frequency_distribution_ExponentialFit(
freqs,
maxcount-1);
444 case EST_Ngrammar::backoff:
447 cerr <<
"Smoothing of backed of grammars is not available!" <<
endl;
528cerr <<
"unknown representation for EST_Ngrammar" <<
endl;
543 if(
ngrammar.representation() != EST_Ngrammar::backoff)
545 cerr <<
"Good_Turing_discount is not appropriate for non backoff grammar !"
581 if(max >
freqs.n() - 2)
592 for(i=0;i<=max+1;i++)
595 smoothed_frequency_distribution_ExponentialFit(
freqs,max);
597 for(i=0;i<=max+1;i++)
610 for(i=(
int)
ngrammar.backoff_threshold;i<=max;i++)
615 if(
ngrammar.backoff_discount[
o-1][i] < 0)
621 for(;i<
freqs.n();i++)
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index
EST_Litem * item_start() const
Used for iterating through members of the distribution.
double samples(void) const
Total number of example found.
void override_frequency(const EST_String &s, double c)
Sets the frequency of named item, without modifying {\tt num_samples}.
void set_frequency(const EST_String &s, double c)
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
INLINE int n() const
number of items in vector.