Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
pitchmark.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author: Paul Taylor */
34/* Date : December 1997 */
35/*-----------------------------------------------------------------------*/
36/* Pitchmark Laryngograph Signals */
37/* */
38/*=======================================================================*/
39
40/* Note - this is based on a pitchmarker developed by Mike Macon and
41written in matlab.
42*/
43
44#include "stdlib.h"
45#include "sigpr/EST_filter.h"
46#include "sigpr/EST_pitchmark.h"
47#include "ling_class/EST_Relation.h"
48#include "EST_math.h"
49#include "EST_inline_utils.h"
50#include "EST_wave_aux.h"
51#include "EST_track_aux.h"
52
53
54void delta(EST_Wave &tr, EST_Wave &d, int regression_length);
55
56EST_Track pitchmark(EST_Wave &lx, int lx_lf, int lx_lo, int lx_hf,
57 int lx_ho, int df_lf, int df_lo, int mo, int debug)
58{
61
62 pm.set_equal_space(false);
63 // pre-filtering
64
65 if (debug)
66 cout << "pitchmark 1\n";
67
68 FIRlowpass_double_filter(lx, lx_lf, lx_lo);
69 FIRhighpass_double_filter(lx, lx_hf, lx_ho);
70
71 if (debug)
72 cout << "pitchmark 2\n";
73
74 if (debug)
75 lx.save("tmpfilt.lx");
76
77// cout << "df " << df_lf << " df_o " << df_lo << endl;
78
79// lxdiff = lx;
80// differentiate(lxdiff);
81 lxdiff.resize(lx.num_samples());
82 lxdiff.set_sample_rate(lx.sample_rate());
83 delta(lx, lxdiff, 4);
84
85 if (debug)
86 lxdiff.save("tmpdiff.lx");
87
88 // it was found that median smoothing worked better here.
89
90 if (df_lo > 0)
91 FIRlowpass_double_filter(lxdiff, df_lf, df_lo);
92
93 if (mo > 0)
94 simple_mean_smooth(lxdiff, mo);
95
96 if (debug)
97 lxdiff.save("tmpfiltdiff.lx");
98
99 neg_zero_cross_pick(lxdiff, pm);
100
101 return pm;
102}
103
104EST_Track pitchmark(EST_Wave &lx, EST_Features &op)
105{
108 int lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo, mo, debug;
109
110 lx_lf = op.present("lx_low_frequency") ?
111 op.I("lx_low_frequency") : 400;
112 lx_lo = op.present("lx_low_order") ?
113 op.I("lx_low_order") : 19;
114
115 lx_hf = op.present("lx_high_frequency") ?
116 op.I("lx_high_frequency") : 40;
117 lx_ho = op.present("lx_high_order") ?
118 op.I("lx_high_order") : 19;
119
120 df_lf = op.present("df_low_frequency") ?
121 op.I("df_low_frequency") : 1000;
122 df_lo = op.present("df_low_order") ?
123 op.I("df_low_order") : 0;
124
125 mo = op.present("median_order") ?
126 op.I("median_order") : 19;
127
128 debug = op.present("pm_debug") ? 1 : 0;
129
130 return pitchmark(lx, lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo,
131 mo, debug);
132}
133
134/** Iterate through track and eliminate any frame whose distance to a
135preceding frames is less than min seconds*/
136
137void pm_min_check(EST_Track &pm, float min)
138{
139 int i, j;
140
141 for (i = j = 0; i < pm.num_frames() - 1; ++i, ++j)
142 {
143 pm.t(j) = pm.t(i);
144 while ((i < (pm.num_frames() - 1)) && ((pm.t(i + 1) - pm.t(i)) < min))
145 ++i;
146 }
147 if (i < pm.num_frames())
148 pm.t(j) = pm.t(i);
149 pm.resize(j, pm.num_channels());
150}
151
152
153void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)
154{
156
157 if (new_end < 0)
158 new_end = pm.end();
159
160// if (debug)
161 // cout<< "new end:" << new_end << endl;
162 // largest possible set of new pitchmarks
163
164// cout << "num frames:" << pm.num_frames() << endl;
165// cout << "num frames:" << pm.end() << endl;
166// cout << "num frames:" << min << endl;
167 new_pm.resize(int(new_end / min));
168// cout << "num frames:" << pm.end()/min << endl;
169// cout << "num frames:" << new_pm.n() << endl;
170
171 int i, j, npm=0;
172 float last = 0.0;
173
174 int dropped=0, added=0;
175
176 for(j = 0; j < pm.num_frames(); j++)
177 {
178 float current = pm.t(j);
179
180 if (current > new_end)
181 break;
182
183 if (current - last < min)
184 {
185 // drop current pitchmark
186 dropped++;
187 }
188
189 else if (current-last > max)
190 {
191 // interpolate
192 int num = ifloor((current - last)/ def);
193 float size = (current-last) / num;
194 for (i = 1; i <= num; i++)
195 {
196 new_pm[npm] = last + i * size;
197 npm++;
198 added++;
199 }
200 }
201 else
202 {
203 new_pm[npm] = pm.t(j);
204 npm++;
205 }
206 last=current;
207 }
208
209 if (new_end - last > max)
210 {
211 // interpolate
212 int num = ifloor((new_end - last)/ def);
213 float size = (new_end -last) / num;
214 for (i = 1; i <= num; i++)
215 {
216 new_pm[npm] = last + i * size;
217 npm++;
218 added++;
219 }
220 }
221
222// if (debug)
223// if (dropped>0 || added >0)
224// cout << "Dropped " << dropped<< " and added " << added << " PMs\n";
225
226// if (debug)
227 pm.resize(npm, pm.num_channels());
228 for (i = 0; i < npm; i++)
229 pm.t(i) = new_pm(i);
230}
231
232void neg_zero_cross_pick(EST_Wave &lx, EST_Track &pm)
233{
234 int i, j;
235 pm.resize(lx.num_samples(), EST_CURRENT);
236
237 for (i = 1, j = 0; i < lx.num_samples(); ++i)
238 if ((lx.a(i -1) > 0) && (lx.a(i) <= 0))
239 pm.t(j++) = lx.t(i);
240
241 pm.resize(j, EST_CURRENT);
242
243 for (i = 0; i < pm.num_frames(); ++i)
244 pm.set_value(i);
245}
246
247void pm_to_label(EST_Track &pm, EST_Relation &lab)
248{
249 EST_Item *seg;
250 lab.clear();
251
252 for (int i = 0; i < pm.num_frames(); ++i)
253 {
254 seg = lab.append();
255 seg->set("name","");
256 seg->set("end",pm.t(i));
257 }
258}
259
260void pm_to_f0(EST_Track &pm, EST_Track &f0)
261{
262 float prev_pm = 0.0;
263 f0 = pm;
264 f0.resize(EST_ALL, 1);
265
266 for (int i = 0; i < f0.num_frames(); ++i)
267 {
268 f0.a(i, 0) = 1.0 / (f0.t(i) - prev_pm);
269 prev_pm = f0.t(i);
270 }
271}
272
273void pm_to_f0(EST_Track &pm, EST_Track &fz, float shift)
274{
275 int i;
276 float period;
277
278 fz.resize((int)(pm.end()/shift), 1);
279 fz.fill_time(shift);
280
281 for (i = 0; i < fz.num_frames() -1 ; ++i)
282 {
283 period = get_time_frame_size(pm, pm.index_below(fz.t(i)));
284 fz.a(i) = 1.0 /period;
285 }
286}
int present(const EST_String &name) const
const int I(const EST_String &path) const