Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
pda_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor */
34/* Date : May 1994 */
35/*-----------------------------------------------------------------------*/
36/* Pitch Detection Algorithm Main routine */
37/* */
38/*=======================================================================*/
39#include <fstream>
40#include "EST.h"
41#include "sigpr/EST_sigpr_utt.h"
42#include "EST_cmd_line_options.h"
43
44void set_parameters(EST_Features &a_list, EST_Option &al);
45
46void option_override(EST_Features &op, EST_Option al,
47 const EST_String &option, const EST_String &arg);
48
49static int save_pm(EST_String filename, EST_Track fz);
50
51/** @name <command>pda</command> <emphasis>Pitch Detection Algorithm</emphasis>
52 @id pda-manual
53 * @toc
54 */
55
56//@{
57
58/**@name Synopsis
59 */
60//@{
61
62//@synopsis
63
64/**
65pda is a pitch detection algorithm that produces a fundamental frequency
66contour from a speech waveform file. At present only the
67super resolution pitch determination algorithm is implemented.
68See (Medan, Yair, and Chazan, 1991) and (Bagshaw et al., 1993) for a detailed
69description of the algorithm.
70</para><para>
71
72The default values given below were found to optimise the performance
73of the pitch determination algorithm for speech data sampled at 20kHz
74using a 16\-bit waveform and low pass filter with a 600Hz cut-off
frequency and more than \-85dB rejection above 700Hz. The best
performance is obtained when the [\-p] flag is passed. </para><para>
77*/
78
79//@}
80
81/**@name Options
82 */
83//@{
84
85//@options
86
87//@}
88
89
90int main (int argc, char *argv[])
91{
95 EST_Features op;
96 EST_String out_file("-");
98
99 parse_command_line
100 (argc, argv,
101 EST_String("[input file] -o [output file] [options]\n")+
102 "Summary: pitch track waveform files\n"
103 "use \"-\" to make input and output files stdin/out\n"
104 "-h Options help\n\n"+
105 options_wave_input()+
106 options_pda_general()+
107 options_pda_srpd()+
108 options_track_output(),
109 files, al);
110
111 default_pda_options(op);
112 set_parameters(op, al);
113
114 if (read_wave(sig, files.first(), al) != format_ok)
115 exit(-1);
116
117 out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
118
119 pda(sig, fz, op); // do f0 tracking
120
121 if (al.present("-pm"))
122 save_pm(out_file, fz);
123 else
124 fz.save(out_file, op.S("f0_file_type", "0"));
125
126 if (al.present("-diff"))
127 {
128 fz = differentiate(fz);
129 fz.save(out_file + ".diff", op.S("f0_file_type", "0"));
130 }
131 return 0;
132}
133
134
135void set_parameters(EST_Features &op, EST_Option &al)
136{
137 op.set("srpd_resize", 1);
138
139 // general options
140 option_override(op, al, "pda_frame_shift", "-shift");
141 option_override(op, al, "pda_frame_length", "-length");
142 option_override(op, al, "max_pitch", "-fmax");
143 option_override(op, al, "min_pitch", "-fmin");
144
145 // low pass filtering options.
146 option_override(op, al, "lpf_cutoff", "-u");
147 option_override(op, al, "lpf_order", "-forder");
148
149 option_override(op, al, "decimation", "-d");
150 option_override(op, al, "noise_floor", "-n");
151 option_override(op, al, "min_v2uv_coef_thresh", "-m");
152 option_override(op, al, "v2uv_coef_thresh_ratio", "-R");
153 option_override(op, al, "v2uv_coef_thresh", "-H");
154 option_override(op, al, "anti_doubling_thresh", "-t");
155 option_override(op, al, "peak_tracking", "-P");
156
157 option_override(op, al, "f0_file_type", "-otype");
158 option_override(op, al, "wave_file_type", "-itype");
159
160 if (al.val("-L", 0) == "true")
161 op.set("do_low_pass", "true");
162 if (al.val("-R", 0) == "true")
163 op.set("do_low_pass", "false");
164
165
166/* op.set("lpf_cutoff",al.val("-u", 0));
167 op.set("lpf_order",al.val("-forder", 0));
168
169 //sprd options
170 op.set("decimation", al.val("-d", 0));
171 op.set("noise_floor", al.val("-n", 0));
172 op.set("min_v2uv_coef_thresh", al.val("-m", 0));
173 op.set("v2uv_coef_thresh_ratio", al.val("-r", 0));
174 op.set("v2uv_coef_thresh", al.val("-H", 0));
175 op.set("anti_doubling_thresh", al.val("-t", 0));
176 op.set("peak_tracking", al.val("-P", 0));
177 if (al.val("-L", 0) == "true")
178 op.set("do_low_pass", "true");
179 if (al.val("-R", 0) == "true")
180 op.set("do_low_pass", "false");
181 op.set("f0_file_type", al.val("-otype", 0));
182 op.set("wave_file_type", al.val("-itype", 0));
183*/
184}
185
186/* a_list.override_val("sample_rate", al.val("-f", 0));
187 a_list.override_val("min_pitch", al.val("-fmin", 0));
188 a_list.override_val("max_pitch", al.val("-fmax", 0));
189 a_list.override_val("pda_frame_shift", al.val("-s", 0));
190 a_list.override_val("pda_frame_length",al.val("-l", 0));
191
192 // low pass filtering options.
193 a_list.override_val("lpf_cutoff",al.val("-u", 0));
194 a_list.override_val("lpf_order",al.val("-forder", 0));
195
196 //sprd options
197 a_list.override_val("decimation", al.val("-d", 0));
198 a_list.override_val("noise_floor", al.val("-n", 0));
199 a_list.override_val("min_v2uv_coef_thresh", al.val("-m", 0));
200 a_list.override_val("v2uv_coef_thresh_ratio", al.val("-r", 0));
201 a_list.override_val("v2uv_coef_thresh", al.val("-H", 0));
202 a_list.override_val("anti_doubling_thresh", al.val("-t", 0));
203 a_list.override_val("peak_tracking", al.val("-P", 0));
204 if (al.val("-L", 0) == "true")
205 a_list.override_val("do_low_pass", "true");
206 if (al.val("-R", 0) == "true")
207 a_list.override_val("do_low_pass", "false");
208 a_list.override_val("f0_file_type", al.val("-otype", 0));
209 a_list.override_val("wave_file_type", al.val("-itype", 0));
210*/
211
212
213static int save_pm(EST_String filename, EST_Track fz)
214{
215 ostream *outf;
216 float position, period;
217
218 if (filename == "-")
219 outf = &cout;
220 else
221 outf = new ofstream(filename);
222
223 if (!(*outf))
224 {
225 cerr << "save_pm: can't write to file \"" << filename << "\"" << endl;
226 return -1;
227 }
228
229 *outf << "XAO1\n\n"; // xmg header identifier.
230 *outf << "LineType bars \n";
231 *outf << "LineStyle solid \n";
232 *outf << "LineWidth 0 \n";
233 *outf << "Freq 16\n";
234 *outf << "Format Binary \n";
235 *outf << char(12) << "\n"; // control L character
236
237 position = 0.0;
238 int gap = 0;
239 for (int i = 0; i < fz.num_frames(); ++i)
240 {
241 if (fz.val(i))
242 {
243 if (gap)
244 {
245 position = fz.t(i);
246 gap = 0;
247 }
248 period = 1.0 / fz.a(i);
249 *outf << (position + period) * 1000.0 << endl;
250 position += period;
251 }
252 else
253 gap = 1;
254 }
255
256 if (outf != &cout)
257 delete outf;
258
259 return 0;
260}
261
262/**@name Examples
263
264Pitch detection on typical male voice, using low pass filtering:
265<screen>
266$ pda kdt_010.wav -o kdt_010.f0 -fmin 80 -fmax 200 -L
267</screen>
268*/
269//@{
270
271//@}
272//@}
void set(const EST_String &name, int ival)
const EST_String S(const EST_String &path) const