Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
ch_track_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1994,1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor */
34/* Date : June 1994 */
35/*-----------------------------------------------------------------------*/
36/* EST_Track file manipulation program */
37/* */
38/*=======================================================================*/
39
40#include "EST.h"
41#include "EST_cmd_line_options.h"
42
43#define DEFAULT_TIME_SCALE 0.001
44
// Forward declarations of helpers defined elsewhere and called from main().

// Called by main() for -c, after the option string has been split into the
// string list s; fills il, which is then handed to extract_channel().
// presumably converts each string to an integer channel index -- TODO confirm
45int StrListtoIList(EST_StrList &s, EST_IList &il);
// Called by main() for -c with the current track as orig; main() then
// replaces its track with nt.
46void extract_channel(EST_Track &orig, EST_Track &nt, EST_IList &ch_list);
47
// Called by main() when -otype is "snns", with the output file name and two
// track lists (input patterns, output patterns).
// NOTE(review): the remainder of this prototype is not visible in this listing.
48EST_write_status save_snns_pat(const EST_String filename,
50
// Called by main() to load the files named on the command line into tlist;
// main() compares the result against read_ok.
51EST_read_status read_TrackList(EST_TrackList &tlist, EST_StrList &files,
52 EST_Option &al);
53
// Called by main() when any of -start, -end, -to or -from is present.
54void extract(EST_Track &tr, EST_Option &al);
55/** @name <command>ch_track</command> <emphasis>Track file manipulation</emphasis>
56 * @id ch-track-manual
57 * @toc
58 */
59
60//@{
61
62
63/**@name Synopsis
64 */
65//@{
66
67//@synopsis
68
69/**
70ch_track is used to manipulate the format of a track
71file. Operations include:
72
73<itemizedlist>
74<listitem><para>file format conversion</para></listitem>
75<listitem><para>smoothing</para></listitem>
76<listitem><para>changing the frame spacing of a track (resampling)</para></listitem>
77<listitem><para>producing differentiated and delta tracks</para></listitem>
78<listitem><para>Using a threshold to convert a track file to a label file</para></listitem>
79
80<listitem><para>making multiple input files into a single multi-channel output file</para></listitem>
81<listitem><para>extracting a single channel from a multi-channel track</para></listitem>
82<listitem><para>extracting a time-delimited portion of the waveform</para></listitem>
83</itemizedlist>
84
85 */
86
87//@}
88
89/**@name Options
90 */
91//@{
92
93//@options
94
95//@}
96
97
// Entry point for ch_track. Parses the command line, loads the input
// tracks, combines them (in parallel with -pc, as an SNNS pattern file
// when -otype is "snns", otherwise by concatenation), applies the optional
// processing steps selected by the options, and writes the result either
// as a label file (-style label) or as a track file.
// Returns 0 on success; several error paths call exit() directly.
// NOTE(review): the declarations of files, al, settings, tr, trlist and the
// other locals used below are not visible in this listing.
98int main(int argc, char *argv[])
99{
100 EST_String in_file("-"), out_file("-");
106 EST_Litem *p;
107
108 parse_command_line(
109 argc, argv,
110 EST_String("[input file] -o [output file] [options]\n")+
111 "Summary: change/copy track files\n"
112 "use \"-\" to make input and output files stdin/out\n"
113 "-h Options help\n"+
114 options_track_input()+ "\n"+
115 options_track_output()+ "\n"
116 "-info Print information about file and header. \n"
117 " This option gives useful information such as file \n"
118 " length, file type, channel names. No output is produced\n\n"
119 "-track_names <string> \n"
120 " File containing new names for output channels\n\n"
121 "-diff Differentiate contour. This performs simple \n"
122 " numerical differentiation on the contour by \n"
123 " subtracting the amplitude of the current frame \n"
124 " from the amplitude of the next. Although quick, \n"
125 " this technique is crude and not recommende as the \n"
126 " estimation of the derivate is done on only one point\n\n"
127 "-delta <int> Make delta coefficients (better form of differentiate).\n"
128 " The argument to this option is the regression length of \n"
129 " of the delta calculation and can be between 2 and 4 \n\n"
130 "-sm <float> Length of smoothing window in seconds. Various types of \n"
131 " smoothing are available for tracks. This options specifies \n"
132 " length of the smooting window which effects the degree of \n"
133 " smoothing, i.e. a longer value means more smoothing \n\n"
134 "-smtype <string> Smooth type, median or mean\n"
135 "-style <string> Convert track to other form. Currently only one form \n"
136 " \"label\" is supported. This uses a specified cut off to \n"
137 " make a label file, with two labels, one for above the \n"
138 " cut off (-pos) and one for below (-neg)\n\n"
139 "-t <float> threshold for track to label conversion \n"
140 "-neg <string> Name of negative label in track to label conversion \n"
141 "-pos <string> Name of positive label in track to label conversion \n"
142 "-pc <string> Combine given tracks in parallel. If option \n"
143 " is longest, pad shorter tracks to longest, else if \n"
144 " first pad/cut to match first input track \n" +
145 options_track_filetypes_long(),
146 files, al);
147
148/*redundant options
149 "-time_channel <string>\n"+
150 " Which track in track file holds pitchmark times\n"+
151 "-time_scale <float> \n"+
152 " Scale of pitchmarks (default 0.001 = milliseconds)\n"+
153*/
154
155
// Copy the command-line choices into the library settings, then pick the
// output file name; "-" is used when -o is not given.
156 override_lib_ops(settings, al);
157 out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
158
160
161// ts.open(files.first());
162// tr.load(ts);
163// cout << tr;
164
// NOTE(review): a failed read exits with status 0, so the failure is not
// reflected in the process exit status -- confirm this is intended. The
// empty-input check below also only runs after the read has been attempted.
165 if (read_TrackList(trlist, files, al) != read_ok)
166 exit(0);
167
168 if (files.length() == 0)
169 {
170 cerr << argv[0] << ": no input files specified\n";
171 exit(-1);
172 }
173
// -info: report on every loaded track and stop; no output file is written.
174 if (al.present("-info"))
175 {
176 for (p = trlist.head(); p; p = p->next())
177 track_info(trlist(p));
178 exit(0);
179 }
180
// Combine the loaded tracks into tr. Exactly one of the three branches runs.
181 if (al.present("-pc")) // parallelize them
182 ParallelTracks(tr, trlist, al.val("-pc"));
183
184 else if (al.val("-otype", 0) == "snns")
185 { // sometime this will generalise for multiple input files
// The first loaded track is passed as the input patterns and the second
// as the output patterns; this branch saves and exits without touching tr.
// NOTE(review): nothing here checks that two tracks were actually loaded.
187 inpat.append(trlist.nth(0));
188 outpat.append(trlist.nth(1));
189 save_snns_pat(out_file, inpat, outpat);
190 exit(0);
191 }
192 else // concatenate them
193 {
// presumably empties tr (zero frames) while keeping its channel count
// before the tracks are appended -- TODO confirm resize() argument order
194 tr.resize(0, tr.num_channels());
195 // Reorg -- fix += to resize to largest num_channels (with warning)
196 for (p = trlist.head(); p; p = p->next())
197 tr += trlist(p);
198 }
199
// Optional processing, applied in this fixed order: resampling (-S),
// smoothing (-sm), differentiation (-diff) or delta coefficients (-delta),
// channel extraction (-c), region extraction (-start/-end/-to/-from).
200 if (al.present("-S"))
201 tr.sample(al.fval("-S"));
202 if (al.present("-sm"))
203 {
// NOTE(review): -smtype is read without a fallback value here -- confirm
// what happens when -sm is given on its own.
204 track_smooth(tr, al.fval("-sm"),al.val("-smtype"));
205 }
206
// -diff and -delta are treated as mutually exclusive.
207 if (al.present("-diff") && al.present("-delta"))
208 {
209 cerr << "Using -diff and -delta together makes no sense !\n";
210 exit(-1);
211 }
212 if (al.present("-diff"))
213 {
214 tr = differentiate(tr);
215 }
216 if (al.present("-delta"))
217 {
// ntr starts as a copy of tr so it already has the right dimensions;
// delta() is given both tracks and ntr then replaces tr.
218 EST_Track ntr = tr; // to copy size !;
219 delta(tr,ntr,al.ival("-delta"));
220 tr = ntr;
221 }
222
223 if (al.present("-c"))
224 {
// Split the -c argument into strings, convert those into the list il,
// and keep only what extract_channel() places in ntr.
225 EST_StrList s;
228 StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
229 StrListtoIList(s, il);
230 extract_channel(tr, ntr, il);
231 tr = ntr;
232 }
233
234 if (al.present("-start") || al.present("-end")
235 || al.present("-to") || al.present("-from"))
236 extract(tr, al);
237
238// tr.assign_map(&LPCTrackMap);
239// tr.set_space_type("VARI");
240
241
242 // optionally rename output tracks before saving
243
244 if (al.present("-track_names"))
245 {
// Load the replacement names from the file given to -track_names; a
// load failure is fatal.
247 if(load_StrList(al.val("-track_names"),new_names) != format_ok)
248 {
249 cerr << "Failed to load new track names file." << endl;
250 exit(-1);
251 }
252 /*
253 if (tr.num_channels() != new_names.length())
254 {
255 cerr << "Number of names in output track names file (";
256 cerr << new_names.length() << ") " << endl;
257 cerr << " does not match number of output channels (";
258 cerr << tr.num_channels() << ")" << endl;
259 exit(-1);
260 }
261
262 EST_Litem *np;
263 int ni;
264 for (np = new_names.head(),ni=0; np; np = np->next(),ni++)
265 tr.set_channel_name(new_names(np),ni);
266 */
// The explicit length check and per-channel renaming above are disabled;
// the name list is handed to resize() instead.
267 tr.resize(EST_CURRENT, new_names);
268 }
269
270 // track_info(tr);
271
272/* tr.resize(EST_CURRENT, 10);
273
274 cout << "new\n";
275 track_info(tr);
276
277 EST_StrList x;
278 x.append("a");
279 x.append("c");
280 x.append("d");
281
282
283
284 cout << "new\n";
285 track_info(tr);
286*/
287
288
289 // Write out file in appropriate format
290
291 if (al.val("-style",0) == "label")
292 {
// Convert the track to labels, using the -t threshold when given, then
// rename the "pos" / "neg" labels if -pos / -neg were supplied.
294 if (al.present("-t"))
295 track_to_label(tr, lab, al.fval("-t"));
296 else
297 track_to_label(tr, lab);
298 if (al.present("-pos"))
299 change_label(lab, "pos", al.val("-pos"));
300 if (al.present("-neg"))
301 change_label(lab, "neg", al.val("-neg"));
302 if (lab.save(out_file) != write_ok)
303 exit(-1);
304 }
305/* else if (al.val("-style",0) == "pm")
306 {
307 EST_Relation lab;
308
309 if (!al.present("-f"))
310 {
311 cerr << "must specify sample rate (with -f) for pm style\n";
312 exit(-1);
313 }
314 int sample_rate = al.ival("-f", 0);
315
316 track_to_pm(tr, sample_rate, lab);
317
318 if (lab.save(out_file) != write_ok)
319 exit(-1);
320 }
321*/
322 else
323 {
// Default: save as a track file in the format named by -otype.
324 if (tr.save(out_file, al.val("-otype")) != write_ok)
325 exit(-1);
326 }
327
328 return 0;
329}
330
331void override_lib_ops(EST_Option &a_list, EST_Option &al)
332{
333 a_list.override_val("ishift", al.val("-s", 0));
334 a_list.override_val("color", al.val("-color", 0));
335 a_list.override_val("in_track_file_type", al.val("-itype", 0));
336 a_list.override_val("out_track_file_type", al.val("-otype", 0));
337 a_list.override_val("tr_to_label_thresh", al.val("-t", 0));
338 a_list.override_fval("time_scale", DEFAULT_TIME_SCALE);
339
340 if (al.val("-style", 0) == "label")
341 a_list.override_val("lab_file_type", al.val("-otype", 0));
342 if (al.present("-time_scale"))
343 a_list.override_fval("time_scale", al.fval("-time_scale", 1));
344 if (al.present("-time_channel"))
345 a_list.override_val("time_channel", al.sval("-time_channel", 1));
346}
347
348
349/** @name Making multiple tracks into a single track
350
351If multiple input files are specified, by default they are concatenated into
352the output file.
353<para>
354<screen>
355$ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -o out.tr
356</screen>
357</para>
358<para>
359In the above example, 4 multi-channel input files are concatenated into
360a single output file. Multi-channel tracks can be
361concatenated provided they all have the same number of channels.
362
363</para><para>
364
365Multiple input files can be made into a multi-channel output file by
366using the -pc option:
367
368</para><para>
369<screen>
370$ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -o out.tr -pc longest
371</screen>
372</para>
373<para>
374The argument to -pc can be either longest, in which case the output
375track is the length of the longest input file, or first, in which case it
376is the length of the first input file.
377
378*/
379
380//@{
381//@}
382
383/** @name Extracting channels from multi-channel tracks
384
385The -c option is used to specify channels which should be extracted
386from the input. If the input is a 4 channel track,
387</para><para>
388<screen>
389$ ch_track kdt_m.tr -o a.tr -c "0 2"
390</screen>
391</para>
392<para>
393will extract the 0th and 2nd channel (counting starts from 0). The
394argument to -c can be either a single number or a list of numbers
395(wrapped in quotes).
396
397 */
398//@{
399//@}
400
401
402/** @name Extracting of a single region from a track
403
404There are several ways of extracting a region of a track. The
405simplest way is by using the start, end, to and from commands to
406delimit a sub portion of the input track. For example
407</para><para>
408<screen>
409$ ch_track kdt_010.tr -o small.tr -start 1.45 -end 1.768
410</screen>
411</para>
412<para>
413extracts a subtrack starting at 1.45 seconds and extending to 1.768 seconds.
414alternatively,
415</para><para>
416<screen>
417$ ch_track kdt_010.tr -o small.tr -from 50 -to 100
418</screen>
419</para>
420<para>
421extracts a subtrack starting at 50 frames and extending to 100
422frames. Times and frames can be mixed in sub-track extraction. The
423output track will have the same number of channels as the input track.
424
425
426*/
427//@{
428//@}
429
430/** @name Adding headers and format conversion
431
432It is usually a good idea for all track files to have headers as this
433way different files can be handled safely. ch_track provides a means
434of adding headers to unheadered files. These files are assumed to
435be ascii floats with one channel per line.
436
437The following adds a header to an ascii file.
438</para>
439<para>
440<screen>
441$ ch_track kdt_010.atr -o kdt_010.h5.tr -otype est -s 0.01
442</screen>
443</para>
444<para>
445ch_track can change the frame shift of a fixed frame file, or convert
446a variable frame shift file into a fixed frame shift file. At present this
447is done with a very crude resampling technique and hence the output
448file may suffer from aliasing distortion.</para><para>
449
450
451Change to a frame spacing of 0.02 seconds:
452</para><para>
453<screen>
454$ ch_track kdt_010.tr -o kdt_010.tr2 -S 0.02
455</screen>
456*/
457 //@{
458 //@}
459
460//@}
461