Edinburgh Speech Tools
2.4-release
Loading...
Searching...
No Matches
scfg_parse_main.cc
1
/*************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1996,1997 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/*************************************************************************/
33
/* Author : Alan W Black */
34
/* Date : October 1997 */
35
/*-----------------------------------------------------------------------*/
36
/* Parse a list of sentences with a given stochastic context free */
37
/* grammar */
38
/* */
39
/*=======================================================================*/
40
#include <cstdlib>
41
#include <cstdio>
42
#include <iostream>
43
#include <fstream>
44
#include <cstring>
45
#include "EST.h"
46
#include "EST_SCFG.h"
47
#include "EST_SCFG_Chart.h"
48
#include "siod.h"
49
50
// Name of the output destination.  scfg_parse_main() overwrites it with
// the value of the -o option when one is given; otherwise it stays "-",
// in which case the parses are written to stdout.
static EST_String outfile = "-";
51
52
// Forward declaration: the real work of the program is done here so that
// main() can stay a thin wrapper.  Takes the untouched command-line
// arguments and returns the program's exit status.
static int scfg_parse_main(int argc, char **argv);
53
54
55
/** @name <command>scfg_parse</command> <emphasis>Parse text using a pre-trained stochastic context free grammar</emphasis>
56
@id scfg-parse-manual
57
* @toc
58
*/
59
60
//@{
61
62
63
/**@name Synopsis
64
*/
65
//@{
66
67
//@synopsis
68
69
/**
70
71
This parses given text with a given stochastic context free grammar.
72
Note this program is not designed as an arbitrary parser for
73
unrestricted English. It simply parses the input non-terminals
74
with the given grammar. If you want English (or other language)
75
parses, consider using the festival script <command>scfg_parse</command>
76
which does proper tokenization and part of speech tagging, before
77
passing it to a SCFG.
78
79
*/
80
81
//@}
82
83
/**@name OPTIONS
84
*/
85
//@{
86
87
//@options
88
89
//@}
90
91
int
main(
int
argc
,
char
**
argv
)
92
{
93
94
scfg_parse_main(
argc
,
argv
);
95
96
exit
(0);
97
return
0;
98
}
99
100
static
int
scfg_parse_main(
int
argc
,
char
**
argv
)
101
{
102
// Top level function generates a probabilistic grammar
103
EST_Option
al
;
104
EST_StrList
files
;
105
EST_SCFG_Chart
chart
;
106
LISP
rules,s,parse;
107
FILE
*corpus,*
output
;
108
int
i;
109
110
parse_command_line
111
(
argc
,
argv
,
112
EST_String
(
"[options]\n"
)+
113
"Summary: Parse a corpus with a stochastic context free grammar\n"
+
114
"-grammar <ifile> Grammar file, one rule per line.\n"
+
115
"-corpus <ifile> Corpus file, one bracketed sentence per line.\n"
+
116
"-brackets Output bracketing only.\n"
+
117
"-o <ofile> Output file for parsed sentences.\n"
,
118
files
,
al
);
119
120
if
(
al
.present(
"-o"
))
121
outfile =
al
.val(
"-o"
);
122
else
123
outfile =
"-"
;
124
125
siod_init();
126
127
if
(
al
.present(
"-grammar"
))
128
{
129
rules = vload(
al
.val(
"-grammar"
),1);
130
gc_protect(&rules);
131
}
132
else
133
{
134
cerr
<<
"scfg_parse: no grammar specified"
<<
endl
;
135
exit
(1);
136
}
137
138
if
(
al
.present(
"-corpus"
))
139
{
140
if
((corpus =
fopen
(
al
.val(
"-corpus"
),
"r"
)) == NULL)
141
{
142
cerr
<<
"scfg_parse: can't open corpus file \""
<<
143
al
.val(
"-corpus"
) <<
"\" for reading "
<<
endl
;
144
exit
(1);
145
}
146
}
147
else
148
{
149
cerr
<<
"scfg_parse: no corpus specified"
<<
endl
;
150
exit
(1);
151
}
152
153
if
(
al
.present(
"-o"
))
154
{
155
if
((
output
=
fopen
(
al
.val(
"-o"
),
"w"
)) == NULL)
156
{
157
cerr
<<
"scfg_parse: can't open output file \""
<<
158
al
.val(
"-o"
) <<
"\" for writing "
<<
endl
;
159
exit
(1);
160
}
161
}
162
else
163
output
=
stdout
;
164
165
gc_protect(&s);
166
gc_protect(&parse);
167
for
(i=0; ((s=lreadf(corpus)) != get_eof_val()); i++)
168
{
169
parse = scfg_parse(s,rules);
170
if
(
al
.present(
"-brackets"
))
171
{
172
LISP
bparse
= scfg_bracketing_only(parse);
173
if
(
bparse
== NIL)
174
bparse
= s;
175
pprint_to_fd(
output
,
bparse
);
176
}
177
else
178
pprint_to_fd(
output
,parse);
179
if
(i%100 == 99)
180
user_gc(NIL);
181
}
182
183
if
(
output
!=
stdout
)
184
fclose
(
output
);
185
gc_unprotect(&s);
186
gc_unprotect(&parse);
187
gc_unprotect(&rules);
188
189
return
0;
190
}
191
EST_Hash_Pair
Definition
EST_THash.h:75
EST_Option
Definition
EST_Option.h:50
EST_SCFG_Chart
Definition
EST_SCFG_Chart.h:100
EST_String
Definition
EST_String.h:70
EST_TList
Definition
EST_TList.h:109
main
scfg_parse_main.cc
Generated on Mon Apr 1 2024 04:48:28 for Edinburgh Speech Tools by
1.9.8