Skip to content

Commit 71a7745

Browse files
authored
[tn] english tn, support fraction (#209)
1 parent c26d489 commit 71a7745

4 files changed

Lines changed: 180 additions & 2 deletions

File tree

tn/english/normalizer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from tn.english.rules.cardinal import Cardinal
1818
from tn.english.rules.ordinal import Ordinal
1919
from tn.english.rules.decimal import Decimal
20+
from tn.english.rules.fraction import Fraction
2021
from tn.english.rules.word import Word
2122
from tn.english.rules.date import Date
2223

@@ -36,19 +37,22 @@ def build_tagger(self):
3637
cardinal = add_weight(Cardinal().tagger, 1.0)
3738
ordinal = add_weight(Ordinal().tagger, 1.0)
3839
decimal = add_weight(Decimal().tagger, 1.0)
40+
fraction = add_weight(Fraction().tagger, 1.0)
3941
date = add_weight(Date().tagger, 0.99)
4042
word = add_weight(Word().tagger, 100)
4143
tagger = (cardinal | ordinal | word
42-
| date | decimal).optimize() + self.DELETE_SPACE
44+
| date | decimal | fraction).optimize() + self.DELETE_SPACE
4345
# delete the last space
4446
self.tagger = tagger.star @ self.build_rule(delete(' '), r='[EOS]')
4547

4648
def build_verbalizer(self):
4749
cardinal = Cardinal().verbalizer
4850
ordinal = Ordinal().verbalizer
4951
decimal = Decimal().verbalizer
52+
fraction = Fraction().verbalizer
5053
word = Word().verbalizer
5154
date = Date().verbalizer
5255
verbalizer = (cardinal | ordinal | word
53-
| date | decimal).optimize() + self.INSERT_SPACE
56+
| date | decimal
57+
| fraction).optimize() + self.INSERT_SPACE
5458
self.verbalizer = verbalizer.star

tn/english/rules/fraction.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2024, WENET COMMUNITY. Xingchen Song (sxc19@tsinghua.org.cn).
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import pynini
17+
from pynini.examples import plurals
18+
from pynini.lib import pynutil
19+
20+
from tn.processor import Processor
21+
from tn.utils import get_abs_path
22+
from tn.english.rules.cardinal import Cardinal
23+
from tn.english.rules.ordinal import Ordinal
24+
25+
26+
class Fraction(Processor):
    """Tagger and verbalizer grammars for written English fractions."""

    def __init__(self, deterministic: bool = False):
        """
        Args:
            deterministic: if True will provide a single transduction option,
                for False multiple transduction are generated (used for
                audio-based normalization)
        """
        super().__init__('fraction', ordertype="en_tn")
        self.deterministic = deterministic
        # Both grammars are compiled eagerly at construction time.
        self.build_tagger()
        self.build_verbalizer()

    def build_tagger(self):
        """
        Finite state transducer for classifying fraction
            "23 4/5" ->
            fraction { integer_part: "twenty three" numerator: "four" denominator: "five" }
            "23 4/5th" ->
            fraction { integer_part: "twenty three" numerator: "four" denominator: "five" }

        Stores the untokenized grammar on ``self.graph`` and the optimized,
        token-wrapped grammar on ``self.tagger``.
        """
        number = Cardinal(self.deterministic).graph

        # integer_part: "<cardinal>"
        whole = (pynutil.insert("integer_part: \"") + number +
                 pynutil.insert("\""))

        # The slash (with or without surrounding spaces) closes the numerator
        # field and is replaced by the field separator.
        slash = pynini.cross("/", "\" ") | pynini.cross(" / ", "\" ")
        top = pynutil.insert("numerator: \"") + number + slash

        # An ordinal-style ending after the denominator ("4/5th") is optional
        # and is dropped from the output; all-lower and all-upper spellings
        # are both accepted.
        lower_endings = ["rd", "th", "st", "nd"]
        all_endings = lower_endings + [e.upper() for e in lower_endings]
        drop_ending = pynini.closure(
            pynini.cross(pynini.union(*all_endings), ""), 0, 1)

        bottom = (pynutil.insert("denominator: \"") + number + drop_ending +
                  pynutil.insert("\""))

        ratio = top + bottom

        # Branch 1: "<int> <num>/<den>" with a mandatory space after the
        # optional integer part.
        spelled_out = pynini.closure(whole + pynini.accep(" "), 0, 1) + ratio

        # Branch 2: the fraction text is first rewritten by the mappings in
        # fraction.tsv and then parsed by the same numerator/denominator
        # grammar; here the space after the integer part may be absent in the
        # input, in which case it is inserted.
        # NOTE(review): fraction.tsv presumably maps single-character
        # fractions such as "½" to "1/2" - confirm against the data file.
        table = pynini.string_file(
            get_abs_path("english/data/number/fraction.tsv"))
        gap = pynini.accep(" ") | pynutil.insert(" ")
        from_table = (pynini.closure(whole + gap, 0, 1) +
                      pynini.compose(table, ratio))

        self.graph = spelled_out | from_table
        self.tagger = self.add_tokens(self.graph).optimize()

    def build_verbalizer(self):
        """
        Finite state transducer for verbalizing fraction
            e.g. fraction { integer_part: "twenty three" numerator: "four" denominator: "five" } ->
            twenty three and four fifths

        Stores the untokenized grammar on ``self.graph`` and the optimized,
        token-stripping grammar on ``self.verbalizer``.
        """
        ordinal_suffix = Ordinal(self.deterministic).suffix
        sigma = pynini.closure(self.VCHAR)

        whole = (pynutil.delete("integer_part: \"") +
                 pynini.closure(self.NOT_QUOTE) + pynutil.delete("\" "))

        # Denominators with a dedicated spoken form.
        den_one = pynini.cross("denominator: \"one\"", "over one")
        den_half = pynini.cross("denominator: \"two\"", "half")
        den_quarter = pynini.cross("denominator: \"four\"", "quarter")

        # Every other denominator is rewritten through the ordinal suffix
        # grammar.
        den_other = (
            pynutil.delete("denominator: \"") +
            pynini.compose(pynini.closure(self.NOT_QUOTE), ordinal_suffix) +
            pynutil.delete("\""))

        # Build the priority chain from the inside out:
        # one > half > quarter > generic ordinal.
        spoken_den = den_other
        for special in (den_quarter, den_half, den_one):
            spoken_den = plurals._priority_union(special, spoken_den, sigma)
        spoken_den = spoken_den.optimize()

        if not self.deterministic:
            # Additionally allow "four" to go through the ordinal suffix
            # grammar, alongside "quarter".
            four_via_ordinal = (
                pynutil.delete("denominator: \"") +
                (pynini.accep("four") @ ordinal_suffix) +
                pynutil.delete("\""))
            spoken_den |= four_via_ordinal

        # Numerator "one": the denominator stays singular.
        single = (pynutil.delete("numerator: \"") + pynini.accep("one") +
                  pynutil.delete("\" "))
        single = single + self.INSERT_SPACE + spoken_den

        # Any other numerator: the denominator is pluralized at the end of
        # the string ("half" -> "halves", otherwise append "s").
        several = (pynutil.delete("numerator: \"") +
                   (pynini.closure(self.NOT_QUOTE) - pynini.accep("one")) +
                   pynutil.delete("\" "))
        several = several + self.INSERT_SPACE + spoken_den
        pluralize = pynini.cdrewrite(
            plurals._priority_union(pynini.cross("half", "halves"),
                                    pynutil.insert("s"), sigma),
            "",
            "[EOS]",
            sigma,
        )
        several = several @ pluralize

        # Optional integer part, joined to the fraction with "and ".
        whole_and = pynini.closure(
            whole + self.INSERT_SPACE + pynutil.insert("and "), 0, 1)
        spoken = whole_and + (single | several)

        # Final end-of-string touch-ups: "and one half" -> "and a half", and
        # undo the plural "s" that was appended to "over one".
        touch_up = pynini.cdrewrite(
            pynini.cross("and one half", "and a half")
            | pynini.cross("over ones", "over one"), "", "[EOS]", sigma)
        spoken = spoken @ touch_up

        self.graph = spoken
        self.verbalizer = self.delete_tokens(self.graph).optimize()

tn/english/test/data/fraction.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
23 4/5 => twenty three and four fifths
2+
23 4/5th => twenty three and four fifths
3+
1/3 => one third
4+
1/2 => one half
5+
1/4 => one quarter
6+
2/4 => two quarters
7+
23/44 => twenty three forty fourths

tn/english/test/fraction_test.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright (c) 2024 Xingchen Song (sxc19@tsinghua.org.cn)
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
from tn.english.rules.fraction import Fraction
18+
from tn.english.test.utils import parse_test_case
19+
20+
21+
class TestFraction:
    """Table-driven checks for the English fraction normalizer."""

    # A single grammar instance is built at class-definition time and shared
    # by every parametrized case.
    fraction = Fraction(deterministic=False)
    # (written, spoken) pairs parsed from the data file.
    fraction_cases = parse_test_case('data/fraction.txt')

    @pytest.mark.parametrize("written, spoken", fraction_cases)
    def test_fraction(self, written, spoken):
        """Normalizing ``written`` must yield exactly ``spoken``."""
        normalized = self.fraction.normalize(written)
        assert normalized == spoken

0 commit comments

Comments
 (0)