Skip to content

Commit d865aa2

Browse files
authored
fix(itn): 7/-12 -> -7/12 (#77)
1 parent 2213ed0 commit d865aa2

3 files changed

Lines changed: 15 additions & 7 deletions

File tree

itn/chinese/rules/fraction.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from itn.chinese.rules.cardinal import Cardinal
1616
from tn.processor import Processor
1717

18-
from pynini.lib.pynutil import delete, insert
18+
from pynini import string_file
19+
from pynini.lib.pynutil import delete, insert, add_weight
1920

2021

2122
class Fraction(Processor):
@@ -27,14 +28,21 @@ def __init__(self):
2728

2829
def build_tagger(self):
2930
number = Cardinal().number
30-
31-
tagger = (insert('denominator: "') + number +
31+
sign = string_file('itn/chinese/data/number/sign.tsv') # + -
32+
33+
# NOTE(xcsong): default weight = 1.0; setting the sign's weight to -1.0 gives that path higher priority.
34+
# For example,
35+
# 1.0, 负二分之三 -> { sign: "" denominator: "-2" numerator: "3" }
36+
# -1.0, 负二分之三 -> { sign: "-" denominator: "2" numerator: "3" }
37+
tagger = (insert('sign: "') + add_weight(sign, -1.0).ques +
38+
insert('" denominator: "') + number +
3239
delete('分之') + insert('" numerator: "') +
3340
number + insert('"'))
3441
self.tagger = self.add_tokens(tagger)
3542

3643
def build_verbalizer(self):
37-
numerator = delete('numerator: "') + self.SIGMA + delete('"')
44+
sign = delete('sign: "') + self.SIGMA + delete('"')
45+
numerator = delete(' numerator: "') + self.SIGMA + delete('"')
3846
denominator = delete(' denominator: "') + self.SIGMA + delete('"')
39-
verbalizer = numerator + insert('/') + denominator
47+
verbalizer = sign + numerator + insert('/') + denominator
4048
self.verbalizer = self.delete_tokens(verbalizer)

runtime/processor/token_parser.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ const std::unordered_map<std::string, std::vector<std::string>> TN_ORDERS = {
3434
{"time", {"noon", "hour", "minute", "second"}}};
3535
const std::unordered_map<std::string, std::vector<std::string>> ITN_ORDERS = {
3636
{"date", {"year", "month", "day"}},
37-
{"fraction", {"numerator", "denominator"}},
37+
{"fraction", {"sign", "numerator", "denominator"}},
3838
{"measure", {"numerator", "denominator", "value"}},
3939
{"money", {"currency", "value"}},
4040
{"time", {"hour", "minute", "second", "noon"}}};

tn/token_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
'time': ['noon', 'hour', 'minute', 'second']}
2424
ITN_ORDERS = {
2525
'date': ['year', 'month', 'day'],
26-
'fraction': ['numerator', 'denominator'],
26+
'fraction': ['sign', 'numerator', 'denominator'],
2727
'measure': ['numerator', 'denominator', 'value'],
2828
'money': ['currency', 'value'],
2929
'time': ['hour', 'minute', 'second', 'noon']}

0 commit comments

Comments
 (0)