Skip to content

Commit b68409a

Browse files
committed
fix(itn): 货币单位精确到角和分
1 parent 24328b8 commit b68409a

4 files changed

Lines changed: 11 additions & 4 deletions

File tree

itn/chinese/rules/money.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,17 @@ def build_tagger(self):
3636
Cardinal().number_exclude_0_to_9
3737
# 七八美元 => $7~8
3838
number |= digit + insert("~") + digit
39+
# 三千三百八十元五毛八分 => ¥3380.58
3940
tagger = (insert('value: "') + number + insert('"') +
40-
insert(' currency: "') + (code | symbol) + insert('"'))
41+
insert(' currency: "') + (code | symbol) + insert('"') +
42+
insert(' decimal: "') + (
43+
insert(".") + digit + (delete("毛") | delete("角")) + (digit + delete("分")).ques
44+
).ques + insert('"'))
4145
self.tagger = self.add_tokens(tagger)
4246

4347
def build_verbalizer(self):
4448
currency = delete('currency: "') + self.SIGMA + delete('"')
4549
value = delete(' value: "') + self.SIGMA + delete('"')
46-
verbalizer = currency + value
50+
decimal = delete(' decimal: "') + self.SIGMA + delete('"')
51+
verbalizer = currency + value + decimal
4752
self.verbalizer = self.delete_tokens(verbalizer)

itn/chinese/test/data/money.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,5 @@
77
四十五六新台币 => TWD45-6
88
七百三四欧元 => €730-40
99
七百三四十马来西亚令吉 => RM730-40
10+
三千三百八十元五角八分 => ¥3380.58
11+
二十五元三毛 => ¥25.3

runtime/processor/wetext_token_parser.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<std::string>> ITN_ORDERS = {
3636
{"date", {"year", "month", "day"}},
3737
{"fraction", {"sign", "numerator", "denominator"}},
3838
{"measure", {"numerator", "denominator", "value"}},
39-
{"money", {"currency", "value"}},
39+
{"money", {"currency", "value", "decimal"}},
4040
{"time", {"hour", "minute", "second", "noon"}}};
4141

4242
TokenParser::TokenParser(ParseType type) {

tn/token_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
'date': ['year', 'month', 'day'],
2626
'fraction': ['sign', 'numerator', 'denominator'],
2727
'measure': ['numerator', 'denominator', 'value'],
28-
'money': ['currency', 'value'],
28+
'money': ['currency', 'value', 'decimal'],
2929
'time': ['hour', 'minute', 'second', 'noon']}
3030

3131

0 commit comments

Comments
 (0)