Skip to content

Commit 2b5d0fa

Browse files
committed
修复十一到十二个百分点的bug
1 parent cf29fe5 commit 2b5d0fa

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

itn/chinese/rules/measure.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,34 @@ def build_tagger(self):
5656
+ insert("%")
5757
)
5858

59+
# 定义范围连接词"到"的转换: 到 => ~ (用于"十一到十二个百分点"这类范围表达)
60+
to_percent_point = cross("到", "~")
61+
5962
# 二十二个百分点, 零点六个百分点, 负二十二个百分点
60-
percent_point = (
63+
# 十一到十二个百分点 => 11~12%
64+
# 注意:需要确保范围匹配时,第二个数字不会误匹配后续的"个"+"百分"+("点"|"比")
65+
percent_point_single = (
6166
(sign + delete("的").ques).ques
6267
+ Cardinal().number
63-
+ delete("个").ques
68+
+ delete("个")
6469
+ delete("百分")
6570
+ (delete("点") | delete("比"))
6671
+ insert("%")
6772
)
73+
74+
percent_point_range = (
75+
(sign + delete("的").ques).ques
76+
+ Cardinal().number
77+
+ to_percent_point
78+
+ Cardinal().number
79+
+ delete("个")
80+
+ delete("百分")
81+
+ (delete("点") | delete("比"))
82+
+ insert("%")
83+
)
84+
85+
percent_point = percent_point_range | percent_point_single
86+
6887

6988
# 十千米每小时 => 10km/h, 十一到一百千米每小时 => 11~100km/h
7089
# measure = number + (to + number).ques + units
@@ -106,7 +125,7 @@ def build_tagger(self):
106125
-0.5,
107126
)
108127

109-
tagger = insert('value: "') + (measure | measure_sp | percent | percent_point) + insert('"')
128+
tagger = insert('value: "') + (add_weight(percent_point_range, -0.5) | add_weight(percent_point_single, -0.3) | measure | measure_sp | percent) + insert('"')
110129
# 每小时十千米 => 10km/h, 每小时三十到三百一十一千米 => 30~311km/h
111130
tagger |= insert('denominator: "') + delete("每") + units + insert('" numerator: "') + measure + insert('"')
112131

itn/zh_itn_tagger.fst

744 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)