Skip to content

Commit cf29fe5

Browse files
committed
添加‘个百分点’转换%
1 parent 0a8c747 commit cf29fe5

3 files changed

Lines changed: 13 additions & 3 deletions

File tree

itn/chinese/rules/cardinal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,9 @@ def build_tagger(self):
172172
# 5. 添加"中文数字+英文字母"的规则,如"四a" -> "4a"
173173
# 匹配一个或多个英文字母(仅限下一行列出的大小写字母,并非全部英文字母)
174174
from pynini import union
175-
english_letters = union(*[accep(c) for c in "abcdABCD"])
175+
english_letters = union(*[accep(c) for c in "abcdqABCD"])
176176
# 数字+字母的组合,如"四a" -> "4a"
177-
number_with_letter = number + english_letters.plus
177+
number_with_letter = number.plus + english_letters.plus
178178
cardinal |= add_weight(number_with_letter, 0.05) # 使用较高优先级
179179

180180
# 6. 添加两个连续完整数字的范围规则(如"二十一二十二" -> "21-22")

itn/chinese/rules/measure.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,16 @@ def build_tagger(self):
5656
+ insert("%")
5757
)
5858

59+
# 二十二个百分点, 零点六个百分点, 负二十二个百分点
60+
percent_point = (
61+
(sign + delete("的").ques).ques
62+
+ Cardinal().number
63+
+ delete("个").ques
64+
+ delete("百分")
65+
+ (delete("点") | delete("比"))
66+
+ insert("%")
67+
)
68+
5969
# 十千米每小时 => 10km/h, 十一到一百千米每小时 => 11~100km/h
6070
# measure = number + (to + number).ques + units
6171
measure = number + (insert("、") + number).star + (to + number).ques + units
@@ -96,7 +106,7 @@ def build_tagger(self):
96106
-0.5,
97107
)
98108

99-
tagger = insert('value: "') + (measure | measure_sp | percent) + insert('"')
109+
tagger = insert('value: "') + (measure | measure_sp | percent | percent_point) + insert('"')
100110
# 每小时十千米 => 10km/h, 每小时三十到三百一十一千米 => 30~311km/h
101111
tagger |= insert('denominator: "') + delete("每") + units + insert('" numerator: "') + measure + insert('"')
102112

itn/zh_itn_tagger.fst

231 KB
Binary file not shown.

0 commit comments

Comments
 (0)