Skip to content

Commit 4fffc97

Browse files
authored
[itn/number] Fix 两千零十 (#91)
1 parent 6f5d3db commit 4fffc97

2 files changed

Lines changed: 8 additions & 3 deletions

File tree

itn/chinese/rules/cardinal.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def build_tagger(self):
5151
# 一千一 => 1100, 一千 => 1000
5252
thousand = ((hundred | teen | tens | digits) + delete('千') + (
5353
hundred
54-
| add_weight(zero + tens, 0.1)
54+
| add_weight(zero + (tens | teen), 0.1)
5555
| add_weight(addzero + zero + digit, 0.5)
5656
| add_weight(digit + addzero**2, 0.8)
5757
| add_weight(addzero**3, 1.0)))
@@ -60,7 +60,7 @@ def build_tagger(self):
6060
+ delete('万')
6161
+ (thousand
6262
| add_weight(zero + hundred, 0.1)
63-
| add_weight(addzero + zero + tens, 0.5)
63+
| add_weight(addzero + zero + (tens | teen), 0.5)
6464
| add_weight(addzero + addzero + zero + digit, 0.5)
6565
| add_weight(digit + addzero**3, 0.8)
6666
| add_weight(addzero**4, 1.0)))
@@ -85,7 +85,7 @@ def build_tagger(self):
8585
if self.enable_0_to_9:
8686
cardinal |= number
8787
else:
88-
number_two_plus = (digits + digits.plus) | teen | tens | hundred | thousand | ten_thousand
88+
number_two_plus = (digits + digits.plus) | teen | tens | hundred | thousand | ten_thousand # noqa
8989
cardinal |= number_two_plus
9090
tagger = insert('value: "') + cardinal + insert('"')
9191
self.tagger = self.add_tokens(tagger)

itn/chinese/test/data/number.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
一千零一十一 => 1011
1515
一千一百一十一 => 1111
1616
两千 => 2000
17+
两千零十 => 2010
18+
两千零一十 => 2010
19+
两千零十二 => 2012
20+
两千零一十二 => 2012
21+
两千零二十 => 2020
1722
一万 => 10000
1823
一万零一 => 10001
1924
一万零一十一 => 10011

0 commit comments

Comments
 (0)