|
20 | 20 |
|
21 | 21 | class Cardinal(Processor): |
22 | 22 |
|
23 | | - def __init__(self, enable_standalone_number=True, enable_0_to_9=True): |
| 23 | + def __init__(self, enable_standalone_number=True, enable_0_to_9=True, |
| 24 | + enable_million=False): |
24 | 25 | super().__init__('cardinal') |
25 | 26 | self.number = None |
26 | 27 | self.number_exclude_0_to_9 = None |
27 | 28 | self.enable_standalone_number = enable_standalone_number |
28 | 29 | self.enable_0_to_9 = enable_0_to_9 |
| 30 | + self.enable_million = enable_million |
29 | 31 | self.build_tagger() |
30 | 32 | self.build_verbalizer() |
31 | 33 |
|
@@ -57,14 +59,26 @@ def build_tagger(self): |
57 | 59 | | add_weight(digit + addzero**2, 0.8) |
58 | 60 | | add_weight(addzero**3, 1.0))) |
59 | 61 | # 10001111, 1001111, 101111, 11111, 10111, 10011, 10001, 10000 |
60 | | - ten_thousand = ((thousand | hundred | teen | tens | digits) |
61 | | - + delete('万') |
62 | | - + (thousand |
63 | | - | add_weight(zero + hundred, 0.1) |
64 | | - | add_weight(addzero + zero + (tens | teen), 0.5) |
65 | | - | add_weight(addzero + addzero + zero + digit, 0.5) |
66 | | - | add_weight(digit + addzero**3, 0.8) |
67 | | - | add_weight(addzero**4, 1.0))) |
| 62 | + if self.enable_million: |
| 63 | + ten_thousand = ((thousand | hundred | teen | tens | digits) |
| 64 | + + delete('万') |
| 65 | + + (thousand |
| 66 | + | add_weight(zero + hundred, 0.1) |
| 67 | + | add_weight(addzero + zero + (tens | teen), 0.5) |
| 68 | + | add_weight(addzero + addzero + zero + digit, 0.5) |
| 69 | + | add_weight(digit + addzero**3, 0.8) |
| 70 | + | add_weight(addzero**4, 1.0))) |
| 71 | + else: |
| 72 | + ten_thousand = ((teen | tens | digits) |
| 73 | + + delete('万') |
| 74 | + + (thousand |
| 75 | + | add_weight(zero + hundred, 0.1) |
| 76 | + | add_weight(addzero + zero + (tens | teen), 0.5) |
| 77 | + | add_weight(addzero + addzero + zero + digit, 0.5) |
| 78 | + | add_weight(digit + addzero**3, 0.8) |
| 79 | + | add_weight(addzero**4, 1.0))) |
| 80 | + ten_thousand |= (thousand | hundred) + accep("万") + delete("零").ques + ( |
| 81 | + thousand | hundred | tens | teen | digits).ques |
68 | 82 | # 个/十/百/千/万 |
69 | 83 | number = digits | teen | tens | hundred | thousand | ten_thousand |
70 | 84 | # 兆/亿 |
@@ -94,8 +108,8 @@ def build_tagger(self): |
94 | 108 | number_exclude_0_to_9 = teen | tens | hundred | thousand | ten_thousand |
95 | 109 | # 兆/亿 |
96 | 110 | number_exclude_0_to_9 = ( |
97 | | - (number_exclude_0_to_9 + accep('兆') + delete('零').ques).ques + |
98 | | - (number_exclude_0_to_9 + accep('亿') + delete('零').ques).ques + |
| 111 | + ((number_exclude_0_to_9 | digits) + accep('兆') + delete('零').ques).ques + |
| 112 | + ((number_exclude_0_to_9 | digits) + accep('亿') + delete('零').ques).ques + |
99 | 113 | number_exclude_0_to_9 |
100 | 114 | ) |
101 | 115 | # 负的xxx 1.11, 1.01 |
|
0 commit comments