@@ -73,13 +73,22 @@ def build_tagger(self):
7373 # 负的xxx 1.11, 1.01
7474 number = sign .ques + number + (dot + digits .plus ).ques
7575 # 五六万,三五千,六七百,三四十
76- number |= add_weight (
77- (digit + insert ("0~" ) + digit + cross ("十" , "0" )) |
78- (digit + insert ("00~" ) + digit + cross ("百" , "00" )) |
79- (digit + insert ("000~" ) + digit + cross ("千" , "000" )) |
80- (digit + insert ("0000~" ) + digit + cross ("万" , "0000" )), - 1.0
81- )
76+ special_2number = digit + insert ("0~" ) + digit + cross ("十" , "0" )
77+ special_2number |= digit + insert ("00~" ) + digit + cross ("百" , "00" )
78+ special_2number |= digit + insert ("000~" ) + digit + cross ("千" , "000" )
79+ special_2number |= digit + insert ("0000~" ) + digit + cross ("万" , "0000" )
80+ number |= special_2number
81+ # 十七八美元 => $17~18, 四十五六岁 => 45-6岁,
82+ # 三百七八公里 => 370-80km, 三百七八十千克 => 370-80kg
83+ special_3number = cross ('十' , '1' ) + digit + insert ("~1" ) + digit
84+ special_3number |= digit + delete ('十' ) + digit + insert ("-" ) + digit
85+ special_3number |= digit + delete ('百' ) + digit + insert ("0-" ) + digit \
86+ + (insert ("0" ) | add_weight (cross ("十" , "0" ), - 0.1 ))
87+ number |= add_weight (special_3number , - 100.0 )
88+
8289 self .number = number .optimize ()
90+ self .special_2number = special_2number .optimize ()
91+ self .special_3number = special_3number .optimize ()
8392
8493 # 十/百/千/万
8594 number_exclude_0_to_9 = teen | tens | hundred | thousand | ten_thousand
@@ -95,12 +104,11 @@ def build_tagger(self):
95104 (dot + digits .plus ).plus
96105 )
97106 # 五六万,三五千,六七百,三四十
98- number_exclude_0_to_9 |= add_weight (
99- (digit + insert ("0~" ) + digit + cross ("十" , "0" )) |
100- (digit + insert ("00~" ) + digit + cross ("百" , "00" )) |
101- (digit + insert ("000~" ) + digit + cross ("千" , "000" )) |
102- (digit + insert ("0000~" ) + digit + cross ("万" , "0000" )), - 1.0
103- )
107+ # 十七八美元 => $17~18, 四十五六岁 => 45-6岁,
108+ # 三百七八公里 => 370-80km, 三百七八十千克 => 370-80kg
109+ number_exclude_0_to_9 |= special_2number
110+ number_exclude_0_to_9 |= add_weight (special_3number , - 100.0 )
111+
104112 self .number_exclude_0_to_9 = (sign .ques + number_exclude_0_to_9 ).optimize () # noqa
105113
106114 # cardinal string like 127.0.0.1, used in ID, IP, etc.
0 commit comments