@@ -32,24 +32,29 @@ def build_tagger(self):
3232 units_en = string_file ('itn/chinese/data/measure/units_en.tsv' )
3333 units_zh = string_file ('itn/chinese/data/measure/units_zh.tsv' )
3434 sign = string_file ('itn/chinese/data/number/sign.tsv' ) # + -
35+ to = cross ('到' , '~' ) | cross ('到百分之' , '~' )
36+
3537 units = add_weight (units_en , - 1.0 ) | \
3638 ((accep ('亿' ) | accep ('兆' ) | accep ('万' )).ques + units_zh )
3739
3840 number = Cardinal ().number if self .enable_0_to_9 else \
3941 Cardinal ().number_exclude_0_to_9
40- # 百分之三十, 百分三十, 百分之百
42+ # 百分之三十, 百分三十, 百分之百, 百分之三十到四十, 百分之三十到百分之五十五
4143 percent = ((sign + delete ('的' ).ques ).ques + delete ('百分' ) +
42- delete ('之' ).ques + (Cardinal ().number | cross ('百' , '100' ))
44+ delete ('之' ).ques +
45+ ((Cardinal ().number + (to + Cardinal ().number ).ques ) |
46+ ((Cardinal ().number + to ).ques + cross ('百' , '100' )))
4347 + insert ('%' ))
4448
45- # 十千米每小时 => 10km/h
46- measure = number + units
49+ # 十千米每小时 => 10km/h, 十一到一百千米每小时 => 11~100km/h
50+ measure = number + ( to + number ). ques + units
4751 tagger = insert ('value: "' ) + (measure | percent ) + insert ('"' )
4852
49- # 每小时十千米 => 10km/h
53+ # 每小时十千米 => 10km/h, 每小时三十到三百一十一千米 => 30~311km/h
5054 tagger |= (
5155 insert ('denominator: "' ) + delete ('每' ) + units +
5256 insert ('" numerator: "' ) + measure + insert ('"' ))
57+
5358 self .tagger = self .add_tokens (tagger )
5459
5560 def build_verbalizer (self ):
0 commit comments