@@ -31,17 +31,31 @@ def build_tagger(self):
3131 units_zh = string_file ('tn/chinese/data/measure/units_zh.tsv' )
3232 units = units_en | units_zh
3333 rmspace = delete (' ' ).ques
34+ to = cross ('-' , '到' ) | cross ('~' , '到' ) | accep ('到' )
3435
3536 number = Cardinal ().number
3637 percent = insert ('百分之' ) + number + delete ('%' )
3738
3839 number @= self .build_rule (cross ('二' , '两' ), '[BOS]' , '[EOS]' )
3940 # 1-11个,1个-11个
40- prefix = (number + (rmspace + units ).ques +
41- (cross ('-' , '到' ) | accep ('到' )))
41+ prefix = number + (rmspace + units ).ques + to
4242 measure = prefix .ques + number + rmspace + units
43- measure @= self .build_rule (cross ('两两' , '二两' ), '[BOS]' , '' )
44- tagger = insert ('value: "' ) + (measure | percent ) + insert ('"' )
43+
44+ for unit in ['两' , '月' , '号' ]:
45+ measure @= self .build_rule (cross ('两' + unit , '二' + unit ),
46+ l = '[BOS]' )
47+ measure @= self .build_rule (cross ('到两' + unit , '到二' + unit ),
48+ r = '[EOS]' )
49+
50+ # -xxxx年, -xx年
51+ digits = Cardinal ().digits
52+ cardinal = digits ** 2 | digits ** 4
53+ unit = accep ('年' ) | accep ('年度' ) | accep ('赛季' )
54+ prefix = cardinal + (rmspace + unit ).ques + to
55+ annual = prefix .ques + cardinal + unit
56+
57+ tagger = insert ('value: "' ) + (measure | percent
58+ | annual ) + insert ('"' )
4559
4660 # 10km/h
4761 rmsign = rmspace + delete ('/' ) + rmspace
0 commit comments