1616
1717# TODO(pzd17): multi-language support
1818from tn .chinese .normalizer import Normalizer
19+ from itn .main import str2bool
1920
2021
2122def main ():
@@ -27,10 +28,30 @@ def main():
2728 help = 'cache dir containing *.fst' )
2829 parser .add_argument ('--overwrite_cache' , action = 'store_true' ,
2930 help = 'rebuild *.fst' )
31+ parser .add_argument ('--remove_interjections' , type = str ,
32+ default = 'True' ,
33+ help = 'remove interjections like "啊" and "儿"' )
34+ parser .add_argument ('--traditional_to_simple' , type = str ,
35+ default = 'True' ,
36+ help = 'i.e., "喆" -> "哲"' )
37+ parser .add_argument ('--remove_puncts' , type = str ,
38+ default = 'False' ,
39+ help = 'remove punctuations like "。" and ","' )
40+ parser .add_argument ('--full_to_half' , type = str ,
41+ default = 'True' ,
42+ help = 'i.e., "A" -> "A"' )
43+ parser .add_argument ('--tag_oov' , type = str ,
44+ default = 'False' ,
45+ help = 'tag OOV with "OOV"' )
3046 args = parser .parse_args ()
3147
3248 normalizer = Normalizer (cache_dir = args .cache_dir ,
33- overwrite_cache = args .overwrite_cache )
49+ overwrite_cache = args .overwrite_cache ,
50+ remove_interjections = str2bool (args .remove_interjections ),
51+ traditional_to_simple = str2bool (args .traditional_to_simple ),
52+ remove_puncts = str2bool (args .remove_puncts ),
53+ full_to_half = str2bool (args .full_to_half ),
54+ tag_oov = str2bool (args .tag_oov ))
3455
3556 if args .text :
3657 print (normalizer .tag (args .text ))
0 commit comments