1515#include " processor/token_parser.h"
1616
1717#include " utils/log.h"
18- #include " utils/utf8_string .h"
18+ #include " utils/string .h"
1919
2020namespace wetext {
2121const std::string EOS = " <EOS>" ;
@@ -41,113 +41,113 @@ const std::unordered_map<std::string, std::vector<std::string>> ITN_ORDERS = {
4141
4242TokenParser::TokenParser (ParseType type) {
4343 if (type == ParseType::kTN ) {
44- orders = TN_ORDERS;
44+ orders_ = TN_ORDERS;
4545 } else {
46- orders = ITN_ORDERS;
46+ orders_ = ITN_ORDERS;
4747 }
4848}
4949
50- void TokenParser::load (const std::string& input) {
51- string2chars (input, &text );
52- CHECK_GT (text .size (), 0 );
53- index = 0 ;
54- ch = text [0 ];
50+ void TokenParser::Load (const std::string& input) {
51+ SplitUTF8StringToChars (input, &text_ );
52+ CHECK_GT (text_ .size (), 0 );
53+ index_ = 0 ;
54+ ch_ = text_ [0 ];
5555}
5656
57- bool TokenParser::read () {
58- if (index < text .size () - 1 ) {
59- index += 1 ;
60- ch = text[index ];
57+ bool TokenParser::Read () {
58+ if (index_ < text_ .size () - 1 ) {
59+ index_ += 1 ;
60+ ch_ = text_[index_ ];
6161 return true ;
6262 }
63- ch = EOS;
63+ ch_ = EOS;
6464 return false ;
6565}
6666
67- bool TokenParser::parse_ws () {
68- bool not_eos = ch != EOS;
69- while (not_eos && ch == " " ) {
70- not_eos = read ();
67+ bool TokenParser::ParseWs () {
68+ bool not_eos = ch_ != EOS;
69+ while (not_eos && ch_ == " " ) {
70+ not_eos = Read ();
7171 }
7272 return not_eos;
7373}
7474
75- bool TokenParser::parse_char (const std::string& exp) {
76- if (ch == exp) {
77- read ();
75+ bool TokenParser::ParseChar (const std::string& exp) {
76+ if (ch_ == exp) {
77+ Read ();
7878 return true ;
7979 }
8080 return false ;
8181}
8282
83- bool TokenParser::parse_chars (const std::string& exp) {
83+ bool TokenParser::ParseChars (const std::string& exp) {
8484 bool ok = false ;
8585 std::vector<std::string> chars;
86- string2chars (exp, &chars);
86+ SplitUTF8StringToChars (exp, &chars);
8787 for (const auto & x : chars) {
88- ok |= parse_char (x);
88+ ok |= ParseChar (x);
8989 }
9090 return ok;
9191}
9292
93- std::string TokenParser::parse_key () {
94- CHECK_NE (ch , EOS);
95- CHECK_EQ (UTF8_WHITESPACE.count (ch ), 0 );
93+ std::string TokenParser::ParseKey () {
94+ CHECK_NE (ch_ , EOS);
95+ CHECK_EQ (UTF8_WHITESPACE.count (ch_ ), 0 );
9696
9797 std::string key = " " ;
98- while (ASCII_LETTERS.count (ch ) > 0 ) {
99- key += ch ;
100- read ();
98+ while (ASCII_LETTERS.count (ch_ ) > 0 ) {
99+ key += ch_ ;
100+ Read ();
101101 }
102102 return key;
103103}
104104
105- std::string TokenParser::parse_value () {
106- CHECK_NE (ch , EOS);
105+ std::string TokenParser::ParseValue () {
106+ CHECK_NE (ch_ , EOS);
107107 bool escape = false ;
108108
109109 std::string value = " " ;
110- while (ch != " \" " ) {
111- value += ch ;
112- escape = ch == " \\ " && !escape;
113- read ();
110+ while (ch_ != " \" " ) {
111+ value += ch_ ;
112+ escape = ch_ == " \\ " && !escape;
113+ Read ();
114114 if (escape) {
115- value += ch ;
116- read ();
115+ value += ch_ ;
116+ Read ();
117117 }
118118 }
119119 return value;
120120}
121121
122- void TokenParser::parse (const std::string& input) {
123- load (input);
124- while (parse_ws ()) {
125- std::string name = parse_key ();
126- parse_chars (" { " );
122+ void TokenParser::Parse (const std::string& input) {
123+ Load (input);
124+ while (ParseWs ()) {
125+ std::string name = ParseKey ();
126+ ParseChars (" { " );
127127
128128 Token token (name);
129- while (parse_ws ()) {
130- if (ch == " }" ) {
131- parse_char (" }" );
129+ while (ParseWs ()) {
130+ if (ch_ == " }" ) {
131+ ParseChar (" }" );
132132 break ;
133133 }
134- std::string key = parse_key ();
135- parse_chars (" : \" " );
136- std::string value = parse_value ();
137- parse_char (" \" " );
138- token.append (key, value);
134+ std::string key = ParseKey ();
135+ ParseChars (" : \" " );
136+ std::string value = ParseValue ();
137+ ParseChar (" \" " );
138+ token.Append (key, value);
139139 }
140- tokens .emplace_back (token);
140+ tokens_ .emplace_back (token);
141141 }
142142}
143143
144- std::string TokenParser::reorder (const std::string& input) {
145- parse (input);
144+ std::string TokenParser::Reorder (const std::string& input) {
145+ Parse (input);
146146 std::string output = " " ;
147- for (auto & token : tokens ) {
148- output += token.string (orders ) + " " ;
147+ for (auto & token : tokens_ ) {
148+ output += token.String (orders_ ) + " " ;
149149 }
150- return trim (output);
150+ return Trim (output);
151151}
152152
153153} // namespace wetext
0 commit comments