Skip to content

Commit 6f5d3db

Browse files
authored
[runtime] Use google style (#89)
1 parent 4dc6f87 commit 6f5d3db

17 files changed

Lines changed: 185 additions & 176 deletions

inverse_normalize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def main():
3131
parser.add_argument('--text', help='input string')
3232
parser.add_argument('--file', help='input file path')
3333
parser.add_argument('--overwrite_cache', action='store_true',
34-
help='rebuild *.far')
34+
help='rebuild *.fst')
3535
parser.add_argument('--enable_standalone_number', type=str,
3636
default='True',
3737
help='enable standalone number')

normalize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def main():
2323
parser.add_argument('--text', help='input string')
2424
parser.add_argument('--file', help='input file path')
2525
parser.add_argument('--overwrite_cache', action='store_true',
26-
help='rebuild *.far')
26+
help='rebuild *.fst')
2727
args = parser.parse_args()
2828

2929
normalizer = Normalizer(cache_dir='tn',

runtime/CMakeLists.txt

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,9 @@ endif()
2929
include(openfst)
3030
include_directories(${PROJECT_SOURCE_DIR})
3131

32-
add_library(processor STATIC
33-
processor/processor.cc
34-
processor/token_parser.cc
35-
utils/utf8_string.cc
36-
)
37-
if(MSVC)
38-
target_link_libraries(processor PUBLIC fst)
39-
else()
40-
target_link_libraries(processor PUBLIC dl fst)
41-
endif()
42-
43-
add_executable(processor_main bin/processor_main.cc)
44-
target_link_libraries(processor_main PUBLIC processor)
32+
add_subdirectory(utils)
33+
add_subdirectory(processor)
34+
add_subdirectory(bin)
4535

4636
if(BUILD_TESTING)
4737
include(gtest)

runtime/bin/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
add_executable(processor_main processor_main.cc)
2+
target_link_libraries(processor_main PUBLIC processor)

runtime/bin/processor_main.cc

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,19 +34,19 @@ int main(int argc, char* argv[]) {
3434
wetext::Processor processor(FLAGS_tagger, FLAGS_verbalizer);
3535

3636
if (!FLAGS_text.empty()) {
37-
std::string tagged_text = processor.tag(FLAGS_text);
37+
std::string tagged_text = processor.Tag(FLAGS_text);
3838
std::cout << tagged_text << std::endl;
39-
std::string normalized_text = processor.verbalize(tagged_text);
39+
std::string normalized_text = processor.Verbalize(tagged_text);
4040
std::cout << normalized_text << std::endl;
4141
}
4242

4343
if (!FLAGS_file.empty()) {
4444
std::ifstream file(FLAGS_file);
4545
std::string line;
4646
while (getline(file, line)) {
47-
std::string tagged_text = processor.tag(line);
47+
std::string tagged_text = processor.Tag(line);
4848
std::cout << tagged_text << std::endl;
49-
std::string normalized_text = processor.verbalize(tagged_text);
49+
std::string normalized_text = processor.Verbalize(tagged_text);
5050
std::cout << normalized_text << std::endl;
5151
}
5252
}

runtime/processor/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
add_library(processor STATIC
2+
processor.cc
3+
token_parser.cc
4+
)
5+
if(MSVC)
6+
target_link_libraries(processor PUBLIC fst utils)
7+
else()
8+
target_link_libraries(processor PUBLIC dl fst utils)
9+
endif()

runtime/processor/processor.cc

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ Processor::Processor(const std::string& tagger_path,
3434
}
3535
}
3636

37-
std::string Processor::shortest_path(const StdVectorFst& lattice) {
37+
std::string Processor::ShortestPath(const StdVectorFst& lattice) {
3838
StdVectorFst shortest_path;
3939
fst::ShortestPath(lattice, &shortest_path, 1, true);
4040

@@ -43,31 +43,34 @@ std::string Processor::shortest_path(const StdVectorFst& lattice) {
4343
return output;
4444
}
4545

46-
std::string Processor::compose(const std::string& input,
46+
std::string Processor::Compose(const std::string& input,
4747
const StdVectorFst* fst) {
4848
StdVectorFst input_fst;
4949
compiler_->operator()(input, &input_fst);
5050

5151
StdVectorFst lattice;
5252
fst::Compose(input_fst, *fst, &lattice);
53-
return shortest_path(lattice);
53+
return ShortestPath(lattice);
5454
}
5555

56-
std::string Processor::tag(const std::string& input) {
57-
return compose(input, tagger_.get());
56+
std::string Processor::Tag(const std::string& input) {
57+
return Compose(input, tagger_.get());
5858
}
5959

60-
std::string Processor::verbalize(const std::string& input) {
60+
std::string Processor::Verbalize(const std::string& input) {
6161
if (input.empty()) {
6262
return "";
6363
}
6464
TokenParser parser(parse_type_);
65-
std::string output = parser.reorder(input);
66-
return compose(output, verbalizer_.get());
65+
std::string output = parser.Reorder(input);
66+
67+
output = Compose(output, verbalizer_.get());
68+
output.erase(std::remove(output.begin(), output.end(), '\0'), output.end());
69+
return output;
6770
}
6871

69-
std::string Processor::normalize(const std::string& input) {
70-
return verbalize(tag(input));
72+
std::string Processor::Normalize(const std::string& input) {
73+
return Verbalize(Tag(input));
7174
}
7275

7376
} // namespace wetext

runtime/processor/processor.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,13 @@ namespace wetext {
2828
class Processor {
2929
public:
3030
Processor(const std::string& tagger_path, const std::string& verbalizer_path);
31-
std::string tag(const std::string& input);
32-
std::string verbalize(const std::string& input);
33-
std::string normalize(const std::string& input);
31+
std::string Tag(const std::string& input);
32+
std::string Verbalize(const std::string& input);
33+
std::string Normalize(const std::string& input);
3434

3535
private:
36-
std::string shortest_path(const StdVectorFst& lattice);
37-
std::string compose(const std::string& input, const StdVectorFst* fst);
36+
std::string ShortestPath(const StdVectorFst& lattice);
37+
std::string Compose(const std::string& input, const StdVectorFst* fst);
3838

3939
ParseType parse_type_;
4040
std::shared_ptr<StdVectorFst> tagger_ = nullptr;

runtime/processor/token_parser.cc

Lines changed: 56 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
#include "processor/token_parser.h"
1616

1717
#include "utils/log.h"
18-
#include "utils/utf8_string.h"
18+
#include "utils/string.h"
1919

2020
namespace wetext {
2121
const std::string EOS = "<EOS>";
@@ -41,113 +41,113 @@ const std::unordered_map<std::string, std::vector<std::string>> ITN_ORDERS = {
4141

4242
TokenParser::TokenParser(ParseType type) {
4343
if (type == ParseType::kTN) {
44-
orders = TN_ORDERS;
44+
orders_ = TN_ORDERS;
4545
} else {
46-
orders = ITN_ORDERS;
46+
orders_ = ITN_ORDERS;
4747
}
4848
}
4949

50-
void TokenParser::load(const std::string& input) {
51-
string2chars(input, &text);
52-
CHECK_GT(text.size(), 0);
53-
index = 0;
54-
ch = text[0];
50+
void TokenParser::Load(const std::string& input) {
51+
SplitUTF8StringToChars(input, &text_);
52+
CHECK_GT(text_.size(), 0);
53+
index_ = 0;
54+
ch_ = text_[0];
5555
}
5656

57-
bool TokenParser::read() {
58-
if (index < text.size() - 1) {
59-
index += 1;
60-
ch = text[index];
57+
bool TokenParser::Read() {
58+
if (index_ < text_.size() - 1) {
59+
index_ += 1;
60+
ch_ = text_[index_];
6161
return true;
6262
}
63-
ch = EOS;
63+
ch_ = EOS;
6464
return false;
6565
}
6666

67-
bool TokenParser::parse_ws() {
68-
bool not_eos = ch != EOS;
69-
while (not_eos && ch == " ") {
70-
not_eos = read();
67+
bool TokenParser::ParseWs() {
68+
bool not_eos = ch_ != EOS;
69+
while (not_eos && ch_ == " ") {
70+
not_eos = Read();
7171
}
7272
return not_eos;
7373
}
7474

75-
bool TokenParser::parse_char(const std::string& exp) {
76-
if (ch == exp) {
77-
read();
75+
bool TokenParser::ParseChar(const std::string& exp) {
76+
if (ch_ == exp) {
77+
Read();
7878
return true;
7979
}
8080
return false;
8181
}
8282

83-
bool TokenParser::parse_chars(const std::string& exp) {
83+
bool TokenParser::ParseChars(const std::string& exp) {
8484
bool ok = false;
8585
std::vector<std::string> chars;
86-
string2chars(exp, &chars);
86+
SplitUTF8StringToChars(exp, &chars);
8787
for (const auto& x : chars) {
88-
ok |= parse_char(x);
88+
ok |= ParseChar(x);
8989
}
9090
return ok;
9191
}
9292

93-
std::string TokenParser::parse_key() {
94-
CHECK_NE(ch, EOS);
95-
CHECK_EQ(UTF8_WHITESPACE.count(ch), 0);
93+
std::string TokenParser::ParseKey() {
94+
CHECK_NE(ch_, EOS);
95+
CHECK_EQ(UTF8_WHITESPACE.count(ch_), 0);
9696

9797
std::string key = "";
98-
while (ASCII_LETTERS.count(ch) > 0) {
99-
key += ch;
100-
read();
98+
while (ASCII_LETTERS.count(ch_) > 0) {
99+
key += ch_;
100+
Read();
101101
}
102102
return key;
103103
}
104104

105-
std::string TokenParser::parse_value() {
106-
CHECK_NE(ch, EOS);
105+
std::string TokenParser::ParseValue() {
106+
CHECK_NE(ch_, EOS);
107107
bool escape = false;
108108

109109
std::string value = "";
110-
while (ch != "\"") {
111-
value += ch;
112-
escape = ch == "\\" && !escape;
113-
read();
110+
while (ch_ != "\"") {
111+
value += ch_;
112+
escape = ch_ == "\\" && !escape;
113+
Read();
114114
if (escape) {
115-
value += ch;
116-
read();
115+
value += ch_;
116+
Read();
117117
}
118118
}
119119
return value;
120120
}
121121

122-
void TokenParser::parse(const std::string& input) {
123-
load(input);
124-
while (parse_ws()) {
125-
std::string name = parse_key();
126-
parse_chars(" { ");
122+
void TokenParser::Parse(const std::string& input) {
123+
Load(input);
124+
while (ParseWs()) {
125+
std::string name = ParseKey();
126+
ParseChars(" { ");
127127

128128
Token token(name);
129-
while (parse_ws()) {
130-
if (ch == "}") {
131-
parse_char("}");
129+
while (ParseWs()) {
130+
if (ch_ == "}") {
131+
ParseChar("}");
132132
break;
133133
}
134-
std::string key = parse_key();
135-
parse_chars(": \"");
136-
std::string value = parse_value();
137-
parse_char("\"");
138-
token.append(key, value);
134+
std::string key = ParseKey();
135+
ParseChars(": \"");
136+
std::string value = ParseValue();
137+
ParseChar("\"");
138+
token.Append(key, value);
139139
}
140-
tokens.emplace_back(token);
140+
tokens_.emplace_back(token);
141141
}
142142
}
143143

144-
std::string TokenParser::reorder(const std::string& input) {
145-
parse(input);
144+
std::string TokenParser::Reorder(const std::string& input) {
145+
Parse(input);
146146
std::string output = "";
147-
for (auto& token : tokens) {
148-
output += token.string(orders) + " ";
147+
for (auto& token : tokens_) {
148+
output += token.String(orders_) + " ";
149149
}
150-
return trim(output);
150+
return Trim(output);
151151
}
152152

153153
} // namespace wetext

runtime/processor/token_parser.h

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,12 @@ struct Token {
3737

3838
Token(const std::string& name) : name(name) {}
3939

40-
void append(const std::string& key, const std::string& value) {
40+
void Append(const std::string& key, const std::string& value) {
4141
order.emplace_back(key);
4242
members[key] = value;
4343
}
4444

45-
std::string string(
45+
std::string String(
4646
const std::unordered_map<std::string, std::vector<std::string>>& orders) {
4747
std::string output = name + " {";
4848
if (orders.count(name) > 0) {
@@ -67,25 +67,25 @@ enum ParseType {
6767
class TokenParser {
6868
public:
6969
TokenParser(ParseType type);
70-
std::string reorder(const std::string& input);
70+
std::string Reorder(const std::string& input);
7171

7272
private:
73-
void load(const std::string& input);
74-
bool read();
75-
bool parse_ws();
76-
bool parse_char(const std::string& exp);
77-
bool parse_chars(const std::string& exp);
78-
std::string parse_key();
79-
std::string parse_value();
80-
void parse(const std::string& input);
73+
void Load(const std::string& input);
74+
bool Read();
75+
bool ParseWs();
76+
bool ParseChar(const std::string& exp);
77+
bool ParseChars(const std::string& exp);
78+
std::string ParseKey();
79+
std::string ParseValue();
80+
void Parse(const std::string& input);
8181

82-
int index;
83-
std::string ch;
84-
std::vector<std::string> text;
85-
std::vector<Token> tokens;
86-
std::unordered_map<std::string, std::vector<std::string>> orders;
82+
int index_;
83+
std::string ch_;
84+
std::vector<std::string> text_;
85+
std::vector<Token> tokens_;
86+
std::unordered_map<std::string, std::vector<std::string>> orders_;
8787
};
8888

89-
} // wetext
89+
} // namespace wetext
9090

9191
#endif // PROCESSOR_TOKEN_PARSER_H_

0 commit comments

Comments
 (0)