Skip to content

Commit 4b9118a

Browse files
authored
[runtime] use fst instead of far (#68)
* save fst instead of far * fix runtime * fix workflow * update cmakelists * use shared ptr
1 parent 978266a commit 4b9118a

17 files changed

Lines changed: 104 additions & 193 deletions

File tree

.github/workflows/wheels.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
- name: Prepare Graph
2424
run: |
2525
mkdir graph
26-
cp tn/zh_tn_normalizer.far graph
27-
cp itn/zh_itn_normalizer.far graph
26+
cp tn/*.fst graph
27+
cp itn/*.fst graph
2828
2929
- name: Upload Graph
3030
uses: actions/upload-artifact@v3

itn/chinese/inverse_normalizer.py

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import os
16-
1715
from tn.processor import Processor
1816
from itn.chinese.rules.cardinal import Cardinal
1917
from itn.chinese.rules.char import Char
@@ -26,36 +24,19 @@
2624
from itn.chinese.rules.time import Time
2725
from itn.chinese.rules.preprocessor import PreProcessor
2826

29-
from pynini import Far
3027
from pynini.lib.pynutil import add_weight, delete
3128
from importlib_resources import files
3229

3330

3431
class InverseNormalizer(Processor):
3532

36-
def __init__(self, cache_dir=None, overwrite_cache=False,
33+
def __init__(self, cache_dir='itn', overwrite_cache=False,
3734
enable_standalone_number=True,
3835
enable_0_to_9=True):
3936
super().__init__(name='inverse_normalizer', ordertype='itn')
40-
self.cache_dir = cache_dir
41-
self.overwrite_cache = overwrite_cache
4237
self.convert_number = enable_standalone_number
4338
self.enable_0_to_9 = enable_0_to_9
44-
45-
far_file = files('itn').joinpath('zh_itn_normalizer.far')
46-
if self.cache_dir:
47-
os.makedirs(self.cache_dir, exist_ok=True)
48-
far_file = os.path.join(self.cache_dir, 'zh_itn_normalizer.far')
49-
50-
if far_file and os.path.exists(far_file) and not overwrite_cache:
51-
self.tagger = Far(far_file)['tagger']
52-
self.verbalizer = Far(far_file)['verbalizer']
53-
else:
54-
self.build_tagger()
55-
self.build_verbalizer()
56-
57-
if self.cache_dir and self.overwrite_cache:
58-
self.export(far_file)
39+
self.build_fst('zh_itn', cache_dir, overwrite_cache)
5940

6041
def build_tagger(self):
6142
tagger = (add_weight(Date().tagger, 1.02)

runtime/CMakeLists.txt

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
22

3-
project(text_processing VERSION 0.1)
3+
project(wetextprocessing VERSION 0.1)
44
set(CMAKE_CXX_STANDARD 14)
55

66
set(CMAKE_VERBOSE_MAKEFILE OFF)
7-
option(BUILD_TESTING "whether to build unit test" ON)
8-
option(FST_HAVE_BIN "whether to build fst binaries" OFF)
7+
option(BUILD_TESTING "whether to build unit test" OFF)
98

109
include(FetchContent)
11-
include(ExternalProject)
1210
set(FETCHCONTENT_QUIET OFF)
1311
get_filename_component(fc_base "fc_base" REALPATH BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
1412
set(FETCHCONTENT_BASE_DIR ${fc_base})
@@ -21,9 +19,8 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
2119
endif()
2220

2321
include(openfst)
24-
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
22+
include_directories(${PROJECT_SOURCE_DIR})
2523
add_subdirectory(utils)
26-
add_dependencies(utils openfst)
2724
add_subdirectory(processor)
2825
add_subdirectory(bin)
2926

runtime/bin/processor_main.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,26 @@
1717
#include <string>
1818

1919
#include "processor/processor.h"
20-
#include "processor/token_parser.h"
2120
#include "utils/flags.h"
2221

2322
DEFINE_string(text, "", "input string");
2423
DEFINE_string(file, "", "input file");
25-
DEFINE_string(far, "", "FST archives");
24+
DEFINE_string(tagger, "", "tagger fst path");
25+
DEFINE_string(verbalizer, "", "verbalizer fst path");
2626

2727
int main(int argc, char* argv[]) {
2828
gflags::ParseCommandLineFlags(&argc, &argv, false);
2929
google::InitGoogleLogging(argv[0]);
3030

31-
if (FLAGS_far.empty()) {
32-
LOG(FATAL) << "Please provide the FST archives.";
31+
if (FLAGS_tagger.empty() || FLAGS_verbalizer.empty()) {
32+
LOG(FATAL) << "Please provide the tagger and verbalizer fst files.";
3333
}
34-
wenet::Processor processor(FLAGS_far);
34+
wenet::Processor processor(FLAGS_tagger, FLAGS_verbalizer);
3535

3636
if (!FLAGS_text.empty()) {
3737
std::string tagged_text = processor.tag(FLAGS_text);
3838
std::cout << tagged_text << std::endl;
39-
std::string normalized_text = processor.normalize(FLAGS_text);
39+
std::string normalized_text = processor.verbalize(tagged_text);
4040
std::cout << normalized_text << std::endl;
4141
}
4242

@@ -46,7 +46,7 @@ int main(int argc, char* argv[]) {
4646
while (getline(file, line)) {
4747
std::string tagged_text = processor.tag(line);
4848
std::cout << tagged_text << std::endl;
49-
std::string normalized_text = processor.normalize(line);
49+
std::string normalized_text = processor.verbalize(tagged_text);
5050
std::cout << normalized_text << std::endl;
5151
}
5252
}

runtime/cmake/openfst.cmake

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,36 @@
11
include(gflags)
2+
# We can't build glog with gflags, unless gflags is pre-installed.
3+
# If glog is built with a pre-installed gflags, there will be a conflict.
24
set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE)
35
include(glog)
46

5-
set(CONFIG_FLAGS "")
6-
if(NOT FST_HAVE_BIN)
7-
set(CONFIG_FLAGS "--disable-bin")
7+
set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE)
8+
set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE)
9+
set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE)
10+
set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE)
11+
set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE)
12+
set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE)
13+
set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE)
14+
set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE)
15+
set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE)
16+
set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE)
17+
set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE)
18+
set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE)
19+
20+
if(MSVC)
21+
add_compile_options(/W0 /wd4244 /wd4267)
822
endif()
923

24+
# "OpenFST port for Windows" builds openfst with cmake for multiple platforms.
1025
# OpenFst is compiled with glog/gflags so that its logging and flags do not conflict with those in wenet/libtorch.
1126
# To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc.
1227
set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory")
13-
set(openfst_PREFIX_DIR ${fc_base}/openfst-subbuild/openfst-populate-prefix CACHE PATH "OpenFST prefix directory")
14-
ExternalProject_Add(openfst
15-
URL https://github.com/mjansche/openfst/archive/1.7.2.zip
16-
URL_HASH MD5=96656fee440ee2d71006a4900ef9ac00
17-
PREFIX ${openfst_PREFIX_DIR}
18-
SOURCE_DIR ${openfst_SOURCE_DIR}
19-
CONFIGURE_COMMAND ${openfst_SOURCE_DIR}/configure ${CONFIG_FLAGS} --enable-far --prefix=${openfst_PREFIX_DIR}
20-
"CPPFLAGS=-I${gflags_BINARY_DIR}/include -I${glog_SOURCE_DIR}/src -I${glog_BINARY_DIR}"
21-
"LDFLAGS=-L${gflags_BINARY_DIR} -L${glog_BINARY_DIR}"
22-
"LIBS=-lgflags_nothreads -lglog -lpthread"
23-
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR}
24-
BUILD_COMMAND make -j$(nproc)
28+
FetchContent_Declare(openfst
29+
URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.7.2.1.tar.gz
30+
URL_HASH SHA256=e04e1dabcecf3a687ace699ccb43a8a27da385777a56e69da6e103344cc66bca
31+
PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR}
2532
)
26-
add_dependencies(openfst gflags glog)
27-
link_directories(${openfst_PREFIX_DIR}/lib)
33+
FetchContent_MakeAvailable(openfst)
34+
add_dependencies(fst gflags glog)
35+
target_link_libraries(fst PUBLIC gflags_nothreads_static glog)
2836
include_directories(${openfst_SOURCE_DIR}/src/include)

runtime/patch/openfst/src/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ install(DIRECTORY include/ DESTINATION include/
66
FILES_MATCHING PATTERN "*.h")
77

88
add_subdirectory(lib)
9-
add_subdirectory(script)
9+
10+
if(HAVE_SCRIPT)
11+
add_subdirectory(script)
12+
endif(HAVE_SCRIPT)
1013

1114
if(HAVE_BIN)
1215
add_subdirectory(bin)

runtime/patch/openfst/src/bin/CMakeLists.txt

Whitespace-only changes.

runtime/patch/openfst/src/extensions/special/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ if(HAVE_BIN)
1010
sigma-fst.cc
1111
)
1212

13-
set_target_properties(fstspecial-bin PROPERTIE
13+
set_target_properties(fstspecial-bin PROPERTIES
1414
FOLDER special/bin
1515
OUTPUT_NAME fstspecial
1616
)

runtime/patch/openfst/src/test/CMakeLists.txt

Lines changed: 0 additions & 54 deletions
This file was deleted.

runtime/processor/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ add_library(processor STATIC
22
processor.cc
33
token_parser.cc
44
)
5-
target_link_libraries(processor PUBLIC fstfar utils)
5+
target_link_libraries(processor PUBLIC utils)

0 commit comments

Comments
 (0)