|
6 | 6 | * can use dirent.h, S_ISDIR, S_ISREG, etc. without #ifdef clutter. |
7 | 7 | * |
8 | 8 | * On non-Windows platforms this header is a no-op passthrough. |
| 9 | + * |
| 10 | + * ----------------------------------------------------------------------------- |
| 11 | + * TODO(future): Move this shim out of the public include path. |
| 12 | + * ----------------------------------------------------------------------------- |
| 13 | + * Flagged in PR #383 review (coderabbitai): this header currently lives under |
| 14 | + * `include/rac/core/` which means any SDK consumer that pulls a commons public |
| 15 | + * header transitively inherits *un-prefixed* global names — `DIR`, `dirent`, |
| 16 | + * `opendir`, `readdir`, `closedir`, `strcasecmp`, `strncasecmp`, and the |
| 17 | + * `S_IS*` / `S_IFLNK` macros. That: |
| 18 | + * 1. Breaks the project's "all public symbols must be `rac_` prefixed" rule |
| 19 | + * (see `sdk/runanywhere-commons/CLAUDE.md`). |
| 20 | + * 2. Can collide with a consumer's own dirent shim or the platform's real |
| 21 | + * headers if they include in a different order. |
| 22 | + * Impact is Windows-only in practice (POSIX platforms just pass through to |
| 23 | + * system headers), but it's still a leaky public contract. |
| 24 | + * |
| 25 | + * Options for the cleanup: |
| 26 | + * A) Move the implementation to `src/internal/rac_platform_compat.h` so it's |
| 27 | + * never installed / never visible to consumers. All current call sites |
| 28 | + * would need their `#include` path updated. This is the preferred fix. |
| 29 | + * B) Keep the header public but rename every exposed symbol to `rac_*` |
| 30 | + * (`rac_opendir`, `rac_readdir`, `rac_dirent`, `rac_strcasecmp`, …) and |
| 31 | + * update every call site. More invasive in source but keeps drop-in |
| 32 | + * POSIX-ish semantics; less aligned with the project rule. |
| 33 | + * |
| 34 | + * Current call sites to update (option A or B): |
| 35 | + * - src/features/vlm/vlm_component.cpp |
| 36 | + * - src/features/rag/onnx_embedding_provider.cpp |
| 37 | + * - src/features/result_free.cpp |
| 38 | + * - src/backends/onnx/onnx_backend.cpp |
| 39 | + * - src/backends/onnx/wakeword_onnx.cpp |
| 40 | + * - src/infrastructure/download/download_orchestrator.cpp |
| 41 | + * - src/infrastructure/extraction/rac_extraction.cpp |
| 42 | + * - src/infrastructure/telemetry/telemetry_json.cpp |
| 43 | + * - tests/test_extraction.cpp, tests/test_download_orchestrator.cpp, tests/test_common.h |
| 44 | + * - Any new Windows-facing file that uses opendir/stat/etc. |
| 45 | + * |
| 46 | + * Deferred because it's orthogonal to the "make Windows build work" goal. |
| 47 | + * Deferring is safe: the pollution only manifests on Windows, and no |
| 48 | + * external consumer builds commons on Windows yet. |
9 | 49 | */ |
10 | 50 |
|
11 | 51 | #ifndef RAC_PLATFORM_COMPAT_H |
@@ -129,17 +169,33 @@ static inline int closedir(DIR* dir) { |
129 | 169 |
|
130 | 170 | #ifdef _WIN32 |
131 | 171 | /** |
132 | | - * Convert a UTF-8 std::string to std::wstring for Windows wide-char APIs. |
133 | | - * Used by ONNX Runtime Session creation which requires wchar_t* on Windows. |
| 172 | + * Convert a UTF-8 std::string to std::wstring (UTF-16) for Windows wide-char APIs. |
| 173 | + * Uses MultiByteToWideChar so non-ASCII paths (Chinese, Japanese, accented chars) |
| 174 | + * convert correctly — a plain byte-widening copy would corrupt multi-byte UTF-8 |
| 175 | + * sequences. Used by ONNX Runtime session creation which requires wchar_t*. |
134 | 176 | */ |
135 | 177 | inline std::wstring rac_to_wstring(const std::string& s) { |
136 | | - return std::wstring(s.begin(), s.end()); |
| 178 | + if (s.empty()) return {}; |
| 179 | + int size = MultiByteToWideChar(CP_UTF8, 0, s.data(), |
| 180 | + static_cast<int>(s.size()), nullptr, 0); |
| 181 | + if (size <= 0) return {}; |
| 182 | + std::wstring out(static_cast<size_t>(size), L'\0'); |
| 183 | + MultiByteToWideChar(CP_UTF8, 0, s.data(), static_cast<int>(s.size()), |
| 184 | + &out[0], size); |
| 185 | + return out; |
137 | 186 | } |
138 | 187 | inline std::wstring rac_to_wstring(const char* s) { |
139 | | - if (!s) return {}; |
140 | | - return std::wstring(s, s + strlen(s)); |
| 188 | + if (!s || !*s) return {}; |
| 189 | + return rac_to_wstring(std::string(s)); |
141 | 190 | } |
142 | | -/** On Windows, ONNX Runtime expects wchar_t* paths */ |
| 191 | +/** |
| 192 | + * On Windows, ONNX Runtime expects wchar_t* paths. |
| 193 | + * NOTE: The macro expands to a pointer into a temporary std::wstring that dies at |
| 194 | + * the end of the full-expression, so never store the macro's result. If the path |
| 195 | + * must outlive that expression, keep the std::wstring in a named local instead: |
| 196 | + * std::wstring wp = rac_to_wstring(p); |
| 197 | + * Ort::Session s(env, wp.c_str(), options); |
| 198 | + */ |
143 | 199 | #define RAC_ORT_PATH(p) rac_to_wstring(p).c_str() |
144 | 200 | #else |
145 | 201 | /** On non-Windows, ONNX Runtime expects char* paths */ |
|
0 commit comments