Skip to content

Commit b6df951

Browse files
author
Guy Bedford
authored
gen-host-js: utf16 support (#392)
* gen-host-js: utf16 support * whitespace * use all camelcasing * test integration * remove unused file * formatting * pr review * fixup wasi test reference * c internal string tests, js encoding defaults * fixup unicode string literal * fix string assertion * actually fix c string assertion * le decoding only * remove utf16 decoder intrinsic use
1 parent bd89607 commit b6df951

7 files changed

Lines changed: 403 additions & 216 deletions

File tree

crates/gen-guest-c/src/lib.rs

Lines changed: 92 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@ use wit_bindgen_core::wit_parser::abi::{
88
AbiVariant, Bindgen, Bitcast, Instruction, LiftLower, WasmType,
99
};
1010
use wit_bindgen_core::{uwrite, uwriteln, wit_parser::*, Direction, Files, Generator, Ns};
11+
use wit_component::StringEncoding;
1112

1213
#[derive(Default)]
1314
pub struct C {
1415
src: Source,
1516
in_import: bool,
1617
opts: Opts,
18+
includes: Vec<String>,
1719
funcs: HashMap<String, Vec<Func>>,
1820
return_pointer_area_size: usize,
1921
return_pointer_area_align: usize,
@@ -51,7 +53,10 @@ struct Func {
5153
pub struct Opts {
5254
/// Skip emitting component allocation helper functions
5355
#[cfg_attr(feature = "clap", arg(long))]
54-
no_helpers: bool,
56+
pub no_helpers: bool,
57+
/// Set component string encoding
58+
#[cfg_attr(feature = "clap", arg(long))]
59+
pub string_encoding: StringEncoding,
5560
}
5661

5762
impl Opts {
@@ -98,6 +103,12 @@ impl C {
98103
}
99104
}
100105

106+
fn include(&mut self, incl: &str) {
107+
if !self.includes.iter().any(|i| i == incl) {
108+
self.includes.push(format!("#include {}", incl));
109+
}
110+
}
111+
101112
fn classify_ret(&mut self, iface: &Interface, func: &Function) -> Return {
102113
let mut ret = Return {
103114
return_multiple: false,
@@ -207,6 +218,14 @@ impl C {
207218
}
208219
}
209220

221+
fn char_type(&self) -> String {
222+
match self.opts.string_encoding {
223+
StringEncoding::UTF8 => String::from("char"),
224+
StringEncoding::UTF16 => String::from("char16_t"),
225+
StringEncoding::CompactUTF16 => panic!("Compact UTF16 unsupported"),
226+
}
227+
}
228+
210229
fn type_string(&mut self, iface: &Interface, ty: &Type) -> String {
211230
// Getting a type string happens during codegen, and by default means
212231
// that this is a private type that's being generated. This means we
@@ -1195,9 +1214,7 @@ impl Generator for C {
11951214

11961215
fn finish_one(&mut self, iface: &Interface, files: &mut Files) {
11971216
let linking_symbol = component_type_object::linking_symbol(iface, self.direction);
1198-
self.src.c_includes("#include <stdlib.h>".into());
1199-
self.src
1200-
.c_includes(format!("#include \"{}.h\"", iface.name.to_kebab_case()));
1217+
self.include("<stdlib.h>".into());
12011218
uwrite!(
12021219
self.src.c_adapters,
12031220
"
@@ -1241,36 +1258,70 @@ impl Generator for C {
12411258
}
12421259

12431260
if self.needs_string {
1261+
self.include("<string.h>");
1262+
if self.opts.string_encoding == StringEncoding::UTF16 {
1263+
self.include("<uchar.h>");
1264+
}
1265+
if self.opts.string_encoding == StringEncoding::CompactUTF16 {
1266+
panic!("Compact UTF16 is unsupported");
1267+
}
1268+
12441269
uwrite!(
12451270
self.src.h_defs,
12461271
"
12471272
typedef struct {{
1248-
char *ptr;
1273+
{0} *ptr;
12491274
size_t len;
1250-
}} {0}_string_t;
1275+
}} {1}_string_t;
12511276
",
1277+
self.char_type(),
12521278
iface.name.to_snake_case(),
12531279
);
1280+
// Perhaps the string helpers should just take an explicit length?
12541281
uwrite!(
12551282
self.src.h_helpers,
12561283
"
1257-
void {0}_string_set({0}_string_t *ret, const char *s);
1258-
void {0}_string_dup({0}_string_t *ret, const char *s);
1284+
void {0}_string_set({0}_string_t *ret, const {1} *s);
1285+
void {0}_string_dup({0}_string_t *ret, const {1} *s);
12591286
void {0}_string_free({0}_string_t *ret);\
12601287
",
12611288
iface.name.to_snake_case(),
1289+
self.char_type(),
12621290
);
1291+
let (str_len_s, alignment) = if self.opts.string_encoding == StringEncoding::UTF16 {
1292+
uwrite!(
1293+
self.src.h_helpers,
1294+
"
1295+
size_t {0}_string_len(const char16_t* s);
1296+
",
1297+
iface.name.to_snake_case(),
1298+
);
1299+
uwrite!(
1300+
self.src.c_helpers,
1301+
"
1302+
size_t {0}_string_len(const char16_t* s) {{
1303+
char16_t* c = (char16_t*)s;
1304+
for (; *c; ++c);
1305+
return c-s;
1306+
}}
1307+
",
1308+
iface.name.to_snake_case(),
1309+
);
1310+
(format!("{}_string_len(s)", iface.name.to_snake_case()), "2")
1311+
} else {
1312+
(String::from("strlen(s)"), "1")
1313+
};
12631314
uwrite!(
12641315
self.src.c_helpers,
12651316
"
1266-
void {0}_string_set({0}_string_t *ret, const char *s) {{
1267-
ret->ptr = (char*) s;
1268-
ret->len = strlen(s);
1317+
void {0}_string_set({0}_string_t *ret, const {1} *s) {{
1318+
ret->ptr = ({1}*) s;
1319+
ret->len = {2};
12691320
}}
12701321
1271-
void {0}_string_dup({0}_string_t *ret, const char *s) {{
1272-
ret->len = strlen(s);
1273-
ret->ptr = cabi_realloc(NULL, 0, 1, ret->len);
1322+
void {0}_string_dup({0}_string_t *ret, const {1} *s) {{
ret->len = {2};
ret->ptr = cabi_realloc(NULL, 0, {3}, ret->len * {3});
memcpy(ret->ptr, s, ret->len * {3});
}}
12761327
@@ -1283,6 +1334,9 @@ impl Generator for C {
12831334
}}
12841335
",
12851336
iface.name.to_snake_case(),
1337+
self.char_type(),
1338+
str_len_s,
1339+
alignment,
12861340
);
12871341
}
12881342

@@ -1320,17 +1374,15 @@ impl Generator for C {
13201374
}
13211375
}
13221376

1323-
let mut h_str = format!("#ifndef __BINDINGS_{}_H\n#define __BINDINGS_{}_H\n#ifdef __cplusplus\nextern \"C\" {{\n#endif",
1377+
self.include("<stdint.h>");
1378+
self.include("<stdbool.h>");
1379+
1380+
let mut h_str = format!("#ifndef __BINDINGS_{}_H\n#define __BINDINGS_{}_H\n#ifdef __cplusplus\nextern \"C\" {{\n#endif\n",
13241381
iface.name.to_shouty_snake_case(), iface.name.to_shouty_snake_case());
1325-
h_str.push_str("\n\n#include <stdint.h>\n#include <stdbool.h>\n");
1326-
if self.needs_string {
1327-
h_str.push_str("#include <string.h>\n");
1328-
}
1382+
h_str.push_str(&self.includes.join("\n"));
1383+
h_str.push_str("\n");
13291384

1330-
let mut c_str = self.src.c_includes.join("\n");
1331-
if c_str.len() > 0 {
1332-
c_str.push_str("\n");
1333-
}
1385+
let mut c_str = format!("#include \"{}.h\"\n", iface.name.to_kebab_case());
13341386
c_str.push_str(&self.src.c_fns);
13351387

13361388
if self.src.h_defs.len() > 0 {
@@ -1870,11 +1922,14 @@ impl Bindgen for FunctionBindgen<'_> {
18701922
"switch ({op0}) {{
18711923
case 0: {{
18721924
{result}.is_some = false;
1873-
{none}break;
1925+
{none}\
1926+
break;
18741927
}}
18751928
case 1: {{
18761929
{result}.is_some = true;
1877-
{some}{set_some}break;
1930+
{some}\
1931+
{set_some}\
1932+
break;
18781933
}}
18791934
}}\n"
18801935
);
@@ -1908,26 +1963,26 @@ impl Bindgen for FunctionBindgen<'_> {
19081963
let bind_ok =
19091964
if let Some(ok) = self.gen.get_nonempty_type(iface, result.ok.as_ref()) {
19101965
let ok_ty = self.gen.type_string(iface, ok);
1911-
format!("const {ok_ty} *{ok_payload} = &({op0}).val.ok;")
1966+
format!("const {ok_ty} *{ok_payload} = &({op0}).val.ok;\n")
19121967
} else {
19131968
String::new()
19141969
};
19151970
let bind_err =
19161971
if let Some(err) = self.gen.get_nonempty_type(iface, result.err.as_ref()) {
19171972
let err_ty = self.gen.type_string(iface, err);
1918-
format!("const {err_ty} *{err_payload} = &({op0}).val.err;")
1973+
format!("const {err_ty} *{err_payload} = &({op0}).val.err;\n")
19191974
} else {
19201975
String::new()
19211976
};
19221977
uwrite!(
19231978
self.src,
1924-
"
1979+
"\
19251980
if (({op0}).is_err) {{
1926-
{bind_err}
1927-
{err}
1981+
{bind_err}\
1982+
{err}\
19281983
}} else {{
1929-
{bind_ok}
1930-
{ok}
1984+
{bind_ok}\
1985+
{ok}\
19311986
}}
19321987
"
19331988
);
@@ -1996,8 +2051,11 @@ impl Bindgen for FunctionBindgen<'_> {
19962051
Instruction::StringLift { .. } => {
19972052
let list_name = self.gen.type_string(iface, &Type::String);
19982053
results.push(format!(
1999-
"({}) {{ (char*)({}), (size_t)({}) }}",
2000-
list_name, operands[0], operands[1]
2054+
"({}) {{ ({}*)({}), (size_t)({}) }}",
2055+
list_name,
2056+
self.gen.char_type(),
2057+
operands[0],
2058+
operands[1]
20012059
));
20022060
}
20032061

@@ -2281,7 +2339,6 @@ enum SourceType {
22812339
HDefs,
22822340
HFns,
22832341
HHelpers,
2284-
// CIncludes,
22852342
// CFns,
22862343
// CHelpers,
22872344
// CAdapters,
@@ -2292,7 +2349,6 @@ struct Source {
22922349
h_defs: wit_bindgen_core::Source,
22932350
h_fns: wit_bindgen_core::Source,
22942351
h_helpers: wit_bindgen_core::Source,
2295-
c_includes: Vec<String>,
22962352
c_fns: wit_bindgen_core::Source,
22972353
c_helpers: wit_bindgen_core::Source,
22982354
c_adapters: wit_bindgen_core::Source,
@@ -2304,7 +2360,6 @@ impl Source {
23042360
SourceType::HDefs => self.h_defs(s),
23052361
SourceType::HFns => self.h_fns(s),
23062362
SourceType::HHelpers => self.h_helpers(s),
2307-
// SourceType::CIncludes => self.c_includes(s),
23082363
// SourceType::CFns => self.c_fns(s),
23092364
// SourceType::CHelpers => self.c_helpers(s),
23102365
// SourceType::CAdapters => self.c_adapters(s),
@@ -2314,9 +2369,6 @@ impl Source {
23142369
self.h_defs.push_str(&append_src.h_defs);
23152370
self.h_fns.push_str(&append_src.h_fns);
23162371
self.h_helpers.push_str(&append_src.h_helpers);
2317-
for i in &append_src.c_includes {
2318-
self.c_includes(i.into());
2319-
}
23202372
self.c_fns.push_str(&append_src.c_fns);
23212373
self.c_helpers.push_str(&append_src.c_helpers);
23222374
self.c_adapters.push_str(&append_src.c_adapters);
@@ -2330,11 +2382,6 @@ impl Source {
23302382
/// Appends `s` to the `h_helpers` source buffer.
fn h_helpers(&mut self, s: &str) {
23312383
self.h_helpers.push_str(s);
23322384
}
2333-
fn c_includes(&mut self, s: String) {
2334-
if !self.c_includes.contains(&s) {
2335-
self.c_includes.push(s);
2336-
}
2337-
}
23382385
/// Appends `s` to the `c_fns` source buffer.
fn c_fns(&mut self, s: &str) {
23392386
self.c_fns.push_str(s);
23402387
}

0 commit comments

Comments
 (0)