如何在 WebAssembly 中从 Rust 返回字符串(或类似的数据)?
How to return a string (or similar) from Rust in WebAssembly?
我用这个 Rust 代码创建了一个小的 Wasm 文件:
/// Returns the greeting as a borrowed string slice with `'static` lifetime.
#[no_mangle]
pub fn hello() -> &'static str {
    const GREETING: &str = "hello from rust";
    GREETING
}
它构建并且可以从 JS 调用 hello
函数:
<!DOCTYPE html>
<html>
<body>
<script>
// Download main.wasm, instantiate it with an empty import object, and alert
// the value produced by calling the exported hello().
(async () => {
  const response = await fetch('main.wasm');
  const bytes = await response.arrayBuffer();
  const results = await WebAssembly.instantiate(bytes, {});
  alert(results.instance.exports.hello());
})();
</script>
</body>
</html>
我的问题是 alert 显示 "undefined"。如果我返回一个 i32,它可以正常工作并显示该 i32 的值。我也试过返回 String,但同样不起作用(仍然显示 "undefined")。
有没有办法在 WebAssembly 中 return 来自 Rust 的字符串?我应该使用什么类型?
您不能直接返回 Rust 的 String 或 &str。而是应该分配并返回一个指向数据的原始字节指针,然后在 JavaScript 端将这些字节解码为 JS 字符串。
你可以看看 SHA1 例子 here。
感兴趣的函数在
demos/bundle.js
- copyCStr
demos/sha1/sha1-digest.rs
- digest
WebAssembly 仅支持少数几种数值类型(numeric types),导出函数能够返回的也只有这些类型。
当您编译为 WebAssembly 时,您的字符串将保存在模块的线性内存中。为了从主机 JavaScript 中读取此字符串,您需要 return 对其在内存中的位置的引用,以及字符串的长度,即两个整数。这允许您从内存中读取字符串。
无论您将哪种语言编译成 WebAssembly,都可以使用相同的技术。(原文此处附有一个链接,提供了该问题的详细背景。)
对于 Rust,您需要使用外部函数接口 (FFI),使用 CString
类型,如下所示:
use std::ffi::CString;
use std::os::raw::c_char;
/// The greeting handed out to the JavaScript host.
static HELLO: &str = "hello from rust";

/// Copies `HELLO` into a freshly allocated, NUL-terminated C string and
/// returns a pointer to its first byte.
///
/// Ownership of the allocation passes to the caller (`CString::into_raw`).
/// The memory is released only if the pointer is later handed back to
/// `CString::from_raw`; every call without such a hand-back leaks one copy.
///
/// Declared `extern "C"` so the exported symbol has a defined ABI instead of
/// the unspecified Rust ABI.
///
/// # Panics
/// Panics if `HELLO` contains an interior NUL byte.
#[no_mangle]
pub extern "C" fn get_hello() -> *mut c_char {
    CString::new(HELLO)
        .expect("HELLO must not contain interior NUL bytes")
        .into_raw()
}

/// Returns the length of `HELLO` in bytes.
///
/// The NUL terminator appended by `get_hello` is not counted.
#[no_mangle]
pub extern "C" fn get_hello_len() -> usize {
    HELLO.len()
}
上面的代码导出了两个函数:get_hello 返回对字符串的引用(即它在线性内存中的位置),get_hello_len 返回它的长度。
将上述代码编译成wasm模块后,可以按如下方式访问字符串:
const res = await fetch('chip8.wasm');
const buffer = await res.arrayBuffer();
const module = await WebAssembly.compile(buffer);
const instance = await WebAssembly.instantiate(module);

// obtain the module memory
const linearMemory = instance.exports.memory;

// create a view over the exported string: get_hello() yields its offset in
// linear memory, get_hello_len() its length in bytes
const offset = instance.exports.get_hello();
const stringBuffer = new Uint8Array(linearMemory.buffer, offset,
  instance.exports.get_hello_len());

// Decode the bytes as UTF-8. Mapping each byte through String.fromCharCode is
// only correct for ASCII; any multi-byte UTF-8 sequence would be garbled.
let str = new TextDecoder('utf-8').decode(stringBuffer);
console.log(str);
我看到的大多数例子都会把字符串复制两次:先是在 WASM 端复制到 CString 中(或者为了把 Vec 的容量收缩到实际长度而重新分配),然后在 JS 端解码 UTF-8 时再复制一次。
鉴于我们经常为了速度而使用 WASM,我试图实现一个可以重用 Rust 向量的版本。
use std::collections::HashMap;
/// Byte vectors shared with JavaScript.
///
/// A map from the payload's memory location to the `Vec<u8>` that owns it.
///
/// In order to deallocate memory in Rust we need not just the memory location but also its size.
/// In case of strings and vectors the freed size is the capacity.
/// Keeping the vector around allows us not to change its capacity.
///
/// Starts out as `None`; `init` stores the empty map.
///
/// Not thread-safe (assuming that we're running WASM from the single JavaScript thread).
static mut SHARED_VECS: Option<HashMap<u32, Vec<u8>>> = None;
// Host functions supplied by the JavaScript side through the `env` import object.
extern "C" {
// Logs the shared vector located at `rs`. The JavaScript implementation decodes
// the bytes as UTF-8 and then releases the vector through `free_vec`.
fn console_log(rs: *const u8);
// Like `console_log`, but the JavaScript implementation decodes the bytes as
// ISO-8859-1.
fn console_log_8859_1(rs: *const u8);
}
/// Creates the empty map behind `SHARED_VECS`.
///
/// Must run before `vec_len`, `vec2js` or `free_vec`, all of which unwrap the
/// map. Calling it again replaces the map and drops every vector still stored.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
#[no_mangle]
pub extern "C" fn init() {
    // SAFETY: the module is assumed to be driven from the single JavaScript
    // thread, so nothing else touches `SHARED_VECS` during this write.
    unsafe { SHARED_VECS = Some(HashMap::new()) }
}
/// Returns the length in bytes of the shared vector whose buffer starts at
/// `payload`.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
///
/// # Panics
/// Panics if `init` has not been called, or if `payload` is not the address of
/// a vector currently stored in `SHARED_VECS`.
#[no_mangle]
pub extern "C" fn vec_len(payload: *const u8) -> u32 {
    // The map is keyed by the buffer address, the same cast `vec2js` uses.
    let key = payload as u32;
    // SAFETY: the module is assumed to be driven from the single JavaScript
    // thread, so no mutable access to `SHARED_VECS` overlaps this read.
    let shared = unsafe { SHARED_VECS.as_ref() }.expect("init() must be called before vec_len()");
    shared
        .get(&key)
        .expect("vec_len(): pointer does not belong to a shared vector")
        .len() as u32
}
/// Converts `v` into a `Vec<u8>`, stores it in `SHARED_VECS` keyed by the
/// address of its buffer, and returns that address.
///
/// The vector stays in the map until the same address is passed to `free_vec`.
///
/// # Panics
/// Panics if `init` has not been called.
pub fn vec2js<V: Into<Vec<u8>>>(v: V) -> *const u8 {
    let bytes: Vec<u8> = v.into();
    // Moving the `Vec` into the map moves only its handle, not the buffer it
    // points to, so the address taken here stays valid while the entry exists.
    let address = bytes.as_ptr();
    let key = address as u32;
    unsafe {
        let registry = SHARED_VECS.as_mut().unwrap();
        registry.insert(key, bytes);
    }
    address
}
/// Removes the shared vector whose buffer starts at `payload` from
/// `SHARED_VECS`, which drops it and releases its memory.
///
/// An address that is not in the map is ignored.
///
/// # Panics
/// Panics if `init` has not been called.
#[no_mangle]
pub extern "C" fn free_vec(payload: *const u8) {
    let key = payload as u32;
    unsafe {
        let registry = SHARED_VECS.as_mut().unwrap();
        drop(registry.remove(&key));
    }
}
/// Demo entry point: shares one UTF-8 string and one ASCII byte string with
/// the host and asks it to log them.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
///
/// # Panics
/// Panics if `init` has not been called (via `vec2js`).
#[no_mangle]
pub extern "C" fn start() {
    // SAFETY: each pointer comes straight from `vec2js`, which keeps the
    // vector alive in `SHARED_VECS` until the host releases it via `free_vec`.
    unsafe {
        // `String::from` instead of `format!`: there is nothing to format.
        console_log(vec2js(String::from("Hello again!")));
        console_log_8859_1(vec2js(&b"ASCII string."[..]));
    }
}
和 JavaScript 部分:
(function (iif) {
  // Decodes the shared byte vector at address `rs` in `mod`'s linear memory
  // using the given encoding label, then releases it on the Rust side.
  function rs2js (mod, rs, utfLabel = 'utf-8') {
    const view = new Uint8Array(mod.memory.buffer, rs, mod.vec_len(rs))
    const decoded = new TextDecoder(utfLabel).decode(view)
    mod.free_vec(rs)
    return decoded
  }

  // Always decode (and thereby free) the vector, even when there is no
  // console to print to; otherwise the Rust side would keep the vector forever.
  function log (rs, utfLabel) {
    const text = rs2js(iif.main, rs, utfLabel)
    if (window.console) console.log('main]', text)
  }

  // Fetches and instantiates main.wasm, then runs its init() and start().
  // `cache` selects the default HTTP cache policy; otherwise the cache is bypassed.
  function loadWasm (cache) {
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WebAssembly/instantiateStreaming
    WebAssembly.instantiateStreaming(fetch('main.wasm', {cache: cache ? 'default' : 'no-cache'}), {env: {
      console_log: function (rs) { log(rs) },
      console_log_8859_1: function (rs) { log(rs, 'iso-8859-1') }
    }}).then(results => {
      const exports = results.instance.exports
      exports.init()
      iif.main = exports
      iif.main.start()
    })
  }

  // Hot code reloading.
  if (window.location.hostname == '127.0.0.1' && window.location.port == '43080') {
    window.setInterval(function () {
      // Check if the WASM was updated: main.wasm.lm holds a numeric stamp, and
      // the module is reloaded whenever that stamp differs from the last one seen.
      fetch('main.wasm.lm', {cache: 'no-cache'}).then(r => r.text()).then(lm => {
        lm = lm.trim()
        if (/^\d+$/.test(lm) && lm != iif.lm) {
          iif.lm = lm
          loadWasm(false)
        }
      })
    }, 200)
  } else loadWasm(true)
}(window.iif = window.iif || {}))
此处的 trade-off 是我们在 WASM 中使用 HashMap
,这可能会增加大小,除非已经需要 HashMap
。
一个有趣的替代方法是使用 WebAssembly 的 table 与 JavaScript 共享(有效载荷、长度、容量)三元组,并在需要释放字符串时把它取回。但是我还不知道这些 table 该怎么用。
P.S。有时我们不想首先分配 Vec
。
在这种情况下,我们可以将内存跟踪移动到 JavaScript:
// Host functions supplied by the JavaScript side through the `env` import object.
extern "C" {
// Decodes `len` UTF-8 bytes starting at `utf8` into a JavaScript string that is
// kept on the host side, and returns the integer ID under which it is stored.
fn new_js_string(utf8: *const u8, len: i32) -> i32;
// Logs the stored string with ID `js`; the host deletes the string afterwards.
fn console_log(js: i32);
}
/// Hands `rs` to the host, which turns it into a JavaScript string, and
/// returns the ID the host assigned to that string.
///
/// # Panics
/// Panics if `rs` is longer than `i32::MAX` bytes, because the length is
/// passed to the host as an `i32`.
fn rs2js(rs: &str) -> i32 {
    // `<=`, not `<`: a length of exactly `i32::MAX` still fits in an `i32`.
    assert!(
        rs.len() <= i32::MAX as usize,
        "string of {} bytes is too long to pass its length as an i32",
        rs.len()
    );
    // SAFETY: the pointer and length describe the live `rs` slice, and the
    // host decodes the bytes into its own string during the call.
    unsafe { new_js_string(rs.as_ptr(), rs.len() as i32) }
}
/// Demo entry point: creates a JavaScript string for the greeting and asks the
/// host to log it.
#[no_mangle]
pub fn start() {
    let greeting_id = rs2js("Hello again!");
    unsafe { console_log(greeting_id) }
}
(function (iif) {
  // `window.iif` may already have been created by another script without
  // these fields; make sure they exist before the first string is stored.
  if (typeof iif.strings !== 'object' || iif.strings === null) iif.strings = {}
  if (typeof iif.lastStringId !== 'number') iif.lastStringId = 1

  const MAX_ID = 2147483647 // Can't easily pass more than that through WASM.
  const MIN_ID = -2147483648

  // Returns the first ID at or after `from` that has no string stored under
  // it. The result may be MAX_ID + 1 when every ID up to MAX_ID is taken.
  function firstFreeId (from) {
    let id = from
    while (typeof iif.strings[id] !== 'undefined') id += 1
    return id
  }

  // Fetches and instantiates main.wasm, then runs its start().
  // `cache` selects the default HTTP cache policy; otherwise the cache is bypassed.
  function loadWasm (cache) {
    WebAssembly.instantiateStreaming(fetch('main.wasm', {cache: cache ? 'default' : 'no-cache'}), {env: {
      // Decodes `len` UTF-8 bytes at `utf8`, stores the string under a free
      // ID and returns that ID to the WASM caller.
      new_js_string: function (utf8, len) {
        const view = new Uint8Array(iif.main.memory.buffer, utf8, len)
        const decoded = new TextDecoder('utf-8').decode(view)
        let stringId = firstFreeId(iif.lastStringId)
        if (stringId > MAX_ID) {
          // Ran past the i32 range: wrap around and search from the bottom.
          stringId = firstFreeId(MIN_ID)
          if (stringId > MAX_ID) throw new Error('Out of string IDs!')
        }
        iif.strings[stringId] = decoded
        return iif.lastStringId = stringId
      },
      // Logs the string stored under ID `js` and then forgets it.
      console_log: function (js) {
        if (window.console) console.log('main]', iif.strings[js])
        delete iif.strings[js]
      }
    }}).then(results => {
      iif.main = results.instance.exports
      iif.main.start()
    })
  }

  loadWasm(true)
}(window.iif = window.iif || {strings: {}, lastStringId: 1}))
我用这个 Rust 代码创建了一个小的 Wasm 文件:
/// Returns the greeting as a borrowed string slice with `'static` lifetime.
#[no_mangle]
pub fn hello() -> &'static str {
    const GREETING: &str = "hello from rust";
    GREETING
}
它构建并且可以从 JS 调用 hello
函数:
<!DOCTYPE html>
<html>
<body>
<script>
// Download main.wasm, instantiate it with an empty import object, and alert
// the value produced by calling the exported hello().
(async () => {
  const response = await fetch('main.wasm');
  const bytes = await response.arrayBuffer();
  const results = await WebAssembly.instantiate(bytes, {});
  alert(results.instance.exports.hello());
})();
</script>
</body>
</html>
我的问题是 alert 显示 "undefined"。如果我返回一个 i32,它可以正常工作并显示该 i32 的值。我也试过返回 String,但同样不起作用(仍然显示 "undefined")。
有没有办法在 WebAssembly 中 return 来自 Rust 的字符串?我应该使用什么类型?
您不能直接返回 Rust 的 String 或 &str。而是应该分配并返回一个指向数据的原始字节指针,然后在 JavaScript 端将这些字节解码为 JS 字符串。
你可以看看 SHA1 例子 here。
感兴趣的函数在
demos/bundle.js
-copyCStr
demos/sha1/sha1-digest.rs
-digest
WebAssembly 仅支持少数几种数值类型(numeric types),导出函数能够返回的也只有这些类型。
当您编译为 WebAssembly 时,您的字符串将保存在模块的线性内存中。为了从主机 JavaScript 中读取此字符串,您需要 return 对其在内存中的位置的引用,以及字符串的长度,即两个整数。这允许您从内存中读取字符串。
无论您将哪种语言编译成 WebAssembly,都可以使用相同的技术。
对于 Rust,您需要使用外部函数接口 (FFI),使用 CString
类型,如下所示:
use std::ffi::CString;
use std::os::raw::c_char;
/// The greeting handed out to the JavaScript host.
static HELLO: &str = "hello from rust";

/// Copies `HELLO` into a freshly allocated, NUL-terminated C string and
/// returns a pointer to its first byte.
///
/// Ownership of the allocation passes to the caller (`CString::into_raw`).
/// The memory is released only if the pointer is later handed back to
/// `CString::from_raw`; every call without such a hand-back leaks one copy.
///
/// Declared `extern "C"` so the exported symbol has a defined ABI instead of
/// the unspecified Rust ABI.
///
/// # Panics
/// Panics if `HELLO` contains an interior NUL byte.
#[no_mangle]
pub extern "C" fn get_hello() -> *mut c_char {
    CString::new(HELLO)
        .expect("HELLO must not contain interior NUL bytes")
        .into_raw()
}

/// Returns the length of `HELLO` in bytes.
///
/// The NUL terminator appended by `get_hello` is not counted.
#[no_mangle]
pub extern "C" fn get_hello_len() -> usize {
    HELLO.len()
}
上面的代码导出了两个函数:get_hello 返回对字符串的引用(即它在线性内存中的位置),get_hello_len 返回它的长度。
将上述代码编译成wasm模块后,可以按如下方式访问字符串:
const res = await fetch('chip8.wasm');
const buffer = await res.arrayBuffer();
const module = await WebAssembly.compile(buffer);
const instance = await WebAssembly.instantiate(module);

// obtain the module memory
const linearMemory = instance.exports.memory;

// create a view over the exported string: get_hello() yields its offset in
// linear memory, get_hello_len() its length in bytes
const offset = instance.exports.get_hello();
const stringBuffer = new Uint8Array(linearMemory.buffer, offset,
  instance.exports.get_hello_len());

// Decode the bytes as UTF-8. Mapping each byte through String.fromCharCode is
// only correct for ASCII; any multi-byte UTF-8 sequence would be garbled.
let str = new TextDecoder('utf-8').decode(stringBuffer);
console.log(str);
我看到的大多数例子都会把字符串复制两次:先是在 WASM 端复制到 CString 中(或者为了把 Vec 的容量收缩到实际长度而重新分配),然后在 JS 端解码 UTF-8 时再复制一次。
鉴于我们经常为了速度而使用 WASM,我试图实现一个可以重用 Rust 向量的版本。
use std::collections::HashMap;
/// Byte vectors shared with JavaScript.
///
/// A map from the payload's memory location to the `Vec<u8>` that owns it.
///
/// In order to deallocate memory in Rust we need not just the memory location but also its size.
/// In case of strings and vectors the freed size is the capacity.
/// Keeping the vector around allows us not to change its capacity.
///
/// Starts out as `None`; `init` stores the empty map.
///
/// Not thread-safe (assuming that we're running WASM from the single JavaScript thread).
static mut SHARED_VECS: Option<HashMap<u32, Vec<u8>>> = None;
// Host functions supplied by the JavaScript side through the `env` import object.
extern "C" {
// Logs the shared vector located at `rs`. The JavaScript implementation decodes
// the bytes as UTF-8 and then releases the vector through `free_vec`.
fn console_log(rs: *const u8);
// Like `console_log`, but the JavaScript implementation decodes the bytes as
// ISO-8859-1.
fn console_log_8859_1(rs: *const u8);
}
/// Creates the empty map behind `SHARED_VECS`.
///
/// Must run before `vec_len`, `vec2js` or `free_vec`, all of which unwrap the
/// map. Calling it again replaces the map and drops every vector still stored.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
#[no_mangle]
pub extern "C" fn init() {
    // SAFETY: the module is assumed to be driven from the single JavaScript
    // thread, so nothing else touches `SHARED_VECS` during this write.
    unsafe { SHARED_VECS = Some(HashMap::new()) }
}
/// Returns the length in bytes of the shared vector whose buffer starts at
/// `payload`.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
///
/// # Panics
/// Panics if `init` has not been called, or if `payload` is not the address of
/// a vector currently stored in `SHARED_VECS`.
#[no_mangle]
pub extern "C" fn vec_len(payload: *const u8) -> u32 {
    // The map is keyed by the buffer address, the same cast `vec2js` uses.
    let key = payload as u32;
    // SAFETY: the module is assumed to be driven from the single JavaScript
    // thread, so no mutable access to `SHARED_VECS` overlaps this read.
    let shared = unsafe { SHARED_VECS.as_ref() }.expect("init() must be called before vec_len()");
    shared
        .get(&key)
        .expect("vec_len(): pointer does not belong to a shared vector")
        .len() as u32
}
/// Converts `v` into a `Vec<u8>`, stores it in `SHARED_VECS` keyed by the
/// address of its buffer, and returns that address.
///
/// The vector stays in the map until the same address is passed to `free_vec`.
///
/// # Panics
/// Panics if `init` has not been called.
pub fn vec2js<V: Into<Vec<u8>>>(v: V) -> *const u8 {
    let bytes: Vec<u8> = v.into();
    // Moving the `Vec` into the map moves only its handle, not the buffer it
    // points to, so the address taken here stays valid while the entry exists.
    let address = bytes.as_ptr();
    let key = address as u32;
    unsafe {
        let registry = SHARED_VECS.as_mut().unwrap();
        registry.insert(key, bytes);
    }
    address
}
/// Removes the shared vector whose buffer starts at `payload` from
/// `SHARED_VECS`, which drops it and releases its memory.
///
/// An address that is not in the map is ignored.
///
/// # Panics
/// Panics if `init` has not been called.
#[no_mangle]
pub extern "C" fn free_vec(payload: *const u8) {
    let key = payload as u32;
    unsafe {
        let registry = SHARED_VECS.as_mut().unwrap();
        drop(registry.remove(&key));
    }
}
/// Demo entry point: shares one UTF-8 string and one ASCII byte string with
/// the host and asks it to log them.
///
/// Declared `extern "C"`, like `free_vec`, so the export has a defined ABI.
///
/// # Panics
/// Panics if `init` has not been called (via `vec2js`).
#[no_mangle]
pub extern "C" fn start() {
    // SAFETY: each pointer comes straight from `vec2js`, which keeps the
    // vector alive in `SHARED_VECS` until the host releases it via `free_vec`.
    unsafe {
        // `String::from` instead of `format!`: there is nothing to format.
        console_log(vec2js(String::from("Hello again!")));
        console_log_8859_1(vec2js(&b"ASCII string."[..]));
    }
}
和 JavaScript 部分:
(function (iif) {
  // Decodes the shared byte vector at address `rs` in `mod`'s linear memory
  // using the given encoding label, then releases it on the Rust side.
  function rs2js (mod, rs, utfLabel = 'utf-8') {
    const view = new Uint8Array(mod.memory.buffer, rs, mod.vec_len(rs))
    const decoded = new TextDecoder(utfLabel).decode(view)
    mod.free_vec(rs)
    return decoded
  }

  // Always decode (and thereby free) the vector, even when there is no
  // console to print to; otherwise the Rust side would keep the vector forever.
  function log (rs, utfLabel) {
    const text = rs2js(iif.main, rs, utfLabel)
    if (window.console) console.log('main]', text)
  }

  // Fetches and instantiates main.wasm, then runs its init() and start().
  // `cache` selects the default HTTP cache policy; otherwise the cache is bypassed.
  function loadWasm (cache) {
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WebAssembly/instantiateStreaming
    WebAssembly.instantiateStreaming(fetch('main.wasm', {cache: cache ? 'default' : 'no-cache'}), {env: {
      console_log: function (rs) { log(rs) },
      console_log_8859_1: function (rs) { log(rs, 'iso-8859-1') }
    }}).then(results => {
      const exports = results.instance.exports
      exports.init()
      iif.main = exports
      iif.main.start()
    })
  }

  // Hot code reloading.
  if (window.location.hostname == '127.0.0.1' && window.location.port == '43080') {
    window.setInterval(function () {
      // Check if the WASM was updated: main.wasm.lm holds a numeric stamp, and
      // the module is reloaded whenever that stamp differs from the last one seen.
      fetch('main.wasm.lm', {cache: 'no-cache'}).then(r => r.text()).then(lm => {
        lm = lm.trim()
        if (/^\d+$/.test(lm) && lm != iif.lm) {
          iif.lm = lm
          loadWasm(false)
        }
      })
    }, 200)
  } else loadWasm(true)
}(window.iif = window.iif || {}))
此处的 trade-off 是我们在 WASM 中使用 HashMap
,这可能会增加大小,除非已经需要 HashMap
。
一个有趣的替代方法是使用 WebAssembly 的 table 与 JavaScript 共享(有效载荷、长度、容量)三元组,并在需要释放字符串时把它取回。但是我还不知道这些 table 该怎么用。
P.S。有时我们不想首先分配 Vec
。
在这种情况下,我们可以将内存跟踪移动到 JavaScript:
// Host functions supplied by the JavaScript side through the `env` import object.
extern "C" {
// Decodes `len` UTF-8 bytes starting at `utf8` into a JavaScript string that is
// kept on the host side, and returns the integer ID under which it is stored.
fn new_js_string(utf8: *const u8, len: i32) -> i32;
// Logs the stored string with ID `js`; the host deletes the string afterwards.
fn console_log(js: i32);
}
/// Hands `rs` to the host, which turns it into a JavaScript string, and
/// returns the ID the host assigned to that string.
///
/// # Panics
/// Panics if `rs` is longer than `i32::MAX` bytes, because the length is
/// passed to the host as an `i32`.
fn rs2js(rs: &str) -> i32 {
    // `<=`, not `<`: a length of exactly `i32::MAX` still fits in an `i32`.
    assert!(
        rs.len() <= i32::MAX as usize,
        "string of {} bytes is too long to pass its length as an i32",
        rs.len()
    );
    // SAFETY: the pointer and length describe the live `rs` slice, and the
    // host decodes the bytes into its own string during the call.
    unsafe { new_js_string(rs.as_ptr(), rs.len() as i32) }
}
/// Demo entry point: creates a JavaScript string for the greeting and asks the
/// host to log it.
#[no_mangle]
pub fn start() {
    let greeting_id = rs2js("Hello again!");
    unsafe { console_log(greeting_id) }
}
(function (iif) {
  // `window.iif` may already have been created by another script without
  // these fields; make sure they exist before the first string is stored.
  if (typeof iif.strings !== 'object' || iif.strings === null) iif.strings = {}
  if (typeof iif.lastStringId !== 'number') iif.lastStringId = 1

  const MAX_ID = 2147483647 // Can't easily pass more than that through WASM.
  const MIN_ID = -2147483648

  // Returns the first ID at or after `from` that has no string stored under
  // it. The result may be MAX_ID + 1 when every ID up to MAX_ID is taken.
  function firstFreeId (from) {
    let id = from
    while (typeof iif.strings[id] !== 'undefined') id += 1
    return id
  }

  // Fetches and instantiates main.wasm, then runs its start().
  // `cache` selects the default HTTP cache policy; otherwise the cache is bypassed.
  function loadWasm (cache) {
    WebAssembly.instantiateStreaming(fetch('main.wasm', {cache: cache ? 'default' : 'no-cache'}), {env: {
      // Decodes `len` UTF-8 bytes at `utf8`, stores the string under a free
      // ID and returns that ID to the WASM caller.
      new_js_string: function (utf8, len) {
        const view = new Uint8Array(iif.main.memory.buffer, utf8, len)
        const decoded = new TextDecoder('utf-8').decode(view)
        let stringId = firstFreeId(iif.lastStringId)
        if (stringId > MAX_ID) {
          // Ran past the i32 range: wrap around and search from the bottom.
          stringId = firstFreeId(MIN_ID)
          if (stringId > MAX_ID) throw new Error('Out of string IDs!')
        }
        iif.strings[stringId] = decoded
        return iif.lastStringId = stringId
      },
      // Logs the string stored under ID `js` and then forgets it.
      console_log: function (js) {
        if (window.console) console.log('main]', iif.strings[js])
        delete iif.strings[js]
      }
    }}).then(results => {
      iif.main = results.instance.exports
      iif.main.start()
    })
  }

  loadWasm(true)
}(window.iif = window.iif || {strings: {}, lastStringId: 1}))