BigQuery 用户定义函数中的 Base64 编码
Base64-encoding in a BigQuery User-defined function
BigQuery 使用 Javascript 作为其用户定义的函数。 BigQuery 中 BYTES
的输入和输出映射到 Javascript.
中的 base64 编码字符串或从中映射
BigQuery 没有浏览器 window
对象,因此缺少 atob
和 btoa
。有没有一种在 Bigquery JS 环境中编码和解码的简单方法,或者你是否必须包含一个库来进行映射?
您需要包含一个库,但是一旦您将 JavaScript 放到 Cloud Storage 上就相当简单了,您可以将此方法用于要导入的其他常用库。我在 中找到了一个实现,并将这些内容放在一个名为 btoa_atob.js
:
的文件中
(function () {
var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
function InvalidCharacterError(message) {
this.message = message;
}
InvalidCharacterError.prototype = new Error;
InvalidCharacterError.prototype.name = 'InvalidCharacterError';
// encoder
// [https://gist.github.com/999166] by [https://github.com/nignag]
btoa = function (input) {
var str = String(input);
for (
// initialize result and counter
var block, charCode, idx = 0, map = chars, output = '';
// if the next str index does not exist:
// change the mapping table to "="
// check if d has no fractional digits
str.charAt(idx | 0) || (map = '=', idx % 1);
// "8 - idx % 1 * 8" generates the sequence 2, 4, 6, 8
output += map.charAt(63 & block >> 8 - idx % 1 * 8)
) {
charCode = str.charCodeAt(idx += 3/4);
if (charCode > 0xFF) {
throw new InvalidCharacterError("'btoa' failed: The string to be encoded contains characters outside of the Latin1 range.");
}
block = block << 8 | charCode;
}
return output;
};
// decoder
// [https://gist.github.com/1020396] by [https://github.com/atk]
atob = function (input) {
var str = String(input).replace(/[=]+$/, ''); // #31: ExtendScript bad parse of /=
if (str.length % 4 == 1) {
throw new InvalidCharacterError("'atob' failed: The string to be decoded is not correctly encoded.");
}
for (
// initialize result and counters
var bc = 0, bs, buffer, idx = 0, output = '';
// get next character
buffer = str.charAt(idx++);
// character found in table? initialize bit storage and add its ascii value;
~buffer && (bs = bc % 4 ? bs * 64 + buffer : buffer,
// and if not first of each 4 characters,
// convert the first 8 bits to one ascii character
bc++ % 4) ? output += String.fromCharCode(255 & bs >> (-2 * bc & 6)) : 0
) {
// try to find character in table (0-63, not found => -1)
buffer = chars.indexOf(buffer);
}
return output;
};
}());
然后我将文件复制到我的云存储:
gsutil cp btoa_atob.js gs://my-bucket/
然后我写了一个使用它的虚拟函数:
#standardSQL
CREATE TEMP FUNCTION Foo(b BYTES) RETURNS STRING LANGUAGE js AS """
var result = atob(b);
// ... process result of atob.
return result;
"""
OPTIONS (library='gs://my-bucket/btoa_atob.js');
SELECT Foo(b'\xa0b1\xff\xee');
BigQuery 使用 Javascript 作为其用户定义的函数。 BigQuery 中 BYTES
的输入和输出映射到 Javascript.
BigQuery 没有浏览器 window
对象,因此缺少 atob
和 btoa
。有没有一种在 Bigquery JS 环境中编码和解码的简单方法,或者你是否必须包含一个库来进行映射?
您需要包含一个库,但是一旦您将 JavaScript 放到 Cloud Storage 上就相当简单了,您可以将此方法用于要导入的其他常用库。我在 btoa_atob.js
:
(function () {
var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
function InvalidCharacterError(message) {
this.message = message;
}
InvalidCharacterError.prototype = new Error;
InvalidCharacterError.prototype.name = 'InvalidCharacterError';
// encoder
// [https://gist.github.com/999166] by [https://github.com/nignag]
btoa = function (input) {
var str = String(input);
for (
// initialize result and counter
var block, charCode, idx = 0, map = chars, output = '';
// if the next str index does not exist:
// change the mapping table to "="
// check if d has no fractional digits
str.charAt(idx | 0) || (map = '=', idx % 1);
// "8 - idx % 1 * 8" generates the sequence 2, 4, 6, 8
output += map.charAt(63 & block >> 8 - idx % 1 * 8)
) {
charCode = str.charCodeAt(idx += 3/4);
if (charCode > 0xFF) {
throw new InvalidCharacterError("'btoa' failed: The string to be encoded contains characters outside of the Latin1 range.");
}
block = block << 8 | charCode;
}
return output;
};
// decoder
// [https://gist.github.com/1020396] by [https://github.com/atk]
atob = function (input) {
var str = String(input).replace(/[=]+$/, ''); // #31: ExtendScript bad parse of /=
if (str.length % 4 == 1) {
throw new InvalidCharacterError("'atob' failed: The string to be decoded is not correctly encoded.");
}
for (
// initialize result and counters
var bc = 0, bs, buffer, idx = 0, output = '';
// get next character
buffer = str.charAt(idx++);
// character found in table? initialize bit storage and add its ascii value;
~buffer && (bs = bc % 4 ? bs * 64 + buffer : buffer,
// and if not first of each 4 characters,
// convert the first 8 bits to one ascii character
bc++ % 4) ? output += String.fromCharCode(255 & bs >> (-2 * bc & 6)) : 0
) {
// try to find character in table (0-63, not found => -1)
buffer = chars.indexOf(buffer);
}
return output;
};
}());
然后我将文件复制到我的云存储:
gsutil cp btoa_atob.js gs://my-bucket/
然后我写了一个使用它的虚拟函数:
#standardSQL
CREATE TEMP FUNCTION Foo(b BYTES) RETURNS STRING LANGUAGE js AS """
var result = atob(b);
// ... process result of atob.
return result;
"""
OPTIONS (library='gs://my-bucket/btoa_atob.js');
SELECT Foo(b'\xa0b1\xff\xee');