如何使用 BigQuery 在 INT64 和二进制 STRING 之间进行转换?
How can I convert between INT64 and binary STRING using BigQuery?
BigQuery 问题跟踪器上有一个功能请求(feature request),希望增加转换函数,例如 INT64 与二进制 STRING 之间的转换。除了使用 JavaScript UDF 之外,是否还有其他解决方法?
是的,您可以仅使用 SQL 实现 INT64 和二进制 STRING 之间的转换,而不需要 JavaScript。下面是一个例子:
-- Renders x as a 64-character string of '0'/'1' digits, most significant bit first.
-- Shift amounts are generated from 63 down to 0 and the extracted bits are
-- concatenated in that generation order.
CREATE TEMP FUNCTION ToBinary(x INT64) AS (
  (
    SELECT
      STRING_AGG(CAST((x >> shift) & 1 AS STRING), '' ORDER BY pos)
    FROM
      UNNEST(GENERATE_ARRAY(63, 0, -1)) AS shift WITH OFFSET AS pos
  )
);
-- Parses a string of binary digits into an INT64.
-- The string is split into single characters; the character at offset idx
-- (0 = leftmost) is cast to INT64 and shifted left by LENGTH(s) - 1 - idx,
-- and the shifted values are summed.
CREATE TEMP FUNCTION FromBinary(s STRING) AS (
  (
    SELECT
      SUM(CAST(digit AS INT64) << (LENGTH(s) - (idx + 1)))
    FROM
      UNNEST(SPLIT(s, '')) AS digit WITH OFFSET AS idx
  )
);
-- Demo: show each sample value, its 64-digit binary rendering, and the value
-- recovered by parsing that rendering back (should equal x).
SELECT
  x,
  ToBinary(x) AS binary,
  FromBinary(ToBinary(x)) AS roundtrip
FROM
  UNNEST([1, 123456, 9876543210, -1001]) AS x;
返回结果如下:
+------------+------------------------------------------------------------------+------------+
| x | binary | roundtrip |
+------------+------------------------------------------------------------------+------------+
| 1 | 0000000000000000000000000000000000000000000000000000000000000001 | 1 |
| 123456 | 0000000000000000000000000000000000000000000000011110001001000000 | 123456 |
| 9876543210 | 0000000000000000000000000000001001001100101100000001011011101010 | 9876543210 |
| -1001 | 1111111111111111111111111111111111111111111111111111110000010111 | -1001 |
+------------+------------------------------------------------------------------+------------+
作为补充,下面是在 INT64 和十六进制 STRING 之间进行转换的方法:
-- Renders x as 16 lowercase hexadecimal digits, most significant byte first.
-- Each of the 8 bytes is isolated by shifting right by a multiple of 8 bits
-- and masking with 0xff, then formatted as two zero-padded hex digits.
CREATE TEMP FUNCTION ToHex(x INT64) AS (
  (
    SELECT
      STRING_AGG(FORMAT('%02x', (x >> shift) & 0xff), '' ORDER BY shift DESC)
    FROM
      UNNEST(GENERATE_ARRAY(0, 56, 8)) AS shift
  )
);
-- Parses a hexadecimal string (without a '0x' prefix) into an INT64.
-- The string is consumed two digits (one byte) at a time: each pair is cast
-- through the '0x..' literal form, shifted left to its position, and the
-- shifted bytes are summed.
-- Odd-length input is left-padded with a single '0' so that no digit is
-- dropped; previously the float bound LENGTH(s) / 2 made 'abc' evaluate as
-- 0xab0 and a single digit such as 'f' evaluate to NULL.
-- An empty string yields NULL (SUM over zero rows).
CREATE TEMP FUNCTION FromHex(s STRING) AS (
  (
    SELECT
      SUM(
        CAST(CONCAT('0x', SUBSTR(padded, byte * 2 + 1, 2)) AS INT64)
          << ((LENGTH(padded) - (byte + 1) * 2) * 4)
      )
    FROM
      -- Pad to an even number of digits so the string splits into whole bytes.
      (SELECT LPAD(s, LENGTH(s) + MOD(LENGTH(s), 2), '0') AS padded) AS normalized
    CROSS JOIN
      -- byte is the 0-based index of each two-digit group, left to right.
      UNNEST(GENERATE_ARRAY(0, DIV(LENGTH(padded), 2) - 1)) AS byte
  )
);
-- Demo: show each sample value, its 16-digit hex rendering, and the value
-- recovered by parsing that rendering back (should equal x).
SELECT
  x,
  ToHex(x) AS hex,
  FromHex(ToHex(x)) AS roundtrip
FROM
  UNNEST([1, 123456, 9876543210, -1001]) AS x;
返回结果如下:
+------------+------------------+------------+
| x | hex | roundtrip |
+------------+------------------+------------+
| 1 | 0000000000000001 | 1 |
| 123456 | 000000000001e240 | 123456 |
| 9876543210 | 000000024cb016ea | 9876543210 |
| -1001 | fffffffffffffc17 | -1001 |
+------------+------------------+------------+
(八进制转换留作读者的练习。)
也可以使用 CAST 和 FORMAT:
整数转十六进制:
SELECT FORMAT("%X", my_int) AS my_hex ...
十六进制转整数(字符串需要带 0x 前缀,例如 '0x1F'):
SELECT CAST(my_hex AS INT64) AS my_int ...
2021 年更新:
from_binary 和 to_binary 函数已集成到官方的 bqutil 公共项目中。
将二进制表示形式转换为整数:
SELECT bqutil.fn.from_binary('111')
7
将整数转换为二进制表示:
SELECT bqutil.fn.to_binary(7)
0000000000000000000000000000000000000000000000000000000000000111
BigQuery 问题跟踪器上有一个功能请求(feature request),希望增加转换函数,例如 INT64 与二进制 STRING 之间的转换。除了使用 JavaScript UDF 之外,是否还有其他解决方法?
是的,您可以仅使用 SQL 实现 INT64 和二进制 STRING 之间的转换,而不需要 JavaScript。下面是一个例子:
-- Renders x as a 64-character string of '0'/'1' digits, most significant bit first.
-- Shift amounts are generated from 63 down to 0 and the extracted bits are
-- concatenated in that generation order.
CREATE TEMP FUNCTION ToBinary(x INT64) AS (
  (
    SELECT
      STRING_AGG(CAST((x >> shift) & 1 AS STRING), '' ORDER BY pos)
    FROM
      UNNEST(GENERATE_ARRAY(63, 0, -1)) AS shift WITH OFFSET AS pos
  )
);
-- Parses a string of binary digits into an INT64.
-- The string is split into single characters; the character at offset idx
-- (0 = leftmost) is cast to INT64 and shifted left by LENGTH(s) - 1 - idx,
-- and the shifted values are summed.
CREATE TEMP FUNCTION FromBinary(s STRING) AS (
  (
    SELECT
      SUM(CAST(digit AS INT64) << (LENGTH(s) - (idx + 1)))
    FROM
      UNNEST(SPLIT(s, '')) AS digit WITH OFFSET AS idx
  )
);
-- Demo: show each sample value, its 64-digit binary rendering, and the value
-- recovered by parsing that rendering back (should equal x).
SELECT
  x,
  ToBinary(x) AS binary,
  FromBinary(ToBinary(x)) AS roundtrip
FROM
  UNNEST([1, 123456, 9876543210, -1001]) AS x;
返回结果如下:
+------------+------------------------------------------------------------------+------------+
| x | binary | roundtrip |
+------------+------------------------------------------------------------------+------------+
| 1 | 0000000000000000000000000000000000000000000000000000000000000001 | 1 |
| 123456 | 0000000000000000000000000000000000000000000000011110001001000000 | 123456 |
| 9876543210 | 0000000000000000000000000000001001001100101100000001011011101010 | 9876543210 |
| -1001 | 1111111111111111111111111111111111111111111111111111110000010111 | -1001 |
+------------+------------------------------------------------------------------+------------+
作为补充,下面是在 INT64 和十六进制 STRING 之间进行转换的方法:
-- Renders x as 16 lowercase hexadecimal digits, most significant byte first.
-- Each of the 8 bytes is isolated by shifting right by a multiple of 8 bits
-- and masking with 0xff, then formatted as two zero-padded hex digits.
CREATE TEMP FUNCTION ToHex(x INT64) AS (
  (
    SELECT
      STRING_AGG(FORMAT('%02x', (x >> shift) & 0xff), '' ORDER BY shift DESC)
    FROM
      UNNEST(GENERATE_ARRAY(0, 56, 8)) AS shift
  )
);
-- Parses a hexadecimal string (without a '0x' prefix) into an INT64.
-- The string is consumed two digits (one byte) at a time: each pair is cast
-- through the '0x..' literal form, shifted left to its position, and the
-- shifted bytes are summed.
-- Odd-length input is left-padded with a single '0' so that no digit is
-- dropped; previously the float bound LENGTH(s) / 2 made 'abc' evaluate as
-- 0xab0 and a single digit such as 'f' evaluate to NULL.
-- An empty string yields NULL (SUM over zero rows).
CREATE TEMP FUNCTION FromHex(s STRING) AS (
  (
    SELECT
      SUM(
        CAST(CONCAT('0x', SUBSTR(padded, byte * 2 + 1, 2)) AS INT64)
          << ((LENGTH(padded) - (byte + 1) * 2) * 4)
      )
    FROM
      -- Pad to an even number of digits so the string splits into whole bytes.
      (SELECT LPAD(s, LENGTH(s) + MOD(LENGTH(s), 2), '0') AS padded) AS normalized
    CROSS JOIN
      -- byte is the 0-based index of each two-digit group, left to right.
      UNNEST(GENERATE_ARRAY(0, DIV(LENGTH(padded), 2) - 1)) AS byte
  )
);
-- Demo: show each sample value, its 16-digit hex rendering, and the value
-- recovered by parsing that rendering back (should equal x).
SELECT
  x,
  ToHex(x) AS hex,
  FromHex(ToHex(x)) AS roundtrip
FROM
  UNNEST([1, 123456, 9876543210, -1001]) AS x;
返回结果如下:
+------------+------------------+------------+
| x | hex | roundtrip |
+------------+------------------+------------+
| 1 | 0000000000000001 | 1 |
| 123456 | 000000000001e240 | 123456 |
| 9876543210 | 000000024cb016ea | 9876543210 |
| -1001 | fffffffffffffc17 | -1001 |
+------------+------------------+------------+
(八进制转换留作读者的练习。)
也可以使用 CAST 和 FORMAT:
整数转十六进制:
SELECT FORMAT("%X", my_int) AS my_hex ...
十六进制转整数(字符串需要带 0x 前缀,例如 '0x1F'):
SELECT CAST(my_hex AS INT64) AS my_int ...
2021 年更新:
from_binary 和 to_binary 函数已集成到官方的 bqutil 公共项目中。
将二进制表示形式转换为整数:
SELECT bqutil.fn.from_binary('111')
7
将整数转换为二进制表示:
SELECT bqutil.fn.to_binary(7)
0000000000000000000000000000000000000000000000000000000000000111