BigQuery - 计算数组中有多少个单词相等
BigQuery - Count how many words in array are equal
我想统计一条路径(将在分隔符 /
处拆分)中有多少个匹配的单词,并返回一个对应的整数数组。
输入数据类似于:
我想添加另一列 match_count
,其中包含一个整数数组。例如:
要重现这种情况,下面是我正在使用的查询:
-- Stub of the function being asked about: it takes two arrays of path strings
-- and is declared to return an array of integers (the per-path match counts
-- described in the question text). The body is an unimplemented placeholder.
CREATE TEMP FUNCTION HOW_MANY_MATCHES_IN_PATH(src_path ARRAY<STRING>, test_path ARRAY<STRING>) RETURNS ARRAY<INTEGER> AS (
-- WHAT DO I PUT HERE?
);
-- Reproduction query: cross-join the two literal path lists, fold every
-- resulting row back into a pair of arrays, then call the function on them.
WITH path_pairs AS (
  SELECT
    ARRAY_AGG(src_item) AS src_path,
    ARRAY_AGG(test_item) AS test_path
  FROM UNNEST(['lib/client/core.js', 'lib/server/core.js']) AS src_item
  CROSS JOIN UNNEST(['test/server/core.js']) AS test_item
)
SELECT
  *,
  HOW_MANY_MATCHES_IN_PATH(src_path, test_path) AS dir_path_match_count
FROM path_pairs
我已经尝试在 HOW_MANY_MATCHES_IN_PATH
函数中使用 ARRAY
和 UNNEST
,但结果要么是报错,要么是(在本例中)得到一个包含 4 个元素的数组。
考虑以下方法
-- Counts the distinct '/'-separated segments of src_path that also occur
-- among the '/'-separated segments of test_path.
create temp function how_many_matches_in_path(src_path string, test_path string) returns integer as (
  (
    select count(distinct src_part)
    from unnest(split(src_path, '/')) as src_part
    inner join unnest(split(test_path, '/')) as test_part
      on src_part = test_part
  )
);
-- For every row of your_table, pair the elements of src_path and test_path by
-- array position and compute how_many_matches_in_path for each pair; the
-- results are collected into the dir_path_match_count array.
select
  *,
  array(
    select how_many_matches_in_path(src, test)
    from t.src_path as src with offset
    inner join t.test_path as test with offset
      using (offset)
    -- ARRAY(subquery) gives no element-order guarantee without ORDER BY;
    -- sort by position so each count lines up with its source array element.
    order by offset
  ) as dir_path_match_count
from your_table as t
如果应用于您问题中的示例输入数据
-- Sample input: a single row holding two arrays of '/'-delimited paths.
with your_table as (
  select
    [
      'lib/client/core.js',
      'lib/server/core.js'
    ] as src_path,
    [
      'test/server/core.js',
      'test/server/core.js'
    ] as test_path
)
输出是
我想统计一条路径(将在分隔符 /
处拆分)中有多少个匹配的单词,并返回一个对应的整数数组。
输入数据类似于:
我想添加另一列 match_count
,其中包含一个整数数组。例如:
要重现这种情况,下面是我正在使用的查询:
-- Stub of the function being asked about: it takes two arrays of path strings
-- and is declared to return an array of integers (the per-path match counts
-- described in the question text). The body is an unimplemented placeholder.
CREATE TEMP FUNCTION HOW_MANY_MATCHES_IN_PATH(src_path ARRAY<STRING>, test_path ARRAY<STRING>) RETURNS ARRAY<INTEGER> AS (
-- WHAT DO I PUT HERE?
);
-- Reproduction query: cross-join the two literal path lists, fold every
-- resulting row back into a pair of arrays, then call the function on them.
WITH path_pairs AS (
  SELECT
    ARRAY_AGG(src_item) AS src_path,
    ARRAY_AGG(test_item) AS test_path
  FROM UNNEST(['lib/client/core.js', 'lib/server/core.js']) AS src_item
  CROSS JOIN UNNEST(['test/server/core.js']) AS test_item
)
SELECT
  *,
  HOW_MANY_MATCHES_IN_PATH(src_path, test_path) AS dir_path_match_count
FROM path_pairs
我已经尝试在 HOW_MANY_MATCHES_IN_PATH
函数中使用 ARRAY
和 UNNEST
,但结果要么是报错,要么是(在本例中)得到一个包含 4 个元素的数组。
考虑以下方法
-- Counts the distinct '/'-separated segments of src_path that also occur
-- among the '/'-separated segments of test_path.
create temp function how_many_matches_in_path(src_path string, test_path string) returns integer as (
  (
    select count(distinct src_part)
    from unnest(split(src_path, '/')) as src_part
    inner join unnest(split(test_path, '/')) as test_part
      on src_part = test_part
  )
);
-- For every row of your_table, pair the elements of src_path and test_path by
-- array position and compute how_many_matches_in_path for each pair; the
-- results are collected into the dir_path_match_count array.
select
  *,
  array(
    select how_many_matches_in_path(src, test)
    from t.src_path as src with offset
    inner join t.test_path as test with offset
      using (offset)
    -- ARRAY(subquery) gives no element-order guarantee without ORDER BY;
    -- sort by position so each count lines up with its source array element.
    order by offset
  ) as dir_path_match_count
from your_table as t
如果应用于您问题中的示例输入数据
-- Sample input: a single row holding two arrays of '/'-delimited paths.
with your_table as (
  select
    [
      'lib/client/core.js',
      'lib/server/core.js'
    ] as src_path,
    [
      'test/server/core.js',
      'test/server/core.js'
    ] as test_path
)
输出是