计算 BigQuery 中数组中的匹配项数
Count the number of matches in an array in BigQuery
如何计算数组中的匹配项数?例如,对于数组 [1,2,3] 中的数字 [1,3],将有 2 个匹配项,而对于数组 [1,2],将有 1 个匹配项。现在我只能检查 [1,3] 是否在数组中。
-- Reports, for each sample row, whether both 1 and 3 are elements of `arr`.
-- This is an all-or-nothing membership check; it does not count matches.
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr
  UNION ALL
  SELECT 2, [1, 2]
  UNION ALL
  SELECT 3, [3]
)
SELECT
  id,
  arr,
  [1, 3] AS numbers,
  -- Searched CASE: a FALSE or NULL predicate falls through to the ELSE branch.
  CASE
    WHEN 1 IN UNNEST(arr) AND 3 IN UNNEST(arr) THEN 'numbers is in array'
    ELSE 'numbers is not in array'
  END AS conclusion
FROM `arrays`
我正在尝试得到这样的结果:
考虑以下方法
-- For each sample row, joins the elements of `numbers` against the elements of `arr`:
--   check      = number of (numbers element, arr element) pairs that are equal
--   conclusion = whether every element of `numbers` found at least one partner in `arr`
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr UNION ALL
  SELECT 2, [1, 2] UNION ALL
  SELECT 3, [3]
),
-- Attach the values to look for to every row.
candidates AS (
  SELECT id, arr, [1, 3] AS numbers
  FROM `arrays`
)
SELECT
  c.id,
  c.arr,
  c.numbers,
  (
    SELECT COUNT(*)
    FROM UNNEST(c.numbers) AS wanted
    INNER JOIN UNNEST(c.arr) AS present
      ON wanted = present
  ) AS check,
  (
    -- After the LEFT JOIN, an unmatched `numbers` element has a NULL partner,
    -- so LOGICAL_AND is TRUE only when every element was matched.
    SELECT FORMAT('number is %sin array', IF(LOGICAL_AND(present IS NOT NULL), '', 'not '))
    FROM UNNEST(c.numbers) AS wanted
    LEFT JOIN UNNEST(c.arr) AS present
      ON wanted = present
  ) AS conclusion
FROM candidates AS c
有输出
利用集合运算,似乎可以这样做:
- 如果 `arr` 和 `numbers` 的并集与 `arr` 相同,结论就是 numbers is in array。
- 如果 `arr` 和 `numbers` 的并集大于 `arr`,则多出来的元素不在 `arr` 中。
- 因此,`numbers_len - (union_len - arr_len)` 就是 check。
-- Counts how many distinct values of `numbers` occur in `arr`, using set sizes:
--   size(arr INTERSECT numbers) = size(numbers) + size(arr) - size(arr UNION numbers)
-- Every length below is a DISTINCT count. Using the raw ARRAY_LENGTH(arr) next to a
-- de-duplicated union would over-count `check` and could falsely report containment
-- whenever `arr` holds repeated values (e.g. arr = [1, 1, 2], numbers = [1, 3]).
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr
  UNION ALL
  SELECT 2, [1, 2]
  UNION ALL
  SELECT 3, [3]
),
-- Attach the lookup values once so later steps reference `numbers`
-- instead of repeating the literal.
arrays_with_numbers AS (
  SELECT id, arr, [1, 3] AS numbers
  FROM `arrays`
),
calculated_arrays AS (
  SELECT
    id,
    arr,
    numbers,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(arr || numbers) AS el) AS union_len,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(arr) AS el) AS arr_len,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(numbers) AS el) AS numbers_len
  FROM arrays_with_numbers
)
SELECT
  id,
  arr,
  numbers,
  numbers_len - union_len + arr_len AS check,
  -- The union adds nothing to `arr` exactly when every value of `numbers` is already in `arr`.
  CASE
    WHEN union_len = arr_len THEN 'numbers is in array'
    ELSE 'numbers is not in array'
  END AS conclusion
FROM calculated_arrays
;
输出:
如何计算数组中的匹配项数?例如,对于数组 [1,2,3] 中的数字 [1,3],将有 2 个匹配项,而对于数组 [1,2],将有 1 个匹配项。现在我只能检查 [1,3] 是否在数组中。
-- Reports, for each sample row, whether both 1 and 3 are elements of `arr`.
-- This is an all-or-nothing membership check; it does not count matches.
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr
  UNION ALL
  SELECT 2, [1, 2]
  UNION ALL
  SELECT 3, [3]
)
SELECT
  id,
  arr,
  [1, 3] AS numbers,
  -- Searched CASE: a FALSE or NULL predicate falls through to the ELSE branch.
  CASE
    WHEN 1 IN UNNEST(arr) AND 3 IN UNNEST(arr) THEN 'numbers is in array'
    ELSE 'numbers is not in array'
  END AS conclusion
FROM `arrays`
我正在尝试得到这样的结果:
考虑以下方法
-- For each sample row, joins the elements of `numbers` against the elements of `arr`:
--   check      = number of (numbers element, arr element) pairs that are equal
--   conclusion = whether every element of `numbers` found at least one partner in `arr`
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr UNION ALL
  SELECT 2, [1, 2] UNION ALL
  SELECT 3, [3]
),
-- Attach the values to look for to every row.
candidates AS (
  SELECT id, arr, [1, 3] AS numbers
  FROM `arrays`
)
SELECT
  c.id,
  c.arr,
  c.numbers,
  (
    SELECT COUNT(*)
    FROM UNNEST(c.numbers) AS wanted
    INNER JOIN UNNEST(c.arr) AS present
      ON wanted = present
  ) AS check,
  (
    -- After the LEFT JOIN, an unmatched `numbers` element has a NULL partner,
    -- so LOGICAL_AND is TRUE only when every element was matched.
    SELECT FORMAT('number is %sin array', IF(LOGICAL_AND(present IS NOT NULL), '', 'not '))
    FROM UNNEST(c.numbers) AS wanted
    LEFT JOIN UNNEST(c.arr) AS present
      ON wanted = present
  ) AS conclusion
FROM candidates AS c
有输出
利用集合运算,似乎可以这样做:
- 如果 `arr` 和 `numbers` 的并集与 `arr` 相同,结论就是 numbers is in array。
- 如果 `arr` 和 `numbers` 的并集大于 `arr`,则多出来的元素不在 `arr` 中。
- 因此,`numbers_len - (union_len - arr_len)` 就是 check。
-- Counts how many distinct values of `numbers` occur in `arr`, using set sizes:
--   size(arr INTERSECT numbers) = size(numbers) + size(arr) - size(arr UNION numbers)
-- Every length below is a DISTINCT count. Using the raw ARRAY_LENGTH(arr) next to a
-- de-duplicated union would over-count `check` and could falsely report containment
-- whenever `arr` holds repeated values (e.g. arr = [1, 1, 2], numbers = [1, 3]).
WITH `arrays` AS (
  SELECT 1 AS id, [1, 2, 3] AS arr
  UNION ALL
  SELECT 2, [1, 2]
  UNION ALL
  SELECT 3, [3]
),
-- Attach the lookup values once so later steps reference `numbers`
-- instead of repeating the literal.
arrays_with_numbers AS (
  SELECT id, arr, [1, 3] AS numbers
  FROM `arrays`
),
calculated_arrays AS (
  SELECT
    id,
    arr,
    numbers,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(arr || numbers) AS el) AS union_len,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(arr) AS el) AS arr_len,
    (SELECT COUNT(DISTINCT el) FROM UNNEST(numbers) AS el) AS numbers_len
  FROM arrays_with_numbers
)
SELECT
  id,
  arr,
  numbers,
  numbers_len - union_len + arr_len AS check,
  -- The union adds nothing to `arr` exactly when every value of `numbers` is already in `arr`.
  CASE
    WHEN union_len = arr_len THEN 'numbers is in array'
    ELSE 'numbers is not in array'
  END AS conclusion
FROM calculated_arrays
;
输出: