如何编写 redshift aws 查询以在逗号分隔值中搜索值
How to write redshift aws query to search for a value in comma delimited values
表1
user_id
country_code
1
'IN,AU,AC'
2
'MX,IN'
表2
user_id
valid_country
1
'IN'
1
'AU'
2
'MX'
3
'YT'
4
'RU'
如您所见,country_code 列中的某些条目是由逗号分隔的多个代码。我想打印 table1 中的 user_id 及其对应的 country_code,但仅限于有效的代码。要检查有效性,需要使用包含 user_id 和 valid_country 两列的 table2。
期望的输出是:
user_id
country_code
1
'IN'
1
'AU'
2
'MX'
查询看起来像
select tb1.user_id, country_code from table1 tb1, table2 tb2 where
tb1.user_id=tb2.user_id and <Here I need to check if tb2.valid_country
is there in tb1.country_code (codes separated by commas)>
是否有任何简单的解决方案我可以在逗号分隔值中检查 valid_country。
简单的方法并不总是最好的。这里可能会出现许多极端情况(例如,并非所有国家/地区代码都是 2 个字母的情况)。话虽如此,LIKE 子句确实很简单:
-- Return each (user_id, code) pair from table2 whose code appears as a whole
-- element of table1's comma-separated country_code list for the same user.
-- Both the list and the code are wrapped in commas so that only complete
-- elements match; a bare '%code%' pattern would also match inside a longer
-- element (e.g. 'IN' inside 'IND').
-- NOTE(review): assumes the list has no spaces around its commas -- confirm.
select tb1.user_id, tb2.valid_country as country_code
from table1 tb1, table2 tb2
where tb1.user_id = tb2.user_id
and (',' || tb1.country_code || ',') like ('%,' || tb2.valid_country || ',%')
或者如果我们要用现代 SQL 语法来表达:
-- Same lookup written with an explicit ANSI join: keep the table2 codes that
-- appear as a whole element of the same user's comma-separated list in table1.
-- Both the list and the code are wrapped in commas so that only complete
-- elements match; a bare '%code%' pattern would also match inside a longer
-- element (e.g. 'IN' inside 'IND').
-- NOTE(review): assumes the list has no spaces around its commas -- confirm.
select tb1.user_id, tb2.valid_country as country_code
from table1 tb1
inner join table2 tb2
on tb1.user_id = tb2.user_id
and (',' || tb1.country_code || ',') like ('%,' || tb2.valid_country || ',%')
试试这个:
a) 将 tb1 与一系列连续整数(我在公用表表达式 CTE 中提供)进行 CROSS JOIN,并应用 SPLIT_PART() 函数把逗号分隔的列表拆分为单个元素,从而将 tb1 垂直化(一行拆成多行)。
b) 将垂直化后的结果与有效 user_id/国家代码组合表(tb2)进行 INNER JOIN,在两列上做等值连接。
WITH
-- your table 1, don't use in end query ...
tb1(user_id, country_code) AS (
            SELECT 1, 'IN,AU,AC'
  UNION ALL SELECT 2, 'MX,IN'
)
,
-- your table 2, don't use in end query ...
tb2(user_id, valid_country) AS (
            SELECT 1, 'IN'
  UNION ALL SELECT 1, 'AU'
  UNION ALL SELECT 2, 'MX'
  UNION ALL SELECT 3, 'YT'
  UNION ALL SELECT 4, 'RU'
)
-- real query starts here, replace following comma with "WITH" ...
,
-- Digits 0..9; cross joined with itself below to build the position series.
digit(d) AS (
            SELECT 0
  UNION ALL SELECT 1
  UNION ALL SELECT 2
  UNION ALL SELECT 3
  UNION ALL SELECT 4
  UNION ALL SELECT 5
  UNION ALL SELECT 6
  UNION ALL SELECT 7
  UNION ALL SELECT 8
  UNION ALL SELECT 9
)
,
-- Element positions 1..100. A fixed 1..5 series would silently drop the
-- sixth and later elements of a longer list; 100 is the new upper bound.
pos(n) AS (
  SELECT tens.d * 10 + ones.d + 1
  FROM digit AS tens
  CROSS JOIN digit AS ones
)
,
-- One row per (user, list element): SPLIT_PART picks the n-th element and
-- yields '' for empty elements or positions past the end of the list, so
-- the WHERE clause filters those rows out.
vertical AS (
  SELECT
    tb1.user_id
  , pos.n
  , SPLIT_PART(tb1.country_code, ',', pos.n) AS valid_country
  FROM tb1
  CROSS JOIN pos
  WHERE SPLIT_PART(tb1.country_code, ',', pos.n) <> ''
)
-- Keep only the elements that tb2 lists as valid for the same user,
-- in their original order within each list.
SELECT
  vertical.user_id
, vertical.valid_country
FROM vertical
INNER JOIN tb2 USING (user_id, valid_country)
ORDER BY vertical.user_id, vertical.n
;
-- out user_id | valid_country
-- out ---------+---------------
-- out 1 | IN
-- out 1 | AU
-- out 2 | MX
表1
user_id | country_code |
---|---|
1 | 'IN,AU,AC' |
2 | 'MX,IN' |
表2
user_id | valid_country |
---|---|
1 | 'IN' |
1 | 'AU' |
2 | 'MX' |
3 | 'YT' |
4 | 'RU' |
如您所见,country_code 列中的某些条目是由逗号分隔的多个代码。我想打印 table1 中的 user_id 及其对应的 country_code,但仅限于有效的代码。要检查有效性,需要使用包含 user_id 和 valid_country 两列的 table2。
期望的输出是:
user_id | country_code |
---|---|
1 | 'IN' |
1 | 'AU' |
2 | 'MX' |
查询看起来像
select tb1.user_id, country_code from table1 tb1, table2 tb2 where tb1.user_id=tb2.user_id and <Here I need to check if tb2.valid_country is there in tb1.country_code (codes separated by commas)>
是否有任何简单的解决方案我可以在逗号分隔值中检查 valid_country。
简单的方法并不总是最好的。这里可能会出现许多极端情况(例如,并非所有国家/地区代码都是 2 个字母的情况)。话虽如此,LIKE 子句确实很简单:
-- Return each (user_id, code) pair from table2 whose code appears as a whole
-- element of table1's comma-separated country_code list for the same user.
-- Both the list and the code are wrapped in commas so that only complete
-- elements match; a bare '%code%' pattern would also match inside a longer
-- element (e.g. 'IN' inside 'IND').
-- NOTE(review): assumes the list has no spaces around its commas -- confirm.
select tb1.user_id, tb2.valid_country as country_code
from table1 tb1, table2 tb2
where tb1.user_id = tb2.user_id
and (',' || tb1.country_code || ',') like ('%,' || tb2.valid_country || ',%')
或者如果我们要用现代 SQL 语法来表达:
-- Same lookup written with an explicit ANSI join: keep the table2 codes that
-- appear as a whole element of the same user's comma-separated list in table1.
-- Both the list and the code are wrapped in commas so that only complete
-- elements match; a bare '%code%' pattern would also match inside a longer
-- element (e.g. 'IN' inside 'IND').
-- NOTE(review): assumes the list has no spaces around its commas -- confirm.
select tb1.user_id, tb2.valid_country as country_code
from table1 tb1
inner join table2 tb2
on tb1.user_id = tb2.user_id
and (',' || tb1.country_code || ',') like ('%,' || tb2.valid_country || ',%')
试试这个:
a) 将 tb1 与一系列连续整数(我在公用表表达式 CTE 中提供)进行 CROSS JOIN,并应用 SPLIT_PART() 函数把逗号分隔的列表拆分为单个元素,从而将 tb1 垂直化(一行拆成多行)。
b) 将垂直化后的结果与有效 user_id/国家代码组合表(tb2)进行 INNER JOIN,在两列上做等值连接。
WITH
-- your table 1, don't use in end query ...
tb1(user_id, country_code) AS (
            SELECT 1, 'IN,AU,AC'
  UNION ALL SELECT 2, 'MX,IN'
)
,
-- your table 2, don't use in end query ...
tb2(user_id, valid_country) AS (
            SELECT 1, 'IN'
  UNION ALL SELECT 1, 'AU'
  UNION ALL SELECT 2, 'MX'
  UNION ALL SELECT 3, 'YT'
  UNION ALL SELECT 4, 'RU'
)
-- real query starts here, replace following comma with "WITH" ...
,
-- Digits 0..9; cross joined with itself below to build the position series.
digit(d) AS (
            SELECT 0
  UNION ALL SELECT 1
  UNION ALL SELECT 2
  UNION ALL SELECT 3
  UNION ALL SELECT 4
  UNION ALL SELECT 5
  UNION ALL SELECT 6
  UNION ALL SELECT 7
  UNION ALL SELECT 8
  UNION ALL SELECT 9
)
,
-- Element positions 1..100. A fixed 1..5 series would silently drop the
-- sixth and later elements of a longer list; 100 is the new upper bound.
pos(n) AS (
  SELECT tens.d * 10 + ones.d + 1
  FROM digit AS tens
  CROSS JOIN digit AS ones
)
,
-- One row per (user, list element): SPLIT_PART picks the n-th element and
-- yields '' for empty elements or positions past the end of the list, so
-- the WHERE clause filters those rows out.
vertical AS (
  SELECT
    tb1.user_id
  , pos.n
  , SPLIT_PART(tb1.country_code, ',', pos.n) AS valid_country
  FROM tb1
  CROSS JOIN pos
  WHERE SPLIT_PART(tb1.country_code, ',', pos.n) <> ''
)
-- Keep only the elements that tb2 lists as valid for the same user,
-- in their original order within each list.
SELECT
  vertical.user_id
, vertical.valid_country
FROM vertical
INNER JOIN tb2 USING (user_id, valid_country)
ORDER BY vertical.user_id, vertical.n
;
-- out user_id | valid_country
-- out ---------+---------------
-- out 1 | IN
-- out 1 | AU
-- out 2 | MX