SQL 查询:连接两个表,并用一个布尔标志指示其标签是否包含第三个表中的任何单词
sql query to join two tables and a boolean flag to indicate whether it contains any words from third table
我有 3 个 table 具有以下架构
-- Fixture schema for the question.
--   main      : one row per main_id, pointing at a secondary_id.
--   secondary : one tag per row; the same secondary_id repeats across rows.
--   bad_words : the phrases that should raise the flag.
CREATE TABLE main (
    main_id      INT PRIMARY KEY,
    secondary_id INT NOT NULL
);

CREATE TABLE secondary (
    secondary_id INT NOT NULL,
    tags         VARCHAR(100)
);

CREATE TABLE bad_words (
    words VARCHAR(100) NOT NULL
);

-- Sample data, loaded with explicit column lists.
INSERT INTO main (main_id, secondary_id) VALUES
    (1, 1001),
    (2, 1002),
    (3, 1003),
    (4, 1004);

INSERT INTO secondary (secondary_id, tags) VALUES
    (1001, 'good word'),
    (1002, 'bad word'),
    (1002, 'good word'),
    (1002, 'other word'),
    (1003, 'ugly'),
    (1003, 'bad word'),
    (1004, 'pleasant'),
    (1004, 'nice');

INSERT INTO bad_words (words) VALUES
    ('bad word'),
    ('ugly'),
    ('worst');
expected output
----------------
1, 1001, good word, 0 (boolean flag indicating whether the tags contain any one of the words from the bad_words table)
2, 1002, bad word,good word,other word, 1
3, 1003, ugly,bad word, 1
4, 1004, pleasant,nice, 0
我尝试用 CASE 表达式选出 1 或 0 作为最后一列,并用 JOIN 连接 main 表和 secondary 表,但越写越乱,卡住了。有人能帮我写出这个查询吗?这些表存储在 Redshift 中,我希望查询与 Redshift 兼容。
您可以在 sqlfiddle 中使用上述架构尝试您的查询。
编辑:我已去掉 secondary 表中的 PRIMARY KEY,并相应更新了架构和预期输出,以便更容易与 bad_words 表连接。
-- One output row per (main row, tag) pair, plus a 1/0 flag telling whether
-- the tag contains any phrase listed in bad_words.
--
-- EXISTS replaces the earlier LEFT JOIN ... ON c.words LIKE b.tags. That
-- predicate had no wildcards and used the tag as the pattern, so it only
-- matched tags identical to a bad word; a join would also repeat a row once
-- per matching bad word.
--
-- Caveat: '%' or '_' stored in bad_words.words act as LIKE wildcards.
SELECT
    a.main_id,
    a.secondary_id,
    b.tags,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM bad_words c
            WHERE b.tags LIKE '%' || c.words || '%'
        ) THEN 1
        ELSE 0
    END AS has_bad_word
FROM main a
INNER JOIN secondary b
    ON b.secondary_id = a.secondary_id;
您可以使用 EXISTS 配合正则表达式匹配,其中 \m 和 \M 分别是匹配单词开头和结尾的标记:
-- Self-contained demo: the three inputs are inlined as VALUES-backed CTEs.
-- flag is 1 when at least one bad word matches the tags as a whole word
-- (case-insensitive ~* match; \m and \M anchor the start and end of a word),
-- otherwise 0.
WITH
    main (main_id, secondary_id) AS (
        VALUES (1, 1000), (2, 1001), (3, 1002), (4, 1003)
    ),
    secondary (secondary_id, tags) AS (
        VALUES
            (1000, 'very good words'),
            (1001, 'good and bad words'),
            (1002, 'ugly'),
            (1003, 'pleasant')
    ),
    bad_words (words) AS (
        VALUES ('bad'), ('ugly'), ('worst')
    )
-- Column order mirrors what SELECT * yields for a USING join:
-- the join column first, then the remaining columns of each side.
SELECT
    secondary_id,
    m.main_id,
    s.tags,
    CAST(
        EXISTS (
            SELECT 1
            FROM bad_words
            WHERE s.tags ~* ('\m' || words || '\M')
        ) AS INT
    ) AS flag
FROM main m
INNER JOIN secondary s USING (secondary_id);
-- One row per main record: its tags collapsed into a comma-separated list,
-- plus a boolean telling whether that list contains any bad word.
--
-- The LIKE pattern is built with ||, the standard SQL concatenation
-- operator; + for strings is not portable (PostgreSQL, for one, rejects it).
--
-- Caveats: LISTAGG is called without WITHIN GROUP (ORDER BY ...), so the
-- order of tags inside the list is not guaranteed; '%' or '_' stored in
-- bad_words.words act as LIKE wildcards.
SELECT
    m.main_id,
    m.secondary_id,
    t.tags,
    t.is_bad_word
FROM srini.main m
INNER JOIN (
    SELECT
        st.secondary_id,
        st.tags,
        EXISTS (
            SELECT 1
            FROM srini.bad_words b
            WHERE st.tags LIKE '%' || b.words || '%'
        ) AS is_bad_word
    FROM (
        -- Collapse all tags of a secondary_id into one string.
        SELECT
            secondary_id,
            LISTAGG(tags, ',') AS tags
        FROM srini.secondary
        GROUP BY secondary_id
    ) st
) t
    ON t.secondary_id = m.secondary_id;
这在 redshift 中对我有用,并使用上述模式生成了以下输出。
1 1001 good word false
3 1003 ugly,bad word true
2 1002 good word,other word,bad word true
4 1004 pleasant,nice false
我有 3 个 table 具有以下架构
-- Fixture schema for the question.
--   main      : one row per main_id, pointing at a secondary_id.
--   secondary : one tag per row; the same secondary_id repeats across rows.
--   bad_words : the phrases that should raise the flag.
CREATE TABLE main (
    main_id      INT PRIMARY KEY,
    secondary_id INT NOT NULL
);

CREATE TABLE secondary (
    secondary_id INT NOT NULL,
    tags         VARCHAR(100)
);

CREATE TABLE bad_words (
    words VARCHAR(100) NOT NULL
);

-- Sample data, loaded with explicit column lists.
INSERT INTO main (main_id, secondary_id) VALUES
    (1, 1001),
    (2, 1002),
    (3, 1003),
    (4, 1004);

INSERT INTO secondary (secondary_id, tags) VALUES
    (1001, 'good word'),
    (1002, 'bad word'),
    (1002, 'good word'),
    (1002, 'other word'),
    (1003, 'ugly'),
    (1003, 'bad word'),
    (1004, 'pleasant'),
    (1004, 'nice');

INSERT INTO bad_words (words) VALUES
    ('bad word'),
    ('ugly'),
    ('worst');
expected output
----------------
1, 1001, good word, 0 (boolean flag indicating whether the tags contain any one of the words from the bad_words table)
2, 1002, bad word,good word,other word, 1
3, 1003, ugly,bad word, 1
4, 1004, pleasant,nice, 0
我尝试用 CASE 表达式选出 1 或 0 作为最后一列,并用 JOIN 连接 main 表和 secondary 表,但越写越乱,卡住了。有人能帮我写出这个查询吗?这些表存储在 Redshift 中,我希望查询与 Redshift 兼容。
您可以在 sqlfiddle 中使用上述架构尝试您的查询。
编辑:我已去掉 secondary 表中的 PRIMARY KEY,并相应更新了架构和预期输出,以便更容易与 bad_words 表连接。
-- One output row per (main row, tag) pair, plus a 1/0 flag telling whether
-- the tag contains any phrase listed in bad_words.
--
-- EXISTS replaces the earlier LEFT JOIN ... ON c.words LIKE b.tags. That
-- predicate had no wildcards and used the tag as the pattern, so it only
-- matched tags identical to a bad word; a join would also repeat a row once
-- per matching bad word.
--
-- Caveat: '%' or '_' stored in bad_words.words act as LIKE wildcards.
SELECT
    a.main_id,
    a.secondary_id,
    b.tags,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM bad_words c
            WHERE b.tags LIKE '%' || c.words || '%'
        ) THEN 1
        ELSE 0
    END AS has_bad_word
FROM main a
INNER JOIN secondary b
    ON b.secondary_id = a.secondary_id;
您可以使用 EXISTS 配合正则表达式匹配,其中 \m 和 \M 分别是匹配单词开头和结尾的标记:
-- Self-contained demo: the three inputs are inlined as VALUES-backed CTEs.
-- flag is 1 when at least one bad word matches the tags as a whole word
-- (case-insensitive ~* match; \m and \M anchor the start and end of a word),
-- otherwise 0.
WITH
    main (main_id, secondary_id) AS (
        VALUES (1, 1000), (2, 1001), (3, 1002), (4, 1003)
    ),
    secondary (secondary_id, tags) AS (
        VALUES
            (1000, 'very good words'),
            (1001, 'good and bad words'),
            (1002, 'ugly'),
            (1003, 'pleasant')
    ),
    bad_words (words) AS (
        VALUES ('bad'), ('ugly'), ('worst')
    )
-- Column order mirrors what SELECT * yields for a USING join:
-- the join column first, then the remaining columns of each side.
SELECT
    secondary_id,
    m.main_id,
    s.tags,
    CAST(
        EXISTS (
            SELECT 1
            FROM bad_words
            WHERE s.tags ~* ('\m' || words || '\M')
        ) AS INT
    ) AS flag
FROM main m
INNER JOIN secondary s USING (secondary_id);
-- One row per main record: its tags collapsed into a comma-separated list,
-- plus a boolean telling whether that list contains any bad word.
--
-- The LIKE pattern is built with ||, the standard SQL concatenation
-- operator; + for strings is not portable (PostgreSQL, for one, rejects it).
--
-- Caveats: LISTAGG is called without WITHIN GROUP (ORDER BY ...), so the
-- order of tags inside the list is not guaranteed; '%' or '_' stored in
-- bad_words.words act as LIKE wildcards.
SELECT
    m.main_id,
    m.secondary_id,
    t.tags,
    t.is_bad_word
FROM srini.main m
INNER JOIN (
    SELECT
        st.secondary_id,
        st.tags,
        EXISTS (
            SELECT 1
            FROM srini.bad_words b
            WHERE st.tags LIKE '%' || b.words || '%'
        ) AS is_bad_word
    FROM (
        -- Collapse all tags of a secondary_id into one string.
        SELECT
            secondary_id,
            LISTAGG(tags, ',') AS tags
        FROM srini.secondary
        GROUP BY secondary_id
    ) st
) t
    ON t.secondary_id = m.secondary_id;
这在 redshift 中对我有用,并使用上述模式生成了以下输出。
1 1001 good word false
3 1003 ugly,bad word true
2 1002 good word,other word,bad word true
4 1004 pleasant,nice false