SQL BQ Return 用户评价最好和最差的电影
SQL BQ Return user's best and worst rated movie
我有以下格式的数据:
user_id
user_name
movie_rating
movie_name
1
x
1
asd1
1
x
3
asd2
1
x
5
asd3
1
x
5
asd4
2
y
2
asd4
2
y
3
asd5
2
y
4
asd6
我想以 best_rated_movie 和 worst_rated_movie 为数组的格式提取每个用户评价最好和最差的电影(因为用户可以有多部最差的电影等):
user_id
user_name
best_rated_movie
worst_rated_movie
1
x
(asd3,asd4)
asd1
2
y
asd6
asd4
我已经设法得到一张只包含每个用户评分最高和评分最低的电影的表:
user
user_name
movie_rating
movie_name
1
x
1
asd1
1
x
5
asd3
1
x
5
asd4
2
y
2
asd4
2
y
4
asd6
这是我使用的代码:
-- Keep only the rows of source_table that carry a user's highest or lowest
-- movie_rating; ties at either extreme are all retained.
with rating_bounds as (
    -- one row per user holding both extremes of movie_rating
    select
        user_id,
        max(movie_rating) as max_rating,
        min(movie_rating) as min_rating
    from source_table
    group by user_id
),
columns_final as (
    select
        src.user_id,
        src.user_name,
        src.company_name,
        src.movie_rating,
        src.movie_name
    from source_table as src
    inner join rating_bounds as rb
        on src.user_id = rb.user_id
    -- a row qualifies when its rating equals either bound of its own user
    where src.movie_rating in (rb.max_rating, rb.min_rating)
)
select * from columns_final
不幸的是我不知道如何前进,我尝试过聚合但没有成功(尤其是在使用数组的情况下)。至少我会很感激如何处理这个问题的建议。
您可以像下面这样使用 string_agg():
-- Per user, list the titles tied for the highest and for the lowest rating
-- as comma-separated strings.
with columns_final as (
    -- annotate every row with its user's max and min movie_rating
    select
        src.user_id,
        src.user_name,
        src.movie_rating,
        src.movie_name,
        max(src.movie_rating) over (partition by src.user_id) as max_rating,
        min(src.movie_rating) over (partition by src.user_id) as min_rating
    from source_table as src
)
select
    user_id,
    user_name,
    -- rows that do not match the bound yield null from the case expression
    string_agg(case when movie_rating = max_rating then movie_name end, ',') as best_rated_movie,
    string_agg(case when movie_rating = min_rating then movie_name end, ',') as worst_rated_movie
from columns_final
where movie_rating in (max_rating, min_rating)
group by
    user_id,
    user_name
我还对您的查询做了一些改动,使其更紧凑:我使用窗口函数来计算 movie_rating 的最大值和最小值,而不是使用两个公用表表达式(CTE)。
I want to extract the best and the worst rated movie per user in a format where best_rated_movie and worst_rated_movie is array (because user can have multiple worst movies etc)
考虑以下
-- Per user, return the best- and worst-rated titles as arrays.
with movies_by_rating as (
    -- collapse each (user, rating) pair into one array of movie names
    select
        user_id,
        user_name,
        movie_rating,
        array_agg(movie_name) as movies
    from `project.dataset.table`
    group by user_id, user_name, movie_rating
)
select
    user_id,
    user_name,
    -- "order by ... limit 1" keeps only the array from the top / bottom rating
    array_concat_agg(movies order by movie_rating desc limit 1) as best_rated_movie,
    array_concat_agg(movies order by movie_rating asc limit 1) as worst_rated_movie
from movies_by_rating
group by user_id, user_name
如果应用于您问题中的示例数据 - 输出为
如果(正如其他答案所暗示的那样)您希望结果是一串以逗号分隔的电影 - 您可以在下面使用
-- Per user, return the best- and worst-rated titles as delimited strings.
with movies_by_rating as (
    -- collapse each (user, rating) pair into one string of movie names
    select
        user_id,
        user_name,
        movie_rating,
        string_agg(movie_name) as movies
    from `project.dataset.table`
    group by user_id, user_name, movie_rating
)
select
    user_id,
    user_name,
    -- "order by ... limit 1" keeps only the string from the top / bottom rating
    string_agg(movies order by movie_rating desc limit 1) as best_rated_movie,
    string_agg(movies order by movie_rating asc limit 1) as worst_rated_movie
from movies_by_rating
group by user_id, user_name
有输出
我有以下格式的数据:
user_id | user_name | movie_rating | movie_name |
---|---|---|---|
1 | x | 1 | asd1 |
1 | x | 3 | asd2 |
1 | x | 5 | asd3 |
1 | x | 5 | asd4 |
2 | y | 2 | asd4 |
2 | y | 3 | asd5 |
2 | y | 4 | asd6 |
我想以 best_rated_movie 和 worst_rated_movie 为数组的格式提取每个用户评价最好和最差的电影(因为用户可以有多部最差的电影等):
user_id | user_name | best_rated_movie | worst_rated_movie |
---|---|---|---|
1 | x | (asd3,asd4) | asd1 |
2 | y | asd6 | asd4 |
我已经设法得到一张只包含每个用户评分最高和评分最低的电影的表:
user | user_name | movie_rating | movie_name |
---|---|---|---|
1 | x | 1 | asd1 |
1 | x | 5 | asd3 |
1 | x | 5 | asd4 |
2 | y | 2 | asd4 |
2 | y | 4 | asd6 |
这是我使用的代码:
-- Keep only the rows of source_table that carry a user's highest or lowest
-- movie_rating; ties at either extreme are all retained.
with rating_bounds as (
    -- one row per user holding both extremes of movie_rating
    select
        user_id,
        max(movie_rating) as max_rating,
        min(movie_rating) as min_rating
    from source_table
    group by user_id
),
columns_final as (
    select
        src.user_id,
        src.user_name,
        src.company_name,
        src.movie_rating,
        src.movie_name
    from source_table as src
    inner join rating_bounds as rb
        on src.user_id = rb.user_id
    -- a row qualifies when its rating equals either bound of its own user
    where src.movie_rating in (rb.max_rating, rb.min_rating)
)
select * from columns_final
不幸的是我不知道如何前进,我尝试过聚合但没有成功(尤其是在使用数组的情况下)。至少我会很感激如何处理这个问题的建议。
您可以像下面这样使用 string_agg():
-- Per user, list the titles tied for the highest and for the lowest rating
-- as comma-separated strings.
with columns_final as (
    -- annotate every row with its user's max and min movie_rating
    select
        src.user_id,
        src.user_name,
        src.movie_rating,
        src.movie_name,
        max(src.movie_rating) over (partition by src.user_id) as max_rating,
        min(src.movie_rating) over (partition by src.user_id) as min_rating
    from source_table as src
)
select
    user_id,
    user_name,
    -- rows that do not match the bound yield null from the case expression
    string_agg(case when movie_rating = max_rating then movie_name end, ',') as best_rated_movie,
    string_agg(case when movie_rating = min_rating then movie_name end, ',') as worst_rated_movie
from columns_final
where movie_rating in (max_rating, min_rating)
group by
    user_id,
    user_name
我还对您的查询做了一些改动,使其更紧凑:我使用窗口函数来计算 movie_rating 的最大值和最小值,而不是使用两个公用表表达式(CTE)。
I want to extract the best and the worst rated movie per user in a format where best_rated_movie and worst_rated_movie is array (because user can have multiple worst movies etc)
考虑以下
-- Per user, return the best- and worst-rated titles as arrays.
with movies_by_rating as (
    -- collapse each (user, rating) pair into one array of movie names
    select
        user_id,
        user_name,
        movie_rating,
        array_agg(movie_name) as movies
    from `project.dataset.table`
    group by user_id, user_name, movie_rating
)
select
    user_id,
    user_name,
    -- "order by ... limit 1" keeps only the array from the top / bottom rating
    array_concat_agg(movies order by movie_rating desc limit 1) as best_rated_movie,
    array_concat_agg(movies order by movie_rating asc limit 1) as worst_rated_movie
from movies_by_rating
group by user_id, user_name
如果应用于您问题中的示例数据 - 输出为
如果(正如其他答案所暗示的那样)您希望结果是一串以逗号分隔的电影 - 您可以在下面使用
-- Per user, return the best- and worst-rated titles as delimited strings.
with movies_by_rating as (
    -- collapse each (user, rating) pair into one string of movie names
    select
        user_id,
        user_name,
        movie_rating,
        string_agg(movie_name) as movies
    from `project.dataset.table`
    group by user_id, user_name, movie_rating
)
select
    user_id,
    user_name,
    -- "order by ... limit 1" keeps only the string from the top / bottom rating
    string_agg(movies order by movie_rating desc limit 1) as best_rated_movie,
    string_agg(movies order by movie_rating asc limit 1) as worst_rated_movie
from movies_by_rating
group by user_id, user_name
有输出