BigQuery:如何根据重复项构建新行
BigQuery : how to build a new row based on duplicates
在 BigQuery 中,我得到了以下类型的数据:
#standardSQL
# Sample data: one row per hit, identified by (id, hitnumber), with the page that was viewed.
WITH name_table AS (
  SELECT id, hitnumber, page
  FROM UNNEST([
    STRUCT('a' AS id, 1 AS hitnumber, 'alpha' AS page),
    ('a', 2, 'beta'),
    ('a', 3, 'beta'),
    ('a', 4, 'alpha'),
    ('a', 5, 'beta'),
    ('b', 1, 'gamma'),
    ('b', 2, 'gamma')
  ])
)
SELECT id, hitnumber, page
FROM name_table
Output :
id| hitnumber| page
a | 1 | alpha
a | 2 | beta
a | 3 | beta
a | 4 | alpha
a | 5 | beta
b | 1 | gamma
b | 2 | gamma
我想根据连续重复的页面构建一个新的 hitnumber(new_hitnumber),如下所示。
id| hitnumber| page | new_hitnumber
a | 1 | alpha| 1
a | 2 | beta | 2
a | 3 | beta | 2
a | 4 | alpha| 3
a | 5 | beta | 4
b | 1 | gamma| 1
b | 2 | gamma| 1
或者如果可以直接去重得到:
id| page | new_hitnumber
a | alpha| 1
a | beta | 2
a | alpha| 3
a | beta | 4
b | gamma| 1
我尝试使用 ROW_NUMBER() 或 RANK(),但没有成功。
非常感谢您的帮助。
阿尔诺
关于"根据重复页面构建一个新的 hitnumber"这一需求:
使用以下方法
# Number each run of consecutive identical pages within an id (ordered by hitnumber).
SELECT
  * EXCEPT (is_new_run),
  # Running count of run starts up to the current hit = sequence number of its run.
  COUNTIF(COALESCE(is_new_run, TRUE)) OVER (PARTITION BY id ORDER BY hitnumber) AS new_hitnumber
FROM (
  SELECT
    *,
    # TRUE when the page differs from the previous hit of the same id;
    # NULL when there is no previous hit (treated as a run start above).
    page <> LAG(page) OVER (PARTITION BY id ORDER BY hitnumber) AS is_new_run
  FROM name_table
)
# Optional: append "ORDER BY id, hitnumber" for a stable display order.
如果应用于您问题中的示例数据,输出与问题中期望的结果一致(即带有 new_hitnumber 列的那张表)。
关于"或者如果可以直接去重"这一需求,可以使用:
# Collapse each run of consecutive identical pages (per id) into a single row.
SELECT * EXCEPT (hitnumber)
FROM (
  SELECT
    * EXCEPT (is_new_run),
    # Running count of run starts = sequence number of the current run.
    COUNTIF(COALESCE(is_new_run, TRUE)) OVER (PARTITION BY id ORDER BY hitnumber) AS new_hitnumber
  FROM (
    SELECT
      *,
      # TRUE when the page changed versus the previous hit; NULL when there is no previous hit.
      page <> LAG(page) OVER (PARTITION BY id ORDER BY hitnumber) AS is_new_run
    FROM name_table
  )
)
# No-op filter; presumably kept so that QUALIFY is accepted by BigQuery - TODO confirm it is still needed.
WHERE TRUE
# Keep only the earliest hit of every run.
QUALIFY ROW_NUMBER() OVER (PARTITION BY id, page, new_hitnumber ORDER BY hitnumber) = 1
ORDER BY id, new_hitnumber
在这种情况下,输出与问题中期望的去重结果一致(即只含 id、page、new_hitnumber 三列的那张表)。
在 BigQuery 中,我得到了以下类型的数据:
#standardSQL
# Sample data: one row per hit, identified by (id, hitnumber), with the page that was viewed.
WITH name_table AS (
  SELECT id, hitnumber, page
  FROM UNNEST([
    STRUCT('a' AS id, 1 AS hitnumber, 'alpha' AS page),
    ('a', 2, 'beta'),
    ('a', 3, 'beta'),
    ('a', 4, 'alpha'),
    ('a', 5, 'beta'),
    ('b', 1, 'gamma'),
    ('b', 2, 'gamma')
  ])
)
SELECT id, hitnumber, page
FROM name_table
Output :
id| hitnumber| page
a | 1 | alpha
a | 2 | beta
a | 3 | beta
a | 4 | alpha
a | 5 | beta
b | 1 | gamma
b | 2 | gamma
我想根据连续重复的页面构建一个新的 hitnumber(new_hitnumber),如下所示。
id| hitnumber| page | new_hitnumber
a | 1 | alpha| 1
a | 2 | beta | 2
a | 3 | beta | 2
a | 4 | alpha| 3
a | 5 | beta | 4
b | 1 | gamma| 1
b | 2 | gamma| 1
或者如果可以直接去重得到:
id| page | new_hitnumber
a | alpha| 1
a | beta | 2
a | alpha| 3
a | beta | 4
b | gamma| 1
我尝试使用 ROW_NUMBER() 或 RANK(),但没有成功。
非常感谢您的帮助。
阿尔诺
关于"根据重复页面构建一个新的 hitnumber"这一需求:
使用以下方法
# Number each run of consecutive identical pages within an id (ordered by hitnumber).
SELECT
  * EXCEPT (is_new_run),
  # Running count of run starts up to the current hit = sequence number of its run.
  COUNTIF(COALESCE(is_new_run, TRUE)) OVER (PARTITION BY id ORDER BY hitnumber) AS new_hitnumber
FROM (
  SELECT
    *,
    # TRUE when the page differs from the previous hit of the same id;
    # NULL when there is no previous hit (treated as a run start above).
    page <> LAG(page) OVER (PARTITION BY id ORDER BY hitnumber) AS is_new_run
  FROM name_table
)
# Optional: append "ORDER BY id, hitnumber" for a stable display order.
如果应用于您问题中的示例数据,输出与问题中期望的结果一致(即带有 new_hitnumber 列的那张表)。
关于"或者如果可以直接去重"这一需求,可以使用:
# Collapse each run of consecutive identical pages (per id) into a single row.
SELECT * EXCEPT (hitnumber)
FROM (
  SELECT
    * EXCEPT (is_new_run),
    # Running count of run starts = sequence number of the current run.
    COUNTIF(COALESCE(is_new_run, TRUE)) OVER (PARTITION BY id ORDER BY hitnumber) AS new_hitnumber
  FROM (
    SELECT
      *,
      # TRUE when the page changed versus the previous hit; NULL when there is no previous hit.
      page <> LAG(page) OVER (PARTITION BY id ORDER BY hitnumber) AS is_new_run
    FROM name_table
  )
)
# No-op filter; presumably kept so that QUALIFY is accepted by BigQuery - TODO confirm it is still needed.
WHERE TRUE
# Keep only the earliest hit of every run.
QUALIFY ROW_NUMBER() OVER (PARTITION BY id, page, new_hitnumber ORDER BY hitnumber) = 1
ORDER BY id, new_hitnumber
在这种情况下,输出与问题中期望的去重结果一致(即只含 id、page、new_hitnumber 三列的那张表)。