根据玩过的游戏创建留存 table
create a retention table base on games played
情况:
我有一个包含三列的 table:
- 日期
- 游戏
- user_id
从那个 table 我需要创建一个最终看起来像这样的保留 table:
输出:
+------------+-------------+------+-------------+------+------+-------+
| date | game | day0 | day1 | day3 | day7 | day10 |
+------------+-------------+------+-------------+------+------+-------+
| 2019-01-01 | fifa | 100 | % of day 0 | | | |
| 2019-01-01 | nba | 100 | | | | |
| 2019-01-01 | battlefield | 100 | | | | |
| 2019-01-02 | fifa | 100 | | | | |
| 2019-01-02 | battlefield | 100 | | | | |
| 2019-01-03 | fifa | 100 | | | | |
| 2019-01-03 | nba | 100 | | | | |
| ... | | | | | | |
+------------+-------------+------+-------------+------+------+-------+
如果至少有一个人玩过游戏,第 0 天就是 100
day1 是 1 天或 2 天后回来的不同用户的比例
day3 是 3 到 6 天后回来的不同用户的比例。
等等...
如果你绘制数字,它应该给你一个非线性递减曲线。
逻辑:
(见下面的示例数据)
到目前为止我构建的骨架如下所示:
;WITH
baseDate AS (
SELECT
'target_date' AS [key_name]
,CAST('2019-01-01' AS date) AS [key_value]
),
durationDate AS (
SELECT DATEADD(DAY,0,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,1,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,2,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,3,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,4,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,5,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,6,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
)
select
dd.target_date
, play.game
, play.[count]/play.count *100 as day0
from durationDate as dd
left join (
select t1.date, t1.game, COUNT(distinct t1.user_id) as [count]
from #t1 t1
group by t1.date, t1.game
) as play on dd.target_date=play.[date]
我可能缺少第 1 天、第 3 天、第 7 天列的另一个基础 table,例如:
durationDateColumn AS (
SELECT
DATEADD(DAY,0,key_value) AS day1
, DATEADD(DAY,1,key_value) AS day2
, DATEADD(DAY,2,key_value) AS day3
, DATEADD(DAY,3,key_value) AS day4
, DATEADD(DAY,4,key_value) AS day5
, DATEADD(DAY,5,key_value) AS day6
, DATEADD(DAY,6,key_value) AS day7
FROM baseDate
WHERE [key_name] = 'target_date'
)
除非有更有效的方法。
示例数据:
drop table if exists #t1
create table #t1 ([date] date, [game] varchar (20),[user_id] bigint)
insert into #t1 values
('2019-01-01', 'fifa',11),
('2019-01-01', 'fifa',12),
('2019-01-01', 'fifa',13),
('2019-01-01', 'fifa',14),
('2019-01-02', 'fifa',12),
('2019-01-02', 'fifa',13),
('2019-01-02', 'fifa',14),
('2019-01-04', 'fifa',12),
('2019-01-04', 'fifa',13),
('2019-01-08', 'fifa',13),
('2019-01-01', 'nba',11),
('2019-01-01', 'nba',13),
('2019-01-01', 'nba',14),
('2019-01-02', 'nba',13),
('2019-01-02', 'nba',14),
('2019-01-04', 'nba',13),
('2019-01-08', 'nba',13)
加入日期和后续日期数据,计算天数和用户数。使用条件聚合按天数进行透视。
create table #t1 ([date] date, [game] varchar (20),[user_id] bigint);
insert into #t1 values
('2019-01-01', 'fifa',11),
('2019-01-01', 'fifa',12),
('2019-01-01', 'fifa',13),
('2019-01-01', 'fifa',14),
('2019-01-02', 'fifa',12),
('2019-01-02', 'fifa',13),
('2019-01-02', 'fifa',14),
('2019-01-04', 'fifa',12),
('2019-01-04', 'fifa',13),
('2019-01-08', 'fifa',13),
('2019-01-01', 'nba',11),
('2019-01-01', 'nba',13),
('2019-01-01', 'nba',14),
('2019-01-02', 'nba',13),
('2019-01-02', 'nba',14),
('2019-01-04', 'nba',13),
('2019-01-08', 'nba',13);
select [date], [game]
, 100.0 day0
, cast(100.0 * (0.0 + sum(case ret when 1 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day1
, cast(100.0 * (0.0 + sum(case ret when 2 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day2
, cast(100.0 * (0.0 + sum(case ret when 3 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day3
, cast(100.0 * (0.0 + sum(case ret when 4 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day4
-- .. more days
from (
select t1.[date], t1.[game], Datediff(dd, t1.[Date], t2.[Date]) ret, count(distinct t1.[user_id]) n
from #t1 t1
left join #t1 t2
on t1.game = t2.game and t1.user_id = t2.user_id and t1.date <= t2.date
group by t1.[date], t1.[game], Datediff(dd, t1.[Date], t2.[Date])
) evt
group by [date], [game]
order by [date], [game];
输出
date game day0 day1 day2 day3 day4
2019-01-01 fifa 100.0 75.00 0.00 50.00 0.00
2019-01-01 nba 100.0 66.67 0.00 33.33 0.00
2019-01-02 fifa 100.0 0.00 66.67 0.00 0.00
2019-01-02 nba 100.0 0.00 50.00 0.00 0.00
2019-01-04 fifa 100.0 0.00 0.00 0.00 50.00
2019-01-04 nba 100.0 0.00 0.00 0.00 100.00
2019-01-08 fifa 100.0 0.00 0.00 0.00 0.00
2019-01-08 nba 100.0 0.00 0.00 0.00 0.00
情况:
我有一个包含三列的 table:
- 日期
- 游戏
- user_id
从那个 table 我需要创建一个最终看起来像这样的保留 table:
输出:
+------------+-------------+------+-------------+------+------+-------+
| date | game | day0 | day1 | day3 | day7 | day10 |
+------------+-------------+------+-------------+------+------+-------+
| 2019-01-01 | fifa | 100 | % of day 0 | | | |
| 2019-01-01 | nba | 100 | | | | |
| 2019-01-01 | battlefield | 100 | | | | |
| 2019-01-02 | fifa | 100 | | | | |
| 2019-01-02 | battlefield | 100 | | | | |
| 2019-01-03 | fifa | 100 | | | | |
| 2019-01-03 | nba | 100 | | | | |
| ... | | | | | | |
+------------+-------------+------+-------------+------+------+-------+
如果至少有一个人玩过游戏,第 0 天就是 100
day1 是 1 天或 2 天后回来的不同用户的比例
day3 是 3 到 6 天后回来的不同用户的比例。
等等...
如果你绘制数字,它应该给你一个非线性递减曲线。
逻辑:
(见下面的示例数据)
到目前为止我构建的骨架如下所示:
;WITH
baseDate AS (
SELECT
'target_date' AS [key_name]
,CAST('2019-01-01' AS date) AS [key_value]
),
durationDate AS (
SELECT DATEADD(DAY,0,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,1,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,2,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,3,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,4,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,5,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
UNION ALL
SELECT DATEADD(DAY,6,key_value) AS target_date FROM baseDate WHERE [key_name] = 'target_date'
)
select
dd.target_date
, play.game
, play.[count]/play.count *100 as day0
from durationDate as dd
left join (
select t1.date, t1.game, COUNT(distinct t1.user_id) as [count]
from #t1 t1
group by t1.date, t1.game
) as play on dd.target_date=play.[date]
我可能缺少第 1 天、第 3 天、第 7 天列的另一个基础 table,例如:
durationDateColumn AS (
SELECT
DATEADD(DAY,0,key_value) AS day1
, DATEADD(DAY,1,key_value) AS day2
, DATEADD(DAY,2,key_value) AS day3
, DATEADD(DAY,3,key_value) AS day4
, DATEADD(DAY,4,key_value) AS day5
, DATEADD(DAY,5,key_value) AS day6
, DATEADD(DAY,6,key_value) AS day7
FROM baseDate
WHERE [key_name] = 'target_date'
)
除非有更有效的方法。
示例数据:
drop table if exists #t1
create table #t1 ([date] date, [game] varchar (20),[user_id] bigint)
insert into #t1 values
('2019-01-01', 'fifa',11),
('2019-01-01', 'fifa',12),
('2019-01-01', 'fifa',13),
('2019-01-01', 'fifa',14),
('2019-01-02', 'fifa',12),
('2019-01-02', 'fifa',13),
('2019-01-02', 'fifa',14),
('2019-01-04', 'fifa',12),
('2019-01-04', 'fifa',13),
('2019-01-08', 'fifa',13),
('2019-01-01', 'nba',11),
('2019-01-01', 'nba',13),
('2019-01-01', 'nba',14),
('2019-01-02', 'nba',13),
('2019-01-02', 'nba',14),
('2019-01-04', 'nba',13),
('2019-01-08', 'nba',13)
加入日期和后续日期数据,计算天数和用户数。使用条件聚合按天数进行透视。
create table #t1 ([date] date, [game] varchar (20),[user_id] bigint);
insert into #t1 values
('2019-01-01', 'fifa',11),
('2019-01-01', 'fifa',12),
('2019-01-01', 'fifa',13),
('2019-01-01', 'fifa',14),
('2019-01-02', 'fifa',12),
('2019-01-02', 'fifa',13),
('2019-01-02', 'fifa',14),
('2019-01-04', 'fifa',12),
('2019-01-04', 'fifa',13),
('2019-01-08', 'fifa',13),
('2019-01-01', 'nba',11),
('2019-01-01', 'nba',13),
('2019-01-01', 'nba',14),
('2019-01-02', 'nba',13),
('2019-01-02', 'nba',14),
('2019-01-04', 'nba',13),
('2019-01-08', 'nba',13);
select [date], [game]
, 100.0 day0
, cast(100.0 * (0.0 + sum(case ret when 1 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day1
, cast(100.0 * (0.0 + sum(case ret when 2 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day2
, cast(100.0 * (0.0 + sum(case ret when 3 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day3
, cast(100.0 * (0.0 + sum(case ret when 4 then n else 0 end)) / sum(case ret when 0 then n end) as decimal(5,2)) day4
-- .. more days
from (
select t1.[date], t1.[game], Datediff(dd, t1.[Date], t2.[Date]) ret, count(distinct t1.[user_id]) n
from #t1 t1
left join #t1 t2
on t1.game = t2.game and t1.user_id = t2.user_id and t1.date <= t2.date
group by t1.[date], t1.[game], Datediff(dd, t1.[Date], t2.[Date])
) evt
group by [date], [game]
order by [date], [game];
输出
date game day0 day1 day2 day3 day4
2019-01-01 fifa 100.0 75.00 0.00 50.00 0.00
2019-01-01 nba 100.0 66.67 0.00 33.33 0.00
2019-01-02 fifa 100.0 0.00 66.67 0.00 0.00
2019-01-02 nba 100.0 0.00 50.00 0.00 0.00
2019-01-04 fifa 100.0 0.00 0.00 0.00 50.00
2019-01-04 nba 100.0 0.00 0.00 0.00 100.00
2019-01-08 fifa 100.0 0.00 0.00 0.00 0.00
2019-01-08 nba 100.0 0.00 0.00 0.00 0.00