如何在不对同一张表做左连接(LEFT JOIN)的情况下得到 D1、D7、D30 留存统计
How to get D1 D7 D30 without left join on the same table
Objective:
我想知道在第 0 天注册的所有人当中,有多少人在 D1、D7 和 D30 之后再次登录。我想创建一个表值函数,用户可以传入一个日期,并得到 D0、D1、D7、D30 的结果。
结果应如下所示:
Date TotalD0 TotalD1 TotalD7 TotalD30
2019-04-01 3 3 2 1
情况:
我有一张登录记录表,包含电子邮件和 login_time 两列。我把同一张表自连接了三次,这在一张小的测试表上可以正常工作。但是,在有数百万行的真实数据上,查询会一直运行、无法结束。一定有更高效的方法来做到这一点。
我尝试了什么:
-- Inline table-valued function: takes the users that have a login row on
-- @StartDate (day 0) and returns a single row with
--   [Date]   : the @StartDate that was passed in
--   TotalD0  : distinct users with a login on @StartDate
--   TotalD1  : of those, distinct users with a login >= 1 day after the day-0 login
--   TotalD7  : same, >= 7 days after
--   TotalD30 : same, >= 30 days after
-- No row is returned when nobody logged in on @StartDate (the GROUP BY has no group).
--
-- Each LEFT JOIN multiplies the rows kept for a user by the number of matching later
-- logins, so COUNT(DISTINCT ...) is required to collapse them back to users.
--
-- NOTE(review): SQL Server does not allow a function to read #temp tables; #test1 is
-- the sample table of this write-up -- point the FROM/JOINs at the permanent login
-- table before creating the function.
CREATE FUNCTION fnTestData
(
    @StartDate AS date
)
RETURNS TABLE
AS
RETURN
    SELECT
        @StartDate             AS [Date],    -- an inline TVF needs a name for every column
        COUNT(DISTINCT t1.id)  AS TotalD0,
        COUNT(DISTINCT t1a.id) AS TotalD1,   -- NULLs from unmatched LEFT JOINs are not counted
        COUNT(DISTINCT t1b.id) AS TotalD7,
        COUNT(DISTINCT t1c.id) AS TotalD30
    FROM #test1 AS t1
    LEFT JOIN #test1 AS t1a
        ON  t1.id = t1a.id
        AND t1a.login_time >= DATEADD(day, 1, t1.login_time)
    LEFT JOIN #test1 AS t1b
        ON  t1.id = t1b.id
        AND t1b.login_time >= DATEADD(day, 7, t1.login_time)
    LEFT JOIN #test1 AS t1c
        ON  t1.id = t1c.id
        AND t1c.login_time >= DATEADD(day, 30, t1.login_time)
    WHERE t1.login_time = @StartDate
    GROUP BY t1.login_time
测试数据:
-- Sample login data used by the queries in this write-up.
-- One row per login; the same user may log in more than once on the same day.
CREATE TABLE #test1
(
    id         int,
    login_time date
);

INSERT INTO #test1 (id, login_time)
VALUES
    -- user 1: day 0 (twice), then +1, +18 and +34 days
    (1, '2019-04-01'),
    (1, '2019-04-01'),
    (1, '2019-04-02'),
    (1, '2019-04-19'),
    (1, '2019-05-05'),
    -- user 2: day 0, then +4, +9 and +14 days
    (2, '2019-04-01'),
    (2, '2019-04-05'),
    (2, '2019-04-10'),
    (2, '2019-04-15'),
    -- user 3: day 0 (twice), then +1 day
    (3, '2019-04-01'),
    (3, '2019-04-01'),
    (3, '2019-04-02');
您的查询可以改写为不需要自连接的条件聚合形式:
-- Retention for one cohort day without self-joining per threshold.
-- D0  : distinct users with a login on @StartDate
-- D1 / D7 / D30 : of those users, how many have a login at least 1 / 7 / 30 days
--                 after @StartDate
DECLARE @StartDate date = '2019-04-01';

WITH cohort_login AS (
    -- Logins on or after the start date, restricted to users seen on the start date.
    SELECT
        l.id,
        l.login_time
    FROM #test1 AS l
    WHERE l.login_time >= @StartDate
      AND l.id IN (
            SELECT s.id
            FROM #test1 AS s
            WHERE s.login_time = @StartDate
          )
)
SELECT
    COUNT(DISTINCT id) AS D0,
    -- CASE yields NULL below the threshold, and COUNT ignores NULLs.
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 1, @StartDate) THEN id END) AS D1,
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 7, @StartDate) THEN id END) AS D7,
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 30, @StartDate) THEN id END) AS D30
FROM cohort_login;
D0 D1 D7 D30
3 3 2 1
您需要创建适当的索引来加快速度。
如果您想根据人们开始的日期进行同期群分析:
-- Cohort retention: groups users by the date of their first login (first_ld) and,
-- per cohort, counts
--   num_d0  : distinct users in the cohort
--   num_d1  : distinct users with a login at least 1 day after their first login
--   num_d7  : same, at least 7 days after
--   num_d30 : same, at least 30 days after
SELECT
    first_ld,
    -- The derived table has one row per (user, login day), so users must be counted
    -- with DISTINCT; COUNT(*) / SUM(1) would count login-days instead of users.
    COUNT(DISTINCT id) AS num_d0,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 1, first_ld) THEN id END) AS num_d1,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 7, first_ld) THEN id END) AS num_d7,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 30, first_ld) THEN id END) AS num_d30
FROM (
    -- One row per user per calendar day, tagged with that user's earliest login day.
    SELECT
        id,
        CONVERT(date, login_time) AS login_date,
        MIN(CONVERT(date, login_time)) OVER (PARTITION BY id) AS first_ld
    FROM #test1
    GROUP BY id, CONVERT(date, login_time)
) AS daily_login
GROUP BY first_ld
ORDER BY first_ld;
Objective:
我想知道在第 0 天注册的所有人当中,有多少人在 D1、D7 和 D30 之后再次登录。我想创建一个表值函数,用户可以传入一个日期,并得到 D0、D1、D7、D30 的结果。结果应如下所示:
Date TotalD0 TotalD1 TotalD7 TotalD30
2019-04-01 3 3 2 1
情况:
我有一张登录记录表,包含电子邮件和 login_time 两列。我把同一张表自连接了三次,这在一张小的测试表上可以正常工作。但是,在有数百万行的真实数据上,查询会一直运行、无法结束。一定有更高效的方法来做到这一点。
我尝试了什么:
-- Inline table-valued function: takes the users that have a login row on
-- @StartDate (day 0) and returns a single row with
--   [Date]   : the @StartDate that was passed in
--   TotalD0  : distinct users with a login on @StartDate
--   TotalD1  : of those, distinct users with a login >= 1 day after the day-0 login
--   TotalD7  : same, >= 7 days after
--   TotalD30 : same, >= 30 days after
-- No row is returned when nobody logged in on @StartDate (the GROUP BY has no group).
--
-- Each LEFT JOIN multiplies the rows kept for a user by the number of matching later
-- logins, so COUNT(DISTINCT ...) is required to collapse them back to users.
--
-- NOTE(review): SQL Server does not allow a function to read #temp tables; #test1 is
-- the sample table of this write-up -- point the FROM/JOINs at the permanent login
-- table before creating the function.
CREATE FUNCTION fnTestData
(
    @StartDate AS date
)
RETURNS TABLE
AS
RETURN
    SELECT
        @StartDate             AS [Date],    -- an inline TVF needs a name for every column
        COUNT(DISTINCT t1.id)  AS TotalD0,
        COUNT(DISTINCT t1a.id) AS TotalD1,   -- NULLs from unmatched LEFT JOINs are not counted
        COUNT(DISTINCT t1b.id) AS TotalD7,
        COUNT(DISTINCT t1c.id) AS TotalD30
    FROM #test1 AS t1
    LEFT JOIN #test1 AS t1a
        ON  t1.id = t1a.id
        AND t1a.login_time >= DATEADD(day, 1, t1.login_time)
    LEFT JOIN #test1 AS t1b
        ON  t1.id = t1b.id
        AND t1b.login_time >= DATEADD(day, 7, t1.login_time)
    LEFT JOIN #test1 AS t1c
        ON  t1.id = t1c.id
        AND t1c.login_time >= DATEADD(day, 30, t1.login_time)
    WHERE t1.login_time = @StartDate
    GROUP BY t1.login_time
测试数据:
-- Sample login data used by the queries in this write-up.
-- One row per login; the same user may log in more than once on the same day.
CREATE TABLE #test1
(
    id         int,
    login_time date
);

INSERT INTO #test1 (id, login_time)
VALUES
    -- user 1: day 0 (twice), then +1, +18 and +34 days
    (1, '2019-04-01'),
    (1, '2019-04-01'),
    (1, '2019-04-02'),
    (1, '2019-04-19'),
    (1, '2019-05-05'),
    -- user 2: day 0, then +4, +9 and +14 days
    (2, '2019-04-01'),
    (2, '2019-04-05'),
    (2, '2019-04-10'),
    (2, '2019-04-15'),
    -- user 3: day 0 (twice), then +1 day
    (3, '2019-04-01'),
    (3, '2019-04-01'),
    (3, '2019-04-02');
您的查询可以改写为不需要自连接的条件聚合形式:
-- Retention for one cohort day without self-joining per threshold.
-- D0  : distinct users with a login on @StartDate
-- D1 / D7 / D30 : of those users, how many have a login at least 1 / 7 / 30 days
--                 after @StartDate
DECLARE @StartDate date = '2019-04-01';

WITH cohort_login AS (
    -- Logins on or after the start date, restricted to users seen on the start date.
    SELECT
        l.id,
        l.login_time
    FROM #test1 AS l
    WHERE l.login_time >= @StartDate
      AND l.id IN (
            SELECT s.id
            FROM #test1 AS s
            WHERE s.login_time = @StartDate
          )
)
SELECT
    COUNT(DISTINCT id) AS D0,
    -- CASE yields NULL below the threshold, and COUNT ignores NULLs.
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 1, @StartDate) THEN id END) AS D1,
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 7, @StartDate) THEN id END) AS D7,
    COUNT(DISTINCT CASE WHEN login_time >= DATEADD(DAY, 30, @StartDate) THEN id END) AS D30
FROM cohort_login;
D0 D1 D7 D30
3 3 2 1
您需要创建适当的索引来加快速度。
如果您想根据人们开始的日期进行同期群分析:
-- Cohort retention: groups users by the date of their first login (first_ld) and,
-- per cohort, counts
--   num_d0  : distinct users in the cohort
--   num_d1  : distinct users with a login at least 1 day after their first login
--   num_d7  : same, at least 7 days after
--   num_d30 : same, at least 30 days after
SELECT
    first_ld,
    -- The derived table has one row per (user, login day), so users must be counted
    -- with DISTINCT; COUNT(*) / SUM(1) would count login-days instead of users.
    COUNT(DISTINCT id) AS num_d0,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 1, first_ld) THEN id END) AS num_d1,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 7, first_ld) THEN id END) AS num_d7,
    COUNT(DISTINCT CASE WHEN login_date >= DATEADD(DAY, 30, first_ld) THEN id END) AS num_d30
FROM (
    -- One row per user per calendar day, tagged with that user's earliest login day.
    SELECT
        id,
        CONVERT(date, login_time) AS login_date,
        MIN(CONVERT(date, login_time)) OVER (PARTITION BY id) AS first_ld
    FROM #test1
    GROUP BY id, CONVERT(date, login_time)
) AS daily_login
GROUP BY first_ld
ORDER BY first_ld;