如何使用 PostgreSQL 中的排名函数将在线订单与之前的几个网站访问相关联
How to associate an online order with several preceding website visits using a rank function in PostgreSQL
我正在努力创建一个数据库视图,将在线订单与用户之前访问网站相关联。这是一个电子商务网站,因此一个用户可能会多次访问和订购。
我已经通过 user_id 将访问(visits)表和订单(orders)表连接起来,并把每个订单与其下单时间之前最近的一次会话(访问)关联起来。现在,我希望把直到订单 #1 为止的每次访问都标记为“1”,把此后直到订单 #2 为止的每次访问都标记为“2”,依此类推。此外,如果某个用户没有任何 order_id,我希望返回“0”。请参阅下面链接的屏幕截图以供参考。
我已经尝试使用 dense_rank,但它只对存在 order_id 的行进行排名。我希望把这些排名填充到对应订单之前的那些访问行上。
-- Pairs each visit with the order the same user placed after that visit
-- started, before the user's next visit, and within 24 hours of the visit.
-- visit_number ranks a user's visits by start time; order_number ranks the
-- joined rows by order id.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY o.id)         AS order_number,
    o.id         AS order_id,
    o.created_at AS order_date
FROM visits AS v
FULL JOIN orders AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. min() over no rows is NULL and a
    -- comparison with NULL is never true, so without the COALESCE a user's
    -- most recent visit could never be matched to an order.
    AND o.created_at < COALESCE(
            (SELECT min(nv.started_at)
             FROM visits AS nv
             WHERE nv.user_id = v.user_id
               AND nv.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
-- No aggregates are selected; the GROUP BY only collapses duplicate join rows.
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at
ORDER BY v.started_at;
使用 lag 检查前一行的 order_id 是否非空;如果非空,就把当前行标记为新分组的开始。设置标志后,您可以使用累计和(running sum)来定义各个分组。
-- Numbers each user's visits by purchase cycle: visits up to and including
-- the one that carries the first order are 1, the following visits up to the
-- one carrying the second order are 2, and so on (visits after the last
-- order get last order number + 1). Users with no orders at all get 0.
SELECT
    t.*,
    CASE
        -- No order anywhere in this user's rows: report 0 rather than 1.
        WHEN count(t.order_id) OVER (PARTITION BY t.user_id) = 0 THEN 0
        -- Running total of group-start flags, shifted so the first group is 1.
        ELSE 1 + sum(t.flag) OVER (PARTITION BY t.user_id ORDER BY t.visit_date)
    END AS order_number
FROM (
    SELECT
        v.id         AS visit_id,
        v.user_id,
        v.started_at AS visit_date,
        dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
        o.id         AS order_id,
        o.created_at AS order_date,
        -- flag = 1 when the previous row (by visit start) carried an order,
        -- i.e. the current row opens a new group.
        CASE
            WHEN lag(o.id) OVER (PARTITION BY v.user_id ORDER BY v.started_at) IS NOT NULL THEN 1
            ELSE 0
        END AS flag
    FROM visits AS v
    FULL JOIN orders AS o
        ON  v.user_id = o.user_id
        AND v.started_at < o.created_at
        -- Upper bound: the user's next visit. min() over no rows is NULL and
        -- a comparison with NULL is never true, so without the COALESCE a
        -- user's most recent visit could never be matched to an order.
        AND o.created_at < COALESCE(
                (SELECT min(nv.started_at)
                 FROM visits AS nv
                 WHERE nv.user_id = v.user_id
                   AND nv.started_at > v.started_at),
                'infinity'
            )
        AND (v.started_at + '24:00:00'::interval) > o.created_at
) AS t;
GROUP BY 似乎没有必要,但我会保留它。你基本上需要的是一个累计和(cumulative sum)。
我会将特定订单之前的所有访问分配给订单号:
-- Running count of orders per user: a row's order_number is the number of
-- that user's joined orders on rows whose visit started at or before this
-- row's visit.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    o.id         AS order_id,
    o.created_at AS order_date,
    -- Only one column may be called order_number: a second output column with
    -- the same alias is ambiguous to consumers and is rejected by CREATE VIEW,
    -- so the dense_rank()-by-order-id column is not selected here.
    count(o.id) OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS order_number
FROM visits AS v
FULL JOIN orders AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. The subquery must reference its own
    -- alias (v2); the bare table name is not visible once it is aliased.
    -- min() over no rows is NULL and a comparison with NULL is never true, so
    -- the COALESCE lets a user's most recent visit still match an order.
    AND o.created_at < COALESCE(
            (SELECT min(v2.started_at)
             FROM visits AS v2
             WHERE v2.user_id = v.user_id
               AND v2.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at
ORDER BY v.started_at;
我想这就是你想要的逻辑:
-- Labels every visit with the sequence number of the nearest order at or
-- after it: orders are numbered per user by id, and a running MIN taken from
-- the latest visit backwards carries each order's number onto the visits that
-- precede it. Rows with no later order for the user yield NULL.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    o.id         AS order_id,
    o.created_at AS order_date,
    -- Only one column may be called order_number: a second output column with
    -- the same alias is ambiguous to consumers and is rejected by CREATE VIEW,
    -- so the dense_rank()-by-order-id column is not selected here.
    min(o.order_number) OVER (PARTITION BY v.user_id ORDER BY v.started_at DESC) AS order_number
FROM visits AS v
FULL JOIN (
    -- Per-user sequence number of each order, by order id.
    SELECT
        ord.id,
        ord.user_id,
        ord.created_at,
        row_number() OVER (PARTITION BY ord.user_id ORDER BY ord.id) AS order_number
    FROM orders AS ord
) AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. The subquery must reference its own
    -- alias (v2); the bare table name is not visible once it is aliased.
    -- min() over no rows is NULL and a comparison with NULL is never true, so
    -- the COALESCE lets a user's most recent visit still match an order.
    AND o.created_at < COALESCE(
            (SELECT min(v2.started_at)
             FROM visits AS v2
             WHERE v2.user_id = v.user_id
               AND v2.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
-- o.order_number is listed because the window function above reads it and a
-- derived table gives the planner no primary key to infer it from o.id.
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at, o.order_number
ORDER BY v.started_at;
不过,它可能会在您想要 0 的地方生成 NULL。
我正在努力创建一个数据库视图,将在线订单与用户之前访问网站相关联。这是一个电子商务网站,因此一个用户可能会多次访问和订购。
我已经通过 user_id 将访问(visits)表和订单(orders)表连接起来,并把每个订单与其下单时间之前最近的一次会话(访问)关联起来。现在,我希望把直到订单 #1 为止的每次访问都标记为“1”,把此后直到订单 #2 为止的每次访问都标记为“2”,依此类推。此外,如果某个用户没有任何 order_id,我希望返回“0”。请参阅下面链接的屏幕截图以供参考。
我已经尝试使用 dense_rank,但它只对存在 order_id 的行进行排名。我希望把这些排名填充到对应订单之前的那些访问行上。
-- Pairs each visit with the order the same user placed after that visit
-- started, before the user's next visit, and within 24 hours of the visit.
-- visit_number ranks a user's visits by start time; order_number ranks the
-- joined rows by order id.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY o.id)         AS order_number,
    o.id         AS order_id,
    o.created_at AS order_date
FROM visits AS v
FULL JOIN orders AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. min() over no rows is NULL and a
    -- comparison with NULL is never true, so without the COALESCE a user's
    -- most recent visit could never be matched to an order.
    AND o.created_at < COALESCE(
            (SELECT min(nv.started_at)
             FROM visits AS nv
             WHERE nv.user_id = v.user_id
               AND nv.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
-- No aggregates are selected; the GROUP BY only collapses duplicate join rows.
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at
ORDER BY v.started_at;
使用 lag 检查前一行的 order_id 是否非空;如果非空,就把当前行标记为新分组的开始。设置标志后,您可以使用累计和(running sum)来定义各个分组。
-- Numbers each user's visits by purchase cycle: visits up to and including
-- the one that carries the first order are 1, the following visits up to the
-- one carrying the second order are 2, and so on (visits after the last
-- order get last order number + 1). Users with no orders at all get 0.
SELECT
    t.*,
    CASE
        -- No order anywhere in this user's rows: report 0 rather than 1.
        WHEN count(t.order_id) OVER (PARTITION BY t.user_id) = 0 THEN 0
        -- Running total of group-start flags, shifted so the first group is 1.
        ELSE 1 + sum(t.flag) OVER (PARTITION BY t.user_id ORDER BY t.visit_date)
    END AS order_number
FROM (
    SELECT
        v.id         AS visit_id,
        v.user_id,
        v.started_at AS visit_date,
        dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
        o.id         AS order_id,
        o.created_at AS order_date,
        -- flag = 1 when the previous row (by visit start) carried an order,
        -- i.e. the current row opens a new group.
        CASE
            WHEN lag(o.id) OVER (PARTITION BY v.user_id ORDER BY v.started_at) IS NOT NULL THEN 1
            ELSE 0
        END AS flag
    FROM visits AS v
    FULL JOIN orders AS o
        ON  v.user_id = o.user_id
        AND v.started_at < o.created_at
        -- Upper bound: the user's next visit. min() over no rows is NULL and
        -- a comparison with NULL is never true, so without the COALESCE a
        -- user's most recent visit could never be matched to an order.
        AND o.created_at < COALESCE(
                (SELECT min(nv.started_at)
                 FROM visits AS nv
                 WHERE nv.user_id = v.user_id
                   AND nv.started_at > v.started_at),
                'infinity'
            )
        AND (v.started_at + '24:00:00'::interval) > o.created_at
) AS t;
GROUP BY 似乎没有必要,但我会保留它。你基本上需要的是一个累计和(cumulative sum)。
我会将特定订单之前的所有访问分配给订单号:
-- Running count of orders per user: a row's order_number is the number of
-- that user's joined orders on rows whose visit started at or before this
-- row's visit.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    o.id         AS order_id,
    o.created_at AS order_date,
    -- Only one column may be called order_number: a second output column with
    -- the same alias is ambiguous to consumers and is rejected by CREATE VIEW,
    -- so the dense_rank()-by-order-id column is not selected here.
    count(o.id) OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS order_number
FROM visits AS v
FULL JOIN orders AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. The subquery must reference its own
    -- alias (v2); the bare table name is not visible once it is aliased.
    -- min() over no rows is NULL and a comparison with NULL is never true, so
    -- the COALESCE lets a user's most recent visit still match an order.
    AND o.created_at < COALESCE(
            (SELECT min(v2.started_at)
             FROM visits AS v2
             WHERE v2.user_id = v.user_id
               AND v2.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at
ORDER BY v.started_at;
我想这就是你想要的逻辑:
-- Labels every visit with the sequence number of the nearest order at or
-- after it: orders are numbered per user by id, and a running MIN taken from
-- the latest visit backwards carries each order's number onto the visits that
-- precede it. Rows with no later order for the user yield NULL.
SELECT
    v.id         AS visit_id,
    v.user_id,
    v.started_at AS visit_date,
    dense_rank() OVER (PARTITION BY v.user_id ORDER BY v.started_at) AS visit_number,
    o.id         AS order_id,
    o.created_at AS order_date,
    -- Only one column may be called order_number: a second output column with
    -- the same alias is ambiguous to consumers and is rejected by CREATE VIEW,
    -- so the dense_rank()-by-order-id column is not selected here.
    min(o.order_number) OVER (PARTITION BY v.user_id ORDER BY v.started_at DESC) AS order_number
FROM visits AS v
FULL JOIN (
    -- Per-user sequence number of each order, by order id.
    SELECT
        ord.id,
        ord.user_id,
        ord.created_at,
        row_number() OVER (PARTITION BY ord.user_id ORDER BY ord.id) AS order_number
    FROM orders AS ord
) AS o
    ON  v.user_id = o.user_id
    AND v.started_at < o.created_at
    -- Upper bound: the user's next visit. The subquery must reference its own
    -- alias (v2); the bare table name is not visible once it is aliased.
    -- min() over no rows is NULL and a comparison with NULL is never true, so
    -- the COALESCE lets a user's most recent visit still match an order.
    AND o.created_at < COALESCE(
            (SELECT min(v2.started_at)
             FROM visits AS v2
             WHERE v2.user_id = v.user_id
               AND v2.started_at > v.started_at),
            'infinity'
        )
    AND (v.started_at + '24:00:00'::interval) > o.created_at
-- o.order_number is listed because the window function above reads it and a
-- derived table gives the planner no primary key to infer it from o.id.
GROUP BY v.id, v.user_id, v.started_at, o.id, o.created_at, o.order_number
ORDER BY v.started_at;
不过,它可能会在您想要 0 的地方生成 NULL。