SQL 查询满足特定条件的最长连续天数(streak)
SQL query for longest streak with a specific condition
我有一张表,看起来像这样(并非每天都有记录,也并非每个 field_name 每天都有记录):
date | field_name | field_value |
---|---|---|
2021-01-01 | coffee_available | 1 |
2021-01-02 | coffee_available | 1 |
2021-01-03 | tea_available | 0 |
2021-01-03 | coffee_available | 1 |
2021-01-04 | tea_available | 0 |
2021-01-06 | coffee_available | 0 |
2021-01-07 | coffee_available | 0 |
2021-01-08 | coffee_available | 1 |
2021-01-08 | tea_available | 1 |
我想按特定条件查询数据库,例如:有咖啡可用的最长连续天数,或者没有茶可用的最长连续天数。结果可以只是一个数字,或者——更好的是——最长的那段连续记录及其开始或结束日期。
我看过其他关于连续记录(streak)的问题,但不知道该如何修改才能适用于我的情况。你能帮帮我吗?
您可以使用以下方法:先用 ROW_NUMBER 把满足特定条件的连续日期划分成组,再用 DATEDIFF 计算每组的连续天数(同一组内相邻两天只相差 1 天)。将 COUNT 的结果减 1 也可以得到同样的连续天数,如下所示。下面先给出一个总体查询,然后给出针对问题中两个具体场景的查询。
查询#1
-- Every streak (run of consecutive calendar days) for every
-- field_name / field_value combination in `conditions`.
--
-- Streaks are found with the "date minus row number" technique: within one
-- (field_name, field_value) partition ordered by date, consecutive days all
-- map to the same anchor date (date - ROW_NUMBER() days). A day that is
-- missing from the partition shifts the anchor and starts a new streak.
-- A LAG-based "day_diff < 2" filter is not sufficient for this: it only drops
-- the first row after a gap, leaving the following rows merged into the
-- earlier streak.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- Both day_streak columns are the days elapsed between the first and last day
-- of the streak (a single-day streak yields 0); add 1 for an inclusive count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    COUNT(1) - 1 AS day_streak_method_2,
    t1.field_name,
    t1.field_value
FROM
    (
        SELECT
            `DATE`,
            field_name,
            field_value,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (
                    PARTITION BY `field_name`, `field_value`
                    ORDER BY `DATE`
                ) DAY
            ) AS gnum
        FROM
            conditions
    ) t1
GROUP BY
    gnum,
    field_name,
    field_value;
MIN(DATE) | MAX(DATE) | day_streak_method_1 | day_streak_method_2 | field_name | field_value |
---|---|---|---|---|---|
2021-01-01 00:00:00 | 2021-01-03 00:00:00 | 2 | 2 | coffee_available | 1 |
2021-01-03 00:00:00 | 2021-01-04 00:00:00 | 1 | 1 | tea_available | 0 |
2021-01-08 00:00:00 | 2021-01-08 00:00:00 | 0 | 0 | coffee_available | 1 |
2021-01-08 00:00:00 | 2021-01-08 00:00:00 | 0 | 0 | tea_available | 1 |
2021-01-06 00:00:00 | 2021-01-07 00:00:00 | 1 | 1 | coffee_available | 0 |
查询#2
-- Label row announcing the first question.
SELECT
    'The longest streak of days with coffee available' AS `Question 1`;
Question 1
The longest streak of days with coffee available
查询#3
-- Longest streak of consecutive calendar days with coffee_available = 1.
--
-- Rows are filtered first, then split into streaks with the "date minus row
-- number" technique: consecutive days share the same anchor date
-- (date - ROW_NUMBER() days), and any missing day starts a new streak.
-- Filtering on a LAG-based day_diff alone would only drop the first row after
-- a gap and merge the remaining rows into the earlier streak.
--
-- Assumes at most one matching row per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (ORDER BY `DATE`) DAY
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'coffee_available'
            AND field_value = 1
    ) t1
GROUP BY
    gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    -- Tie-break so LIMIT 1 is deterministic: the earliest streak wins.
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) | MAX(DATE) | day_streak_method_1 |
---|---|---|
2021-01-01 00:00:00 | 2021-01-03 00:00:00 | 2 |
查询#4
-- Label row announcing the second question.
SELECT
    'The longest streak of days with no tea available' AS `Question 2`;
Question 2
The longest streak of days with no tea available
查询#5
-- Longest streak of consecutive calendar days with tea_available = 0.
--
-- Rows are filtered first, then split into streaks with the "date minus row
-- number" technique: consecutive days share the same anchor date
-- (date - ROW_NUMBER() days), and any missing day starts a new streak.
-- Filtering on a LAG-based day_diff alone would only drop the first row after
-- a gap and merge the remaining rows into the earlier streak.
--
-- Assumes at most one matching row per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (ORDER BY `DATE`) DAY
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'tea_available'
            AND field_value = 0
    ) t1
GROUP BY
    gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    -- Tie-break so LIMIT 1 is deterministic: the earliest streak wins.
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) | MAX(DATE) | day_streak_method_1 |
---|---|---|
2021-01-03 00:00:00 | 2021-01-04 00:00:00 | 1 |
编辑 1
在 case 表达式的帮助下减少了子查询的使用。 根据需要在 where 子句中应用其他过滤器。
-- All streaks per field_name / field_value using a single derived table.
--
-- gnum is an anchor date (date - ROW_NUMBER() days) computed inside each
-- (field_name, field_value) partition: consecutive days share one anchor and
-- any missing day produces a new one, so grouping by it yields one row per
-- streak. This replaces a CASE/LAG filter, which would only null out the
-- first row after a gap and leave the rows that follow merged into the
-- earlier streak.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
-- Apply extra filters in a WHERE clause inside the derived table as needed.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    field_name,
    field_value
FROM (
    SELECT
        `date`,
        field_name,
        field_value,
        DATE_SUB(
            DATE(`date`),
            INTERVAL ROW_NUMBER() OVER (
                PARTITION BY `field_name`, `field_value`
                ORDER BY `date`
            ) DAY
        ) AS gnum
    FROM
        conditions
) t
GROUP BY
    gnum, field_name, field_value;
使用旧版本 mysql
-- All streaks per field_name / field_value for MySQL versions that lack
-- window functions.
--
-- For every row, gnum is the first day of the streak the row belongs to:
-- the latest row with the same field_name / field_value, dated on or before
-- this row, that has no row for the preceding calendar day.
-- Grouping by gnum therefore yields one output row per streak.
--
-- No user variables are used: MySQL does not guarantee the order in which
-- expressions involving user variables are evaluated within a statement, so
-- counters driven by @variables together with ORDER BY are unreliable.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- NOTE(review): the correlated subqueries run per row and may be slow on
-- large tables; an index on (field_name, field_value, date) should help.
-- day_streak is the days elapsed between first and last day of the streak
-- (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak,
    field_name,
    field_value
FROM (
    SELECT
        c.`date`,
        c.field_name,
        c.field_value,
        (
            SELECT
                MAX(s.`date`)
            FROM
                conditions AS s
            WHERE
                s.field_name = c.field_name
                AND s.field_value = c.field_value
                AND s.`date` <= c.`date`
                -- s starts a streak when the previous day has no matching row.
                AND NOT EXISTS (
                    SELECT
                        1
                    FROM
                        conditions AS p
                    WHERE
                        p.field_name = s.field_name
                        AND p.field_value = s.field_value
                        AND DATEDIFF(s.`date`, p.`date`) = 1
                )
        ) AS gnum
    FROM
        conditions AS c
) t
GROUP BY
    gnum, field_name, field_value;
View working demo on db fiddle
让我知道这是否适合你
我有一张表,看起来像这样(并非每天都有记录,也并非每个 field_name 每天都有记录):
date | field_name | field_value |
---|---|---|
2021-01-01 | coffee_available | 1 |
2021-01-02 | coffee_available | 1 |
2021-01-03 | tea_available | 0 |
2021-01-03 | coffee_available | 1 |
2021-01-04 | tea_available | 0 |
2021-01-06 | coffee_available | 0 |
2021-01-07 | coffee_available | 0 |
2021-01-08 | coffee_available | 1 |
2021-01-08 | tea_available | 1 |
我想按特定条件查询数据库,例如:有咖啡可用的最长连续天数,或者没有茶可用的最长连续天数。结果可以只是一个数字,或者——更好的是——最长的那段连续记录及其开始或结束日期。
我看过其他关于连续记录(streak)的问题,但不知道该如何修改才能适用于我的情况。你能帮帮我吗?
您可以使用以下方法:先用 ROW_NUMBER 把满足特定条件的连续日期划分成组,再用 DATEDIFF 计算每组的连续天数(同一组内相邻两天只相差 1 天)。将 COUNT 的结果减 1 也可以得到同样的连续天数,如下所示。下面先给出一个总体查询,然后给出针对问题中两个具体场景的查询。
查询#1
-- Every streak (run of consecutive calendar days) for every
-- field_name / field_value combination in `conditions`.
--
-- Streaks are found with the "date minus row number" technique: within one
-- (field_name, field_value) partition ordered by date, consecutive days all
-- map to the same anchor date (date - ROW_NUMBER() days). A day that is
-- missing from the partition shifts the anchor and starts a new streak.
-- A LAG-based "day_diff < 2" filter is not sufficient for this: it only drops
-- the first row after a gap, leaving the following rows merged into the
-- earlier streak.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- Both day_streak columns are the days elapsed between the first and last day
-- of the streak (a single-day streak yields 0); add 1 for an inclusive count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    COUNT(1) - 1 AS day_streak_method_2,
    t1.field_name,
    t1.field_value
FROM
    (
        SELECT
            `DATE`,
            field_name,
            field_value,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (
                    PARTITION BY `field_name`, `field_value`
                    ORDER BY `DATE`
                ) DAY
            ) AS gnum
        FROM
            conditions
    ) t1
GROUP BY
    gnum,
    field_name,
    field_value;
MIN(DATE) | MAX(DATE) | day_streak_method_1 | day_streak_method_2 | field_name | field_value |
---|---|---|---|---|---|
2021-01-01 00:00:00 | 2021-01-03 00:00:00 | 2 | 2 | coffee_available | 1 |
2021-01-03 00:00:00 | 2021-01-04 00:00:00 | 1 | 1 | tea_available | 0 |
2021-01-08 00:00:00 | 2021-01-08 00:00:00 | 0 | 0 | coffee_available | 1 |
2021-01-08 00:00:00 | 2021-01-08 00:00:00 | 0 | 0 | tea_available | 1 |
2021-01-06 00:00:00 | 2021-01-07 00:00:00 | 1 | 1 | coffee_available | 0 |
查询#2
-- Label row announcing the first question.
SELECT
    'The longest streak of days with coffee available' AS `Question 1`;
Question 1 |
---|
The longest streak of days with coffee available |
查询#3
-- Longest streak of consecutive calendar days with coffee_available = 1.
--
-- Rows are filtered first, then split into streaks with the "date minus row
-- number" technique: consecutive days share the same anchor date
-- (date - ROW_NUMBER() days), and any missing day starts a new streak.
-- Filtering on a LAG-based day_diff alone would only drop the first row after
-- a gap and merge the remaining rows into the earlier streak.
--
-- Assumes at most one matching row per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (ORDER BY `DATE`) DAY
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'coffee_available'
            AND field_value = 1
    ) t1
GROUP BY
    gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    -- Tie-break so LIMIT 1 is deterministic: the earliest streak wins.
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) | MAX(DATE) | day_streak_method_1 |
---|---|---|
2021-01-01 00:00:00 | 2021-01-03 00:00:00 | 2 |
查询#4
-- Label row announcing the second question.
SELECT
    'The longest streak of days with no tea available' AS `Question 2`;
Question 2 |
---|
The longest streak of days with no tea available |
查询#5
-- Longest streak of consecutive calendar days with tea_available = 0.
--
-- Rows are filtered first, then split into streaks with the "date minus row
-- number" technique: consecutive days share the same anchor date
-- (date - ROW_NUMBER() days), and any missing day starts a new streak.
-- Filtering on a LAG-based day_diff alone would only drop the first row after
-- a gap and merge the remaining rows into the earlier streak.
--
-- Assumes at most one matching row per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            -- Anchor date: constant inside a streak, different across streaks.
            DATE_SUB(
                DATE(`DATE`),
                INTERVAL ROW_NUMBER() OVER (ORDER BY `DATE`) DAY
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'tea_available'
            AND field_value = 0
    ) t1
GROUP BY
    gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    -- Tie-break so LIMIT 1 is deterministic: the earliest streak wins.
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) | MAX(DATE) | day_streak_method_1 |
---|---|---|
2021-01-03 00:00:00 | 2021-01-04 00:00:00 | 1 |
编辑 1
在 case 表达式的帮助下减少了子查询的使用。 根据需要在 where 子句中应用其他过滤器。
-- All streaks per field_name / field_value using a single derived table.
--
-- gnum is an anchor date (date - ROW_NUMBER() days) computed inside each
-- (field_name, field_value) partition: consecutive days share one anchor and
-- any missing day produces a new one, so grouping by it yields one row per
-- streak. This replaces a CASE/LAG filter, which would only null out the
-- first row after a gap and leave the rows that follow merged into the
-- earlier streak.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- day_streak_method_1 is the days elapsed between first and last day of the
-- streak (a single-day streak yields 0); add 1 for an inclusive day count.
-- Apply extra filters in a WHERE clause inside the derived table as needed.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    field_name,
    field_value
FROM (
    SELECT
        `date`,
        field_name,
        field_value,
        DATE_SUB(
            DATE(`date`),
            INTERVAL ROW_NUMBER() OVER (
                PARTITION BY `field_name`, `field_value`
                ORDER BY `date`
            ) DAY
        ) AS gnum
    FROM
        conditions
) t
GROUP BY
    gnum, field_name, field_value;
使用旧版本 mysql
-- All streaks per field_name / field_value for MySQL versions that lack
-- window functions.
--
-- For every row, gnum is the first day of the streak the row belongs to:
-- the latest row with the same field_name / field_value, dated on or before
-- this row, that has no row for the preceding calendar day.
-- Grouping by gnum therefore yields one output row per streak.
--
-- No user variables are used: MySQL does not guarantee the order in which
-- expressions involving user variables are evaluated within a statement, so
-- counters driven by @variables together with ORDER BY are unreliable.
--
-- Assumes at most one row per field_name per calendar day -- TODO confirm.
-- NOTE(review): the correlated subqueries run per row and may be slow on
-- large tables; an index on (field_name, field_value, date) should help.
-- day_streak is the days elapsed between first and last day of the streak
-- (a single-day streak yields 0); add 1 for an inclusive day count.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak,
    field_name,
    field_value
FROM (
    SELECT
        c.`date`,
        c.field_name,
        c.field_value,
        (
            SELECT
                MAX(s.`date`)
            FROM
                conditions AS s
            WHERE
                s.field_name = c.field_name
                AND s.field_value = c.field_value
                AND s.`date` <= c.`date`
                -- s starts a streak when the previous day has no matching row.
                AND NOT EXISTS (
                    SELECT
                        1
                    FROM
                        conditions AS p
                    WHERE
                        p.field_name = s.field_name
                        AND p.field_value = s.field_value
                        AND DATEDIFF(s.`date`, p.`date`) = 1
                )
        ) AS gnum
    FROM
        conditions AS c
) t
GROUP BY
    gnum, field_name, field_value;
View working demo on db fiddle
让我知道这是否适合你