SQL 查询:满足特定条件的最长连续记录(streak)

SQL query for longest streak with a specific condition

我有一张表,内容如下(并非每天都有记录,也并非每个 field_name 每天都有记录):

date field_name field_value
2021-01-01 coffee_available 1
2021-01-02 coffee_available 1
2021-01-03 tea_available 0
2021-01-03 coffee_available 1
2021-01-04 tea_available 0
2021-01-06 coffee_available 0
2021-01-07 coffee_available 0
2021-01-08 coffee_available 1
2021-01-08 tea_available 1

我想按特定条件查询数据库,例如:有咖啡可用的最长连续天数,或者没有茶可用的最长连续天数。所以结果应该是一个数字,或者——更好的是——最长的那段连续记录及其开始或结束日期。

我查看了其他关于连续记录(streak)的问题,但不知道如何修改它们的做法以适用于我的问题。你能帮帮我吗?

您可以使用以下方法:先用 ROW_NUMBER 确定满足特定条件的连续日期分组,再用 DATEDIFF 计算连续的天数,其中同一段连续记录内相邻两条记录的日期只相差 1 天。也可以用 COUNT 减 1 来计算连续记录的长度,如下所示。我提供了一个整体查询,以及针对问题中所述具体场景的其他查询。

查询#1

-- Lists every streak of consecutive calendar days for each
-- (field_name, field_value) pair in `conditions`.
--
-- Gaps-and-islands: inside one (field_name, field_value) partition the day
-- number of a row minus the dense rank of that day stays constant while the
-- days are consecutive and changes as soon as a day is skipped, so the
-- difference (gnum) identifies the streak. DENSE_RANK (rather than
-- ROW_NUMBER) keeps duplicate rows for the same day in the same streak.
--
-- Both streak columns count day-to-day steps (days in the streak minus 1),
-- so a single-day streak is reported as 0.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    -- DISTINCT so that repeated rows for one day do not inflate the count.
    COUNT(DISTINCT TO_DAYS(`DATE`)) - 1 AS day_streak_method_2,
    streaks.field_name,
    streaks.field_value
FROM
    (
        SELECT
            `DATE`,
            field_name,
            field_value,
            TO_DAYS(`DATE`) - DENSE_RANK() OVER (
                PARTITION BY field_name, field_value
                ORDER BY TO_DAYS(`DATE`)
            ) AS gnum
        FROM
            conditions
    ) AS streaks
GROUP BY
    streaks.gnum,
    streaks.field_name,
    streaks.field_value;
MIN(DATE) MAX(DATE) day_streak_method_1 day_streak_method_2 field_name field_value
2021-01-01 00:00:00 2021-01-03 00:00:00 2 2 coffee_available 1
2021-01-03 00:00:00 2021-01-04 00:00:00 1 1 tea_available 0
2021-01-08 00:00:00 2021-01-08 00:00:00 0 0 coffee_available 1
2021-01-08 00:00:00 2021-01-08 00:00:00 0 0 tea_available 1
2021-01-06 00:00:00 2021-01-07 00:00:00 1 1 coffee_available 0

查询#2

-- Label row announcing the question answered by the query that follows.
SELECT
    'The longest streak of days with coffee available' AS `Question 1`;
Question 1
The longest streak of days with coffee available

查询#3

-- Longest streak of consecutive calendar days with coffee available
-- (field_name = 'coffee_available', field_value = 1).
--
-- After filtering to one name/value pair, the day number minus the dense
-- rank of that day is constant within a run of consecutive days and changes
-- whenever a day is missing, so it is used as the streak key (gnum).
--
-- day_streak_method_1 counts day-to-day steps (days in the streak minus 1).
-- Ties on length are resolved in favour of the earliest streak so the
-- result is deterministic.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            TO_DAYS(`DATE`) - DENSE_RANK() OVER (
                ORDER BY TO_DAYS(`DATE`)
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'coffee_available'
            AND field_value = 1
    ) AS streaks
GROUP BY
    streaks.gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) MAX(DATE) day_streak_method_1
2021-01-01 00:00:00 2021-01-03 00:00:00 2

查询#4

-- Label row announcing the question answered by the query that follows.
SELECT
    'The longest streak of days with no tea available' AS `Question 2`;
Question 2
The longest streak of days with no tea available

查询#5

-- Longest streak of consecutive calendar days with no tea available
-- (field_name = 'tea_available', field_value = 0).
--
-- After filtering to one name/value pair, the day number minus the dense
-- rank of that day is constant within a run of consecutive days and changes
-- whenever a day is missing, so it is used as the streak key (gnum).
--
-- day_streak_method_1 counts day-to-day steps (days in the streak minus 1).
-- Ties on length are resolved in favour of the earliest streak so the
-- result is deterministic.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1
FROM
    (
        SELECT
            `DATE`,
            TO_DAYS(`DATE`) - DENSE_RANK() OVER (
                ORDER BY TO_DAYS(`DATE`)
            ) AS gnum
        FROM
            conditions
        WHERE
            field_name = 'tea_available'
            AND field_value = 0
    ) AS streaks
GROUP BY
    streaks.gnum
ORDER BY
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) DESC,
    MIN(`DATE`)
LIMIT 1;
MIN(DATE) MAX(DATE) day_streak_method_1
2021-01-03 00:00:00 2021-01-04 00:00:00 1

View on DB Fiddle

编辑 1

在 case 表达式的帮助下减少了子查询的使用。 根据需要在 where 子句中应用其他过滤器。

-- Single-subquery version: every streak of consecutive calendar days for
-- each (field_name, field_value) pair in `conditions`.
--
-- The streak key (gnum) is the day number minus the dense rank of that day
-- within its (field_name, field_value) partition: it is constant across
-- consecutive days and changes after any skipped day. No row needs to be
-- discarded, so the first day of every streak is kept.
--
-- day_streak_method_1 counts day-to-day steps (days in the streak minus 1).
-- Add further filters to the inner query's WHERE clause as needed.
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak_method_1,
    field_name,
    field_value
FROM (
    SELECT
        `date`,
        field_name,
        field_value,
        TO_DAYS(`date`) - DENSE_RANK() OVER (
            PARTITION BY field_name, field_value
            ORDER BY TO_DAYS(`date`)
        ) AS gnum
    FROM
        conditions
) t
GROUP BY
    gnum, field_name, field_value;

适用于旧版本 MySQL 的写法

-- Variant without window functions, for older MySQL versions: every streak
-- of consecutive calendar days for each (field_name, field_value) pair.
--
-- The correlated COUNT(DISTINCT ...) emulates DENSE_RANK: it is the number
-- of distinct days, up to and including the current row's day, that carry
-- the same field_name and field_value. Day number minus that rank (gnum) is
-- constant across consecutive days and changes after any skipped day.
--
-- User variables are deliberately avoided: MySQL leaves the evaluation
-- order of variable assignments within one SELECT undefined.
--
-- day_streak counts day-to-day steps (days in the streak minus 1).
SELECT
    MIN(`DATE`),
    MAX(`DATE`),
    DATEDIFF(MAX(`DATE`), MIN(`DATE`)) AS day_streak,
    field_name,
    field_value
FROM (
    SELECT
        c.`date`,
        c.field_name,
        c.field_value,
        TO_DAYS(c.`date`) - (
            SELECT
                COUNT(DISTINCT TO_DAYS(p.`date`))
            FROM
                conditions AS p
            WHERE
                -- <=> is NULL-safe equality, so NULLs group together the
                -- way a window PARTITION BY would group them.
                p.field_name <=> c.field_name
                AND p.field_value <=> c.field_value
                -- Half-open upper bound: everything up to the end of the
                -- current row's calendar day.
                AND p.`date` < DATE(c.`date`) + INTERVAL 1 DAY
        ) AS gnum
    FROM
        conditions AS c
) t
GROUP BY
    gnum, field_name, field_value;

View working demo on db fiddle

请告诉我这是否对你有帮助。