根据查询结果制作可子查询的 UNION ALL
Crafting a Subquery-able UNION ALL based on the results of a query
数据
我有几张表,结构如下:
-- Cycle calendar: a cycle code, a cycle type, and the date span the cycle covers.
-- Every column is nullable and no key is declared.
CREATE TABLE `cycles` (
    `cycle`      VARCHAR(6)   COLLATE utf8mb4_unicode_ci DEFAULT NULL,  -- cycle code, e.g. '202101'
    `cycle_type` VARCHAR(140) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `start`      DATE         DEFAULT NULL,                             -- first day of the cycle
    `end`        DATE         DEFAULT NULL                              -- last day of the cycle
);
-- Reservations: an identifier (`str`) together with the date range it is reserved for.
-- end_date is nullable; the other queries in this file treat NULL as '9999-01-01',
-- so NULL presumably marks an open-ended reservation -- confirm.
CREATE TABLE `rsvn` (
    `str`        VARCHAR(140) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `start_date` DATE         DEFAULT NULL,
    `end_date`   DATE         DEFAULT NULL
);
-- Sample calendar: each cycle code is listed once for type 'a' and once for type 'b'.
-- NOTE(review): '202013' carries the same dates as '202101', and no row uses the
-- cycle_type 'Ad Cycle' that the queries in this file filter on -- confirm the fixture.
INSERT INTO `cycles`
    (`cycle`, `cycle_type`, `start`, `end`)
VALUES
    ('202013', 'a', '2021-01-04', '2021-01-31'),
    ('202013', 'b', '2021-01-04', '2021-01-31'),
    ('202101', 'a', '2021-01-04', '2021-01-31'),
    ('202101', 'b', '2021-01-04', '2021-01-31'),
    ('202102', 'a', '2021-02-01', '2021-02-28'),
    ('202102', 'b', '2021-02-01', '2021-02-28'),
    ('202103', 'a', '2021-03-01', '2021-03-28'),
    ('202103', 'b', '2021-03-01', '2021-03-28');
-- Sample reservations; STR01367 has three rows, STR00759 has one.
-- NOTE(review): in the first row end_date ('2020-06-21') is earlier than start_date
-- ('2020-12-07'), so no BETWEEN test can match it -- possibly a typo, confirm.
INSERT INTO `rsvn`
    (`str`, `start_date`, `end_date`)
VALUES
    ('STR01367', '2020-12-07', '2020-06-21'),
    ('STR00759', '2020-12-07', '2021-04-25'),
    ('STR01367', '2021-01-04', '2021-09-12'),
    ('STR01367', '2021-06-21', '2022-02-27');
期望的结果
对于任意给定的周期范围,我想统计每个 str 在这些周期中出现的次数。例如,在周期 2108 到 2108(一个周期)之间,我期望看到:
str | count |
---|---|
STR01367 | 1 |
STR00759 | 1 |
在周期 2108 到 2109(两个周期)之间,我期望看到:
str | count |
---|---|
STR01367 | 2 |
STR00759 | 1 |
我试过的
我正在尝试弄清楚如何动态获取这些结果。除了 UNION ALL 查询(每个周期一个查询)之外,我没有想到其他办法,所以我尝试编写了一个存储过程(PROCEDURE)。但是这行不通,因为我还想对查询结果做后处理,而据我所知,CTE 或子查询中无法使用 PROCEDURE 的结果。
我的存储过程(可以运行,但其结果无法用在 SELECT * FROM call count_cycles(?) 这样的子查询中):
-- Builds a statement as text and executes it: one SELECT per matching row of `cycles`,
-- joined with UNION ALL inside a CTE named installed_cycles_count, followed by a summary SELECT.
-- Parameters:
--   start_cycle, end_cycle - cycle codes bounding the range (inclusive); only the last four
--                            characters are used, prefixed with '20', so '2101' and '202101'
--                            both select cycle '202101'.
-- Result: the result set of the generated statement.
-- NOTE(review): the generated summary SELECT reads r.dividers_store and joins a `store` table,
-- while the generated CTE only exposes rsvn.str and installed_cycles -- presumably a leftover
-- from the real schema; confirm before running against the tables shown in this file.
-- NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len; a wide cycle range
-- could truncate the generated statement -- verify the server setting.
CREATE PROCEDURE `count_cycles`(start_cycle CHAR(6), end_cycle CHAR(6))
BEGIN
-- @cycles receives the complete statement text as one string.
SET @cycles := (
SELECT CONCAT('WITH installed_cycles_count AS (',
-- One SELECT per cycle row; the cycle's start and end dates are spliced in as quoted literals.
GROUP_CONCAT(
CONCAT('
SELECT rsvn.str, 1 AS installed_cycles
FROM rsvn
WHERE "', `cy`.`start`, '" BETWEEN rsvn.start_date AND COALESCE(rsvn.end_date, "9999-01-01")
OR "', `cy`.`end`, '" BETWEEN rsvn.start_date AND COALESCE(rsvn.end_date, "9999-01-01")
GROUP BY rsvn.str
'
)
SEPARATOR ' UNION ALL '
),
')
SELECT
store.chain AS "Chain"
,store.division AS "Division"
,dividers_store AS "Store"
,SUM(installed_cycles) AS "Installed Cycles"
FROM installed_cycles_count r
LEFT JOIN store ON store.name = r.dividers_store
GROUP BY dividers_store
ORDER BY chain, division, dividers_store, installed_cycles'
)
-- Only 'Ad Cycle' rows inside the requested code range contribute a SELECT.
FROM cycles `cy`
WHERE `cy`.`cycle_type` = 'Ad Cycle'
AND `cy`.`cycle` >= CONCAT('20', RIGHT(start_cycle, 4))
AND `cy`.`cycle` <= CONCAT('20', RIGHT(end_cycle, 4))
-- All filtered rows share one cycle_type, so this grouping folds them into a single string.
GROUP BY `cy`.`cycle_type`
);
-- NOTE(review): if no cycle matches, the subquery yields no row and @cycles is NULL -- confirm
-- how EXECUTE IMMEDIATE should behave in that case.
EXECUTE IMMEDIATE @cycles;
END
另外,我还尝试用递归查询逐个递增周期来获取结果。下面的查询能给出我想要的周期:
-- Walks the 'Ad Cycle' calendar from cycle '202101' up to and including '202110',
-- adding one cycle per recursion step.
-- increment_cycle() is defined outside this snippet; presumably it returns the code of the
-- cycle that lies the given number of steps after its first argument -- confirm.
WITH RECURSIVE cycle_walk AS (
    -- Anchor: the first cycle of the range.
    SELECT
        first_cy.`cycle`,
        first_cy.`start`,
        first_cy.`end`
    FROM cycles AS first_cy
    WHERE first_cy.`cycle` = '202101'
      AND first_cy.`cycle_type` = 'Ad Cycle'
    UNION ALL
    -- Step: look up the cycle following the one found in the previous iteration.
    SELECT
        next_cy.`cycle`,
        next_cy.`start`,
        next_cy.`end`
    FROM cycle_walk AS prev_cy
    INNER JOIN cycles AS next_cy
        ON next_cy.`cycle_type` = 'Ad Cycle'
       AND next_cy.`cycle` = increment_cycle(prev_cy.`cycle`, 1)
    WHERE next_cy.`cycle` <= '202110'
)
SELECT
    `cycle`,
    `start`,
    `end`
FROM cycle_walk;
但是当我加入预订表(rsvn)后,就无法让它正常工作了:是出现了无限循环吗?
-- Attempt to attach reservations to every step of the cycle walk ('202101' through '202102').
-- NOTE(review): 'dr.dividers_store' is in single quotes, so it is a string literal rather than
-- a column reference; every row carries that fixed text.
-- NOTE(review): rsvn is joined inside the recursive member, so each row produced by the previous
-- iteration is paired with every reservation matching the next cycle; the row count is
-- multiplied at each step. This is the likely reason the query appears never to finish -- confirm.
WITH RECURSIVE rsvn_walk AS (
    -- Anchor: the first cycle, one row per matching reservation (or one row if none match).
    SELECT
        first_cy.`cycle`,
        'dr.dividers_store',
        1 AS installed_cycles
    FROM cycles AS first_cy
    LEFT JOIN rsvn AS dr
        ON first_cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
        OR first_cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
    WHERE first_cy.`cycle` = '202101'
      AND first_cy.`cycle_type` = 'Ad Cycle'
    UNION ALL
    -- Step: next cycle for every row of the previous iteration, again joined to rsvn.
    SELECT
        next_cy.`cycle`,
        'dr.dividers_store',
        1 AS installed_cycles
    FROM rsvn_walk AS prev_row
    INNER JOIN cycles AS next_cy
        ON next_cy.`cycle_type` = 'Ad Cycle'
       AND next_cy.`cycle` = increment_cycle(prev_row.`cycle`, 1)
    LEFT JOIN rsvn AS dr
        ON next_cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
        OR next_cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
    WHERE next_cy.`cycle` <= '202102'
)
SELECT * FROM rsvn_walk
我有哪些选择才能获得我需要的结果,以便我可以在 CTE 或子查询中使用它们?
我要查找的结果其实很容易通过两阶段分组(two-stage grouping)得到,像这样:
-- Two-stage count of how many cycles in a range each reservation `str` appears in.
-- Output columns: `cycle` (earliest matching cycle of the range), str, `count`.
WITH sbc AS (
    -- Stage 1: exactly one row per (cycle, str) pair, however many reservation rows of
    -- that str match the cycle.
    SELECT
        cy.`cycle`,
        dr.str,
        1 AS `count`
    FROM cycles AS cy
    -- INNER JOIN: a cycle without any matching reservation must not yield a NULL-str row
    -- that would later be counted as a store of its own.
    INNER JOIN rsvn AS dr
        -- A NULL end_date is treated as open-ended, as in the other queries of this file.
        ON cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, '9999-01-01')
        OR cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, '9999-01-01')
    WHERE cy.`cycle_type` = 'Ad Cycle'
      AND cy.`cycle` BETWEEN '202201' AND '202205'
    GROUP BY cy.`cycle`, dr.str
    -- No ORDER BY here: row order inside a CTE does not carry over to the outer query.
)
-- Stage 2: collapse to one row per str. `cycle` is aggregated so the result is deterministic
-- and valid under ONLY_FULL_GROUP_BY.
SELECT
    MIN(`cycle`) AS `cycle`,
    str,
    SUM(`count`) AS `count`
FROM sbc
GROUP BY str
CTE 为每个周期中的每个 str 生成一行结果,之后只需按 str(门店)分组并统计出现的次数即可。
除了更简单之外,我猜这个查询也比我提问时执着的 UNION 方案更快,原因之一是服务器不需要对多个分组查询做合并。不过,我并不了解 MariaDB 如何优化这类查询;虽然很好奇,但我没有时间运行基准测试来验证。
数据
我有几张表,结构如下:
-- Cycle calendar: a cycle code, a cycle type, and the date span the cycle covers.
-- Every column is nullable and no key is declared.
CREATE TABLE `cycles` (
    `cycle`      VARCHAR(6)   COLLATE utf8mb4_unicode_ci DEFAULT NULL,  -- cycle code, e.g. '202101'
    `cycle_type` VARCHAR(140) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `start`      DATE         DEFAULT NULL,                             -- first day of the cycle
    `end`        DATE         DEFAULT NULL                              -- last day of the cycle
);
-- Reservations: an identifier (`str`) together with the date range it is reserved for.
-- end_date is nullable; the other queries in this file treat NULL as '9999-01-01',
-- so NULL presumably marks an open-ended reservation -- confirm.
CREATE TABLE `rsvn` (
    `str`        VARCHAR(140) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `start_date` DATE         DEFAULT NULL,
    `end_date`   DATE         DEFAULT NULL
);
-- Sample calendar: each cycle code is listed once for type 'a' and once for type 'b'.
-- NOTE(review): '202013' carries the same dates as '202101', and no row uses the
-- cycle_type 'Ad Cycle' that the queries in this file filter on -- confirm the fixture.
INSERT INTO `cycles`
    (`cycle`, `cycle_type`, `start`, `end`)
VALUES
    ('202013', 'a', '2021-01-04', '2021-01-31'),
    ('202013', 'b', '2021-01-04', '2021-01-31'),
    ('202101', 'a', '2021-01-04', '2021-01-31'),
    ('202101', 'b', '2021-01-04', '2021-01-31'),
    ('202102', 'a', '2021-02-01', '2021-02-28'),
    ('202102', 'b', '2021-02-01', '2021-02-28'),
    ('202103', 'a', '2021-03-01', '2021-03-28'),
    ('202103', 'b', '2021-03-01', '2021-03-28');
-- Sample reservations; STR01367 has three rows, STR00759 has one.
-- NOTE(review): in the first row end_date ('2020-06-21') is earlier than start_date
-- ('2020-12-07'), so no BETWEEN test can match it -- possibly a typo, confirm.
INSERT INTO `rsvn`
    (`str`, `start_date`, `end_date`)
VALUES
    ('STR01367', '2020-12-07', '2020-06-21'),
    ('STR00759', '2020-12-07', '2021-04-25'),
    ('STR01367', '2021-01-04', '2021-09-12'),
    ('STR01367', '2021-06-21', '2022-02-27');
期望的结果
对于任意给定的周期范围,我想统计每个 str 在这些周期中出现的次数。例如,在周期 2108 到 2108(一个周期)之间,我期望看到:
str | count |
---|---|
STR01367 | 1 |
STR00759 | 1 |
在周期 2108 到 2109(两个周期)之间,我期望看到:
str | count |
---|---|
STR01367 | 2 |
STR00759 | 1 |
我试过的
我正在尝试弄清楚如何动态获取这些结果。除了 UNION ALL 查询(每个周期一个查询)之外,我没有想到其他办法,所以我尝试编写了一个存储过程(PROCEDURE)。但是这行不通,因为我还想对查询结果做后处理,而据我所知,CTE 或子查询中无法使用 PROCEDURE 的结果。
我的存储过程(可以运行,但其结果无法用在 SELECT * FROM call count_cycles(?) 这样的子查询中):
-- Builds a statement as text and executes it: one SELECT per matching row of `cycles`,
-- joined with UNION ALL inside a CTE named installed_cycles_count, followed by a summary SELECT.
-- Parameters:
--   start_cycle, end_cycle - cycle codes bounding the range (inclusive); only the last four
--                            characters are used, prefixed with '20', so '2101' and '202101'
--                            both select cycle '202101'.
-- Result: the result set of the generated statement.
-- NOTE(review): the generated summary SELECT reads r.dividers_store and joins a `store` table,
-- while the generated CTE only exposes rsvn.str and installed_cycles -- presumably a leftover
-- from the real schema; confirm before running against the tables shown in this file.
-- NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len; a wide cycle range
-- could truncate the generated statement -- verify the server setting.
CREATE PROCEDURE `count_cycles`(start_cycle CHAR(6), end_cycle CHAR(6))
BEGIN
-- @cycles receives the complete statement text as one string.
SET @cycles := (
SELECT CONCAT('WITH installed_cycles_count AS (',
-- One SELECT per cycle row; the cycle's start and end dates are spliced in as quoted literals.
GROUP_CONCAT(
CONCAT('
SELECT rsvn.str, 1 AS installed_cycles
FROM rsvn
WHERE "', `cy`.`start`, '" BETWEEN rsvn.start_date AND COALESCE(rsvn.end_date, "9999-01-01")
OR "', `cy`.`end`, '" BETWEEN rsvn.start_date AND COALESCE(rsvn.end_date, "9999-01-01")
GROUP BY rsvn.str
'
)
SEPARATOR ' UNION ALL '
),
')
SELECT
store.chain AS "Chain"
,store.division AS "Division"
,dividers_store AS "Store"
,SUM(installed_cycles) AS "Installed Cycles"
FROM installed_cycles_count r
LEFT JOIN store ON store.name = r.dividers_store
GROUP BY dividers_store
ORDER BY chain, division, dividers_store, installed_cycles'
)
-- Only 'Ad Cycle' rows inside the requested code range contribute a SELECT.
FROM cycles `cy`
WHERE `cy`.`cycle_type` = 'Ad Cycle'
AND `cy`.`cycle` >= CONCAT('20', RIGHT(start_cycle, 4))
AND `cy`.`cycle` <= CONCAT('20', RIGHT(end_cycle, 4))
-- All filtered rows share one cycle_type, so this grouping folds them into a single string.
GROUP BY `cy`.`cycle_type`
);
-- NOTE(review): if no cycle matches, the subquery yields no row and @cycles is NULL -- confirm
-- how EXECUTE IMMEDIATE should behave in that case.
EXECUTE IMMEDIATE @cycles;
END
另外,我还尝试用递归查询逐个递增周期来获取结果。下面的查询能给出我想要的周期:
-- Walks the 'Ad Cycle' calendar from cycle '202101' up to and including '202110',
-- adding one cycle per recursion step.
-- increment_cycle() is defined outside this snippet; presumably it returns the code of the
-- cycle that lies the given number of steps after its first argument -- confirm.
WITH RECURSIVE cycle_walk AS (
    -- Anchor: the first cycle of the range.
    SELECT
        first_cy.`cycle`,
        first_cy.`start`,
        first_cy.`end`
    FROM cycles AS first_cy
    WHERE first_cy.`cycle` = '202101'
      AND first_cy.`cycle_type` = 'Ad Cycle'
    UNION ALL
    -- Step: look up the cycle following the one found in the previous iteration.
    SELECT
        next_cy.`cycle`,
        next_cy.`start`,
        next_cy.`end`
    FROM cycle_walk AS prev_cy
    INNER JOIN cycles AS next_cy
        ON next_cy.`cycle_type` = 'Ad Cycle'
       AND next_cy.`cycle` = increment_cycle(prev_cy.`cycle`, 1)
    WHERE next_cy.`cycle` <= '202110'
)
SELECT
    `cycle`,
    `start`,
    `end`
FROM cycle_walk;
但是当我加入预订表(rsvn)后,就无法让它正常工作了:是出现了无限循环吗?
-- Attempt to attach reservations to every step of the cycle walk ('202101' through '202102').
-- NOTE(review): 'dr.dividers_store' is in single quotes, so it is a string literal rather than
-- a column reference; every row carries that fixed text.
-- NOTE(review): rsvn is joined inside the recursive member, so each row produced by the previous
-- iteration is paired with every reservation matching the next cycle; the row count is
-- multiplied at each step. This is the likely reason the query appears never to finish -- confirm.
WITH RECURSIVE rsvn_walk AS (
    -- Anchor: the first cycle, one row per matching reservation (or one row if none match).
    SELECT
        first_cy.`cycle`,
        'dr.dividers_store',
        1 AS installed_cycles
    FROM cycles AS first_cy
    LEFT JOIN rsvn AS dr
        ON first_cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
        OR first_cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
    WHERE first_cy.`cycle` = '202101'
      AND first_cy.`cycle_type` = 'Ad Cycle'
    UNION ALL
    -- Step: next cycle for every row of the previous iteration, again joined to rsvn.
    SELECT
        next_cy.`cycle`,
        'dr.dividers_store',
        1 AS installed_cycles
    FROM rsvn_walk AS prev_row
    INNER JOIN cycles AS next_cy
        ON next_cy.`cycle_type` = 'Ad Cycle'
       AND next_cy.`cycle` = increment_cycle(prev_row.`cycle`, 1)
    LEFT JOIN rsvn AS dr
        ON next_cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
        OR next_cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, "9999-01-01")
    WHERE next_cy.`cycle` <= '202102'
)
SELECT * FROM rsvn_walk
我有哪些选择才能获得我需要的结果,以便我可以在 CTE 或子查询中使用它们?
我要查找的结果其实很容易通过两阶段分组(two-stage grouping)得到,像这样:
-- Two-stage count of how many cycles in a range each reservation `str` appears in.
-- Output columns: `cycle` (earliest matching cycle of the range), str, `count`.
WITH sbc AS (
    -- Stage 1: exactly one row per (cycle, str) pair, however many reservation rows of
    -- that str match the cycle.
    SELECT
        cy.`cycle`,
        dr.str,
        1 AS `count`
    FROM cycles AS cy
    -- INNER JOIN: a cycle without any matching reservation must not yield a NULL-str row
    -- that would later be counted as a store of its own.
    INNER JOIN rsvn AS dr
        -- A NULL end_date is treated as open-ended, as in the other queries of this file.
        ON cy.`start` BETWEEN dr.start_date AND COALESCE(dr.end_date, '9999-01-01')
        OR cy.`end` BETWEEN dr.start_date AND COALESCE(dr.end_date, '9999-01-01')
    WHERE cy.`cycle_type` = 'Ad Cycle'
      AND cy.`cycle` BETWEEN '202201' AND '202205'
    GROUP BY cy.`cycle`, dr.str
    -- No ORDER BY here: row order inside a CTE does not carry over to the outer query.
)
-- Stage 2: collapse to one row per str. `cycle` is aggregated so the result is deterministic
-- and valid under ONLY_FULL_GROUP_BY.
SELECT
    MIN(`cycle`) AS `cycle`,
    str,
    SUM(`count`) AS `count`
FROM sbc
GROUP BY str
CTE 为每个周期中的每个 str 生成一行结果,之后只需按 str(门店)分组并统计出现的次数即可。
除了更简单之外,我猜这个查询也比我提问时执着的 UNION 方案更快,原因之一是服务器不需要对多个分组查询做合并。不过,我并不了解 MariaDB 如何优化这类查询;虽然很好奇,但我没有时间运行基准测试来验证。