我想知道下面的 SQL 查询是否还能进一步简化,或者能否用其他方式重写:
I would like to know whether the following SQL query can be simplified further, or rewritten in some other way:
-- Question's original query: keeps one row per order_col (the row ranked
-- first by date descending) and attaches partition-wide totals to it.
SELECT
    id_col,
    order_col,
    -- Non-blank order_list values from INDEX = 2 rows; every other row contributes 0.
    SUM(NVL(CASE WHEN index = 2 AND TRIM(order_list) NOT IN ('', ' ') THEN order_list END, 0))
        OVER (PARTITION BY order_col) AS total_qty,
    -- Non-blank order_list values from INDEX = 3 rows; every other row contributes 0.
    SUM(NVL(CASE WHEN index = 3 AND TRIM(order_list) NOT IN ('', ' ') THEN order_list END, 0))
        OVER (PARTITION BY order_col) AS total_amt,
    -- Rows shaped like '<key>=<value>': strip the key and sum what is left.
    SUM(NVL(CASE WHEN order_list LIKE '21235%' THEN REPLACE(order_list, '21235=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_001,
    SUM(NVL(CASE WHEN order_list LIKE '21236%' THEN REPLACE(order_list, '21236=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_002,
    SUM(NVL(CASE WHEN order_list LIKE '21237%' THEN REPLACE(order_list, '21237=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_003
FROM tbl_001 AS t1
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
上述查询运行时间很长。有没有办法重写这个查询以提高性能?
NVL(CASE WHEN cond THEN val END, 0)
与 CASE WHEN cond THEN val ELSE 0 END
等价;更进一步,窗口函数 SUM 会跳过 NULL,因此可以简化为 CASE WHEN cond THEN val END(注意:当分区内没有任何行满足条件时,SUM 返回 NULL 而不是 0;如果需要 0,可在 SUM 外层加 COALESCE)
:
-- Simplified form of the question's query. The inner NVL(..., 0) is dropped
-- because SUM skips NULL inputs. SUM still returns NULL when no row in the
-- partition matches, so each total is wrapped in COALESCE(..., 0) to keep the
-- original "0 when nothing matches" result.
SELECT
    id_col,
    order_col,
    COALESCE(SUM(CASE WHEN index = 2 AND TRIM(order_list) != '' THEN order_list END)
        OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 AND TRIM(order_list) != '' THEN order_list END)
        OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so that a plain value which merely starts
    -- with the same digits (e.g. '21235') is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1;
首先是一些小问题:
trim(order_list) NOT IN('', ' ')
既然已经去掉了首尾空白,第二种情况(' ')就不可能出现,所以只需写成 trim(order_list) <> ''
既然这个条件这么简短,你可以把它放进 WHERE 子句中(这会去掉那些结果只有零的 order_col
行,而这可能正是你想要的)。
Lukasz 关于 SUM 会忽略 NULL 的观点仍然成立。
-- Blank order_list rows are removed once in WHERE instead of inside every
-- CASE. WHERE is applied before QUALIFY, so blank rows also no longer compete
-- for the "latest row", and an order_col with only blank rows drops out.
SELECT
    id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END) OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END) OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END) OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
您目前是先为每一行计算 SUM,然后丢弃每个分区中除一行以外的所有行;保留下来的那一行里真正需要的只有 id_col,
而它从未在任何 OVER() 中使用过,所以可以改为按 order_col
分组。
带有示例数据的旧代码:
-- Window-function version, run against inline sample rows so the result can
-- be compared with the GROUP BY rewrite.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END) OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END) OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END) OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
-- NOTE(review): rows tied on date make the surviving row arbitrary; in the
-- sample data every tied row carries the same id_col.
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
给出:
ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
---|---|---|---|---|---|---|
2 | 10 | 10 | 11 | 12 | 13 | 14 |
所以换成 GROUP BY 形式后,id_col
的取法有点麻烦。
-- GROUP BY rewrite: one aggregation pass per order_col instead of computing
-- window sums on every row and discarding all but one row.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- id_col of the newest row: collect ids ordered by date descending and
    -- take the first array element.
    ARRAY_AGG(id_col) WITHIN GROUP (ORDER BY date DESC)[0] AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
GROUP BY order_col;
给出(相同的答案):
ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
---|---|---|---|---|---|---|
2 | 10 | 10 | 11 | 12 | 13 | 14 |
这样写性能可能会更好,而且在我看来可读性也更高。
如果性能非常关键,我会试试下面的写法,看看是否有进一步改善:
-- Variant that splits order_list once per row in a subquery, so the outer
-- aggregation compares the key part directly instead of running LIKE and
-- REPLACE for every saving column.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- The subquery already made id_col identical on every row of an
    -- order_col, so any value from the group is the newest row's id.
    ANY_VALUE(id_col) AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END), 0) AS total_amt,
    -- s_order_list[0] is the text before '=', s_order_list[1] the text after it.
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21235' THEN s_order_list[1] END), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21236' THEN s_order_list[1] END), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21237' THEN s_order_list[1] END), 0) AS sav_003
FROM (
    SELECT
        FIRST_VALUE(id_col) OVER (PARTITION BY order_col ORDER BY date DESC) AS id_col,
        order_col,
        order_list,
        SPLIT(order_list, '=') AS s_order_list,
        index
    FROM tbl_001
    WHERE TRIM(order_list) <> ''
) AS t1
GROUP BY order_col;
另外,我个人根本不会用 CASE,而是会用 IFF:
-- IFF-based variant: the key and value parts of order_list are parsed once
-- per row in the subquery and compared as numbers in the outer aggregation.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- The subquery already made id_col identical on every row of an
    -- order_col, so any value from the group is the newest row's id.
    ANY_VALUE(id_col) AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(IFF(index = 2, order_list, NULL)), 0) AS total_qty,
    COALESCE(SUM(IFF(index = 3, order_list, NULL)), 0) AS total_amt,
    COALESCE(SUM(IFF(s1 = 21235, s2, NULL)), 0) AS sav_001,
    COALESCE(SUM(IFF(s1 = 21236, s2, NULL)), 0) AS sav_002,
    COALESCE(SUM(IFF(s1 = 21237, s2, NULL)), 0) AS sav_003
FROM (
    SELECT
        FIRST_VALUE(id_col) OVER (PARTITION BY order_col ORDER BY date DESC) AS id_col,
        order_col,
        order_list,
        -- Plain rows such as '10' contain no '=', so part 2 is an empty
        -- string. The TRY_ conversions return NULL for empty or non-numeric
        -- text instead of failing the whole query with a conversion error.
        TRY_TO_NUMBER(SPLIT_PART(order_list, '=', 1)) AS s1,
        TRY_TO_DOUBLE(SPLIT_PART(order_list, '=', 2)) AS s2,
        index
    FROM tbl_001
    WHERE TRIM(order_list) <> ''
) AS t1
GROUP BY order_col;
-- Question's original query: keeps one row per order_col (the row ranked
-- first by date descending) and attaches partition-wide totals to it.
SELECT
    id_col,
    order_col,
    -- Non-blank order_list values from INDEX = 2 rows; every other row contributes 0.
    SUM(NVL(CASE WHEN index = 2 AND TRIM(order_list) NOT IN ('', ' ') THEN order_list END, 0))
        OVER (PARTITION BY order_col) AS total_qty,
    -- Non-blank order_list values from INDEX = 3 rows; every other row contributes 0.
    SUM(NVL(CASE WHEN index = 3 AND TRIM(order_list) NOT IN ('', ' ') THEN order_list END, 0))
        OVER (PARTITION BY order_col) AS total_amt,
    -- Rows shaped like '<key>=<value>': strip the key and sum what is left.
    SUM(NVL(CASE WHEN order_list LIKE '21235%' THEN REPLACE(order_list, '21235=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_001,
    SUM(NVL(CASE WHEN order_list LIKE '21236%' THEN REPLACE(order_list, '21236=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_002,
    SUM(NVL(CASE WHEN order_list LIKE '21237%' THEN REPLACE(order_list, '21237=', '') END, 0))
        OVER (PARTITION BY order_col) AS sav_003
FROM tbl_001 AS t1
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
上述查询运行时间很长。有没有办法重写这个查询以提高性能?
NVL(CASE WHEN cond THEN val END, 0)
与 CASE WHEN cond THEN val ELSE 0 END
等价;更进一步,窗口函数 SUM 会跳过 NULL,因此可以简化为 CASE WHEN cond THEN val END(注意:当分区内没有任何行满足条件时,SUM 返回 NULL 而不是 0;如果需要 0,可在 SUM 外层加 COALESCE)
:
-- Simplified form of the question's query. The inner NVL(..., 0) is dropped
-- because SUM skips NULL inputs. SUM still returns NULL when no row in the
-- partition matches, so each total is wrapped in COALESCE(..., 0) to keep the
-- original "0 when nothing matches" result.
SELECT
    id_col,
    order_col,
    COALESCE(SUM(CASE WHEN index = 2 AND TRIM(order_list) != '' THEN order_list END)
        OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 AND TRIM(order_list) != '' THEN order_list END)
        OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so that a plain value which merely starts
    -- with the same digits (e.g. '21235') is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END)
        OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1;
首先是一些小问题:
trim(order_list) NOT IN('', ' ')
既然已经去掉了首尾空白,第二种情况(' ')就不可能出现,所以只需写成 trim(order_list) <> ''
既然这个条件这么简短,你可以把它放进 WHERE 子句中(这会去掉那些结果只有零的 order_col
行,而这可能正是你想要的)。
Lukasz 关于 SUM 会忽略 NULL 的观点仍然成立。
-- Blank order_list rows are removed once in WHERE instead of inside every
-- CASE. WHERE is applied before QUALIFY, so blank rows also no longer compete
-- for the "latest row", and an order_col with only blank rows drops out.
SELECT
    id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END) OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END) OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END) OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
您目前是先为每一行计算 SUM,然后丢弃每个分区中除一行以外的所有行;保留下来的那一行里真正需要的只有 id_col,
而它从未在任何 OVER() 中使用过,所以可以改为按 order_col 分组。
带有示例数据的旧代码:
-- Window-function version, run against inline sample rows so the result can
-- be compared with the GROUP BY rewrite.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END) OVER (PARTITION BY order_col), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END) OVER (PARTITION BY order_col), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END) OVER (PARTITION BY order_col), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END) OVER (PARTITION BY order_col), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
-- NOTE(review): rows tied on date make the surviving row arbitrary; in the
-- sample data every tied row carries the same id_col.
QUALIFY ROW_NUMBER() OVER (PARTITION BY order_col ORDER BY date DESC) = 1
给出:
ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
---|---|---|---|---|---|---|
2 | 10 | 10 | 11 | 12 | 13 | 14 |
所以换成 GROUP BY 形式后,id_col
的取法有点麻烦。
-- GROUP BY rewrite: one aggregation pass per order_col instead of computing
-- window sums on every row and discarding all but one row.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- id_col of the newest row: collect ids ordered by date descending and
    -- take the first array element.
    ARRAY_AGG(id_col) WITHIN GROUP (ORDER BY date DESC)[0] AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END), 0) AS total_amt,
    -- The pattern includes the '=' so a plain value that merely starts with
    -- the same digits is not summed into a saving column.
    COALESCE(SUM(CASE WHEN order_list LIKE '21235=%' THEN REPLACE(order_list, '21235=', '') END), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN order_list LIKE '21236=%' THEN REPLACE(order_list, '21236=', '') END), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN order_list LIKE '21237=%' THEN REPLACE(order_list, '21237=', '') END), 0) AS sav_003
FROM tbl_001 AS t1
WHERE TRIM(order_list) <> ''
GROUP BY order_col;
给出(相同的答案):
ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
---|---|---|---|---|---|---|
2 | 10 | 10 | 11 | 12 | 13 | 14 |
这样写性能可能会更好,而且在我看来可读性也更高。
如果性能非常关键,我会试试下面的写法,看看是否有进一步改善:
-- Variant that splits order_list once per row in a subquery, so the outer
-- aggregation compares the key part directly instead of running LIKE and
-- REPLACE for every saving column.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- The subquery already made id_col identical on every row of an
    -- order_col, so any value from the group is the newest row's id.
    ANY_VALUE(id_col) AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(CASE WHEN index = 2 THEN order_list END), 0) AS total_qty,
    COALESCE(SUM(CASE WHEN index = 3 THEN order_list END), 0) AS total_amt,
    -- s_order_list[0] is the text before '=', s_order_list[1] the text after it.
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21235' THEN s_order_list[1] END), 0) AS sav_001,
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21236' THEN s_order_list[1] END), 0) AS sav_002,
    COALESCE(SUM(CASE WHEN s_order_list[0] = '21237' THEN s_order_list[1] END), 0) AS sav_003
FROM (
    SELECT
        FIRST_VALUE(id_col) OVER (PARTITION BY order_col ORDER BY date DESC) AS id_col,
        order_col,
        order_list,
        SPLIT(order_list, '=') AS s_order_list,
        index
    FROM tbl_001
    WHERE TRIM(order_list) <> ''
) AS t1
GROUP BY order_col;
另外,我个人根本不会用 CASE,而是会用 IFF:
-- IFF-based variant: the key and value parts of order_list are parsed once
-- per row in the subquery and compared as numbers in the outer aggregation.
WITH tbl_001 AS (
    SELECT *
    FROM VALUES
        (1, 10, '10',       '2022-03-01'::date, 2),
        (2, 10, '11',       '2022-03-02'::date, 3),
        (2, 10, '21235=12', '2022-03-02'::date, 1),
        (2, 10, '21236=13', '2022-03-02'::date, 1),
        (2, 10, '21237=14', '2022-03-02'::date, 1)
        t(id_col, order_col, order_list, date, index)
)
SELECT
    -- The subquery already made id_col identical on every row of an
    -- order_col, so any value from the group is the newest row's id.
    ANY_VALUE(id_col) AS id_col,
    order_col,
    -- SUM skips NULLs but yields NULL when nothing matches; COALESCE restores
    -- the 0 that the question's NVL-based query returned.
    COALESCE(SUM(IFF(index = 2, order_list, NULL)), 0) AS total_qty,
    COALESCE(SUM(IFF(index = 3, order_list, NULL)), 0) AS total_amt,
    COALESCE(SUM(IFF(s1 = 21235, s2, NULL)), 0) AS sav_001,
    COALESCE(SUM(IFF(s1 = 21236, s2, NULL)), 0) AS sav_002,
    COALESCE(SUM(IFF(s1 = 21237, s2, NULL)), 0) AS sav_003
FROM (
    SELECT
        FIRST_VALUE(id_col) OVER (PARTITION BY order_col ORDER BY date DESC) AS id_col,
        order_col,
        order_list,
        -- Plain rows such as '10' contain no '=', so part 2 is an empty
        -- string. The TRY_ conversions return NULL for empty or non-numeric
        -- text instead of failing the whole query with a conversion error.
        TRY_TO_NUMBER(SPLIT_PART(order_list, '=', 1)) AS s1,
        TRY_TO_DOUBLE(SPLIT_PART(order_list, '=', 2)) AS s2,
        index
    FROM tbl_001
    WHERE TRIM(order_list) <> ''
) AS t1
GROUP BY order_col;