根据类型获取记录 Z-Score
Get records Z-Score based on their type
请考虑以下记录:
Id Type Price
---------------------------
1 1 100
2 2 200
3 1 150
4 1 300
5 2 200
6 2 400
7 2 500
基于这篇文章《Simple Anomaly Detection Using Plain SQL》,我想按类型分别计算 Z-Score。也就是说,我想先根据 Type
列对记录进行分区,然后在每个分区内计算 Z-Score。我写了下面这个查询,但不知道如何将它与 PARTITION BY
分区结合起来:
-- Asker's attempt: z-score of Price computed over the whole table (not per Type).
-- series: projects only Price, so Id and Type are not exposed to the CTEs below.
WITH series AS (
SELECT Price
FROM MyTable AS n
),
-- stats: a single average / standard deviation over ALL rows; there is no grouping by Type.
-- NOTE(review): stddev is not the STDEV spelling used by the answers; confirm the target engine.
stats AS (
SELECT avg(Price) series_avg,
stddev(Price) as series_stddev
FROM series
),
-- zscores: the comma join attaches the one stats row to every series row.
-- It selects Id and Type, which series does not provide (series only has Price).
zscores AS (
SELECT Id, Type, Price, (price - series_avg) / series_stddev AS zscore
FROM series, stats
)
-- Flag a row as an anomaly ('T') when its z-score lies outside the range [-1, 1].
SELECT *,case when zscore NOT BETWEEN -1 AND 1 then 'T' else 'F' end AS is_anomaly
FROM zscores;
谢谢
计算 AVG 和 STDEV 时,只需按 Type 分组即可。
然后按 Type 将原始数据与汇总结果连接(JOIN)起来。
-- Per-type z-score: aggregate mean / sample standard deviation per [Type],
-- join the summary back to the rows, then flag rows outside [-1, 1].
WITH CTE_DATA AS (
    SELECT Id, [Type], Price
    FROM YourTable
)
, CTE_STATS AS (
    -- One summary row per [Type].
    -- Price is cast before averaging: AVG over an integer column returns a
    -- truncated integer in SQL Server (183 instead of 183.33 for Type 1 in the
    -- sample data), which skews every z-score of that type.
    SELECT
        [Type]
      , AVG(CAST(Price AS FLOAT)) AS series_avg
      , STDEV(Price) AS series_stddev
    FROM CTE_DATA
    GROUP BY [Type]
)
, CTE_ZSCORES AS (
    -- NULLIF avoids a divide-by-zero error when all prices of a type are equal;
    -- the z-score is then NULL and the row falls through to 'F' below.
    SELECT d.Id, d.[Type], d.Price
      , (d.Price - st.series_avg) / NULLIF(st.series_stddev, 0) AS zscore
    FROM CTE_DATA AS d
    INNER JOIN CTE_STATS AS st ON d.[Type] = st.[Type]
)
SELECT Id, [Type], Price, zscore
  , CASE
      WHEN zscore NOT BETWEEN -1 AND 1
      THEN 'T'
      ELSE 'F'
    END AS is_anomaly
FROM CTE_ZSCORES
ORDER BY [Type], Id;
Id | Type | Price | zscore | is_anomaly
-: | ---: | ----: | -----------------: | :---------
1 | 1 | 100 | -0.800640769025436 | F
3 | 1 | 150 | -0.320256307610174 | F
4 | 1 | 300 | 1.12089707663561 | T
2 | 2 | 200 | -0.833333333333333 | F
5 | 2 | 200 | -0.833333333333333 | F
6 | 2 | 400 | 0.5 | F
7 | 2 | 500 | 1.16666666666667 | T
或者使用带 PARTITION BY 的窗口函数。
-- Per-type z-score using window aggregates instead of a join, then flag rows
-- whose z-score lies outside [-1, 1].
SELECT Id, [Type], Price, zscore
  , CASE
      WHEN zscore NOT BETWEEN -1 AND 1
      THEN 'T'
      ELSE 'F'
    END AS is_anomaly
FROM
(
    -- Price is cast before AVG: on an integer column SQL Server's AVG returns a
    -- truncated integer (183 instead of 183.33 for Type 1 in the sample data).
    -- NULLIF avoids a divide-by-zero error when all prices of a type are equal;
    -- the z-score is then NULL and the row is reported as 'F'.
    SELECT Id, [Type], Price
      , (Price - AVG(CAST(Price AS FLOAT)) OVER (PARTITION BY [Type]))
        / NULLIF(STDEV(Price) OVER (PARTITION BY [Type]), 0) AS zscore
    FROM YourTable
) q
ORDER BY [Type], Id
Id | Type | Price | zscore | is_anomaly
-: | ---: | ----: | -----------------: | :---------
1 | 1 | 100 | -0.800640769025436 | F
3 | 1 | 150 | -0.320256307610174 | F
4 | 1 | 300 | 1.12089707663561 | T
2 | 2 | 200 | -0.833333333333333 | F
5 | 2 | 200 | -0.833333333333333 | F
6 | 2 | 400 | 0.5 | F
7 | 2 | 500 | 1.16666666666667 | T
测试 db<>fiddle here
您可以使用窗口函数,
只需对基表进行一次扫描、无需任何连接即可完成此操作:
-- Per-type z-score from window aggregates over MyTable (no join).
-- Price is cast before AVG because AVG over an integer column returns a
-- truncated integer in SQL Server, which skews the z-score.
-- NULLIF turns a zero standard deviation (all prices in a type equal) into
-- NULL so the division yields NULL instead of a divide-by-zero error.
SELECT
    t.Id,
    t.Type,
    t.Price,
    (t.Price - AVG(CAST(t.Price AS FLOAT)) OVER (PARTITION BY t.Type))
        / NULLIF(STDEV(t.Price) OVER (PARTITION BY t.Type), 0) AS zscore
FROM MyTable AS t
请考虑以下记录:
Id Type Price
---------------------------
1 1 100
2 2 200
3 1 150
4 1 300
5 2 200
6 2 400
7 2 500
基于这篇文章《Simple Anomaly Detection Using Plain SQL》,我想按类型分别计算 Z-Score。也就是说,我想先根据 Type
列对记录进行分区,然后在每个分区内计算 Z-Score。我写了下面这个查询,但不知道如何将它与 PARTITION BY
分区结合起来:
-- Asker's attempt: z-score of Price computed over the whole table (not per Type).
-- series: projects only Price, so Id and Type are not exposed to the CTEs below.
WITH series AS (
SELECT Price
FROM MyTable AS n
),
-- stats: a single average / standard deviation over ALL rows; there is no grouping by Type.
-- NOTE(review): stddev is not the STDEV spelling used by the answers; confirm the target engine.
stats AS (
SELECT avg(Price) series_avg,
stddev(Price) as series_stddev
FROM series
),
-- zscores: the comma join attaches the one stats row to every series row.
-- It selects Id and Type, which series does not provide (series only has Price).
zscores AS (
SELECT Id, Type, Price, (price - series_avg) / series_stddev AS zscore
FROM series, stats
)
-- Flag a row as an anomaly ('T') when its z-score lies outside the range [-1, 1].
SELECT *,case when zscore NOT BETWEEN -1 AND 1 then 'T' else 'F' end AS is_anomaly
FROM zscores;
谢谢
计算 AVG 和 STDEV 时,只需按 Type 分组即可。
然后按 Type 将原始数据与汇总结果连接(JOIN)起来。
-- Per-type z-score: aggregate mean / sample standard deviation per [Type],
-- join the summary back to the rows, then flag rows outside [-1, 1].
WITH CTE_DATA AS (
    SELECT Id, [Type], Price
    FROM YourTable
)
, CTE_STATS AS (
    -- One summary row per [Type].
    -- Price is cast before averaging: AVG over an integer column returns a
    -- truncated integer in SQL Server (183 instead of 183.33 for Type 1 in the
    -- sample data), which skews every z-score of that type.
    SELECT
        [Type]
      , AVG(CAST(Price AS FLOAT)) AS series_avg
      , STDEV(Price) AS series_stddev
    FROM CTE_DATA
    GROUP BY [Type]
)
, CTE_ZSCORES AS (
    -- NULLIF avoids a divide-by-zero error when all prices of a type are equal;
    -- the z-score is then NULL and the row falls through to 'F' below.
    SELECT d.Id, d.[Type], d.Price
      , (d.Price - st.series_avg) / NULLIF(st.series_stddev, 0) AS zscore
    FROM CTE_DATA AS d
    INNER JOIN CTE_STATS AS st ON d.[Type] = st.[Type]
)
SELECT Id, [Type], Price, zscore
  , CASE
      WHEN zscore NOT BETWEEN -1 AND 1
      THEN 'T'
      ELSE 'F'
    END AS is_anomaly
FROM CTE_ZSCORES
ORDER BY [Type], Id;
Id | Type | Price | zscore | is_anomaly
-: | ---: | ----: | -----------------: | :---------
1 | 1 | 100 | -0.800640769025436 | F
3 | 1 | 150 | -0.320256307610174 | F
4 | 1 | 300 | 1.12089707663561 | T
2 | 2 | 200 | -0.833333333333333 | F
5 | 2 | 200 | -0.833333333333333 | F
6 | 2 | 400 | 0.5 | F
7 | 2 | 500 | 1.16666666666667 | T
或者使用带 PARTITION BY 的窗口函数。
-- Per-type z-score using window aggregates instead of a join, then flag rows
-- whose z-score lies outside [-1, 1].
SELECT Id, [Type], Price, zscore
  , CASE
      WHEN zscore NOT BETWEEN -1 AND 1
      THEN 'T'
      ELSE 'F'
    END AS is_anomaly
FROM
(
    -- Price is cast before AVG: on an integer column SQL Server's AVG returns a
    -- truncated integer (183 instead of 183.33 for Type 1 in the sample data).
    -- NULLIF avoids a divide-by-zero error when all prices of a type are equal;
    -- the z-score is then NULL and the row is reported as 'F'.
    SELECT Id, [Type], Price
      , (Price - AVG(CAST(Price AS FLOAT)) OVER (PARTITION BY [Type]))
        / NULLIF(STDEV(Price) OVER (PARTITION BY [Type]), 0) AS zscore
    FROM YourTable
) q
ORDER BY [Type], Id
Id | Type | Price | zscore | is_anomaly
-: | ---: | ----: | -----------------: | :---------
1 | 1 | 100 | -0.800640769025436 | F
3 | 1 | 150 | -0.320256307610174 | F
4 | 1 | 300 | 1.12089707663561 | T
2 | 2 | 200 | -0.833333333333333 | F
5 | 2 | 200 | -0.833333333333333 | F
6 | 2 | 400 | 0.5 | F
7 | 2 | 500 | 1.16666666666667 | T
测试 db<>fiddle here
您可以使用窗口函数,
对基 table 进行单次扫描而不进行任何连接来完成此操作
-- Per-type z-score from window aggregates over MyTable (no join).
-- Price is cast before AVG because AVG over an integer column returns a
-- truncated integer in SQL Server, which skews the z-score.
-- NULLIF turns a zero standard deviation (all prices in a type equal) into
-- NULL so the division yields NULL instead of a divide-by-zero error.
SELECT
    t.Id,
    t.Type,
    t.Price,
    (t.Price - AVG(CAST(t.Price AS FLOAT)) OVER (PARTITION BY t.Type))
        / NULLIF(STDEV(t.Price) OVER (PARTITION BY t.Type), 0) AS zscore
FROM MyTable AS t