SQL:计算每个日期范围的中位数
SQL: Calculate the median for each date range
我正在使用 SQL Server 2008 并尝试计算每个日期范围的中值。
示例:
共有 4 个日期(1/1/16、3/1/16、7/1/16、10/1/16),每个日期有多个值。日期 10/1/16 的中位数应根据日期范围 1/1/16 – 7/1/16 中的值计算得出;日期 7/1/16 的中位数应根据日期范围 1/1/16 – 3/1/16 中的值计算得出。
例如,对于日期 10/1/16(2016 年 10 月 1 日),该日期自身的值不应计入中位数(此规则适用于所有日期;此外,晚于该日期的值也不应计入计算)。
下面的查询只计算了最大访问日期(MAX Visit Date)的中位数。但是,我还需要它计算其他 3 个访问日期的中位数。我尝试过删除 MAX CTE,并添加一个条件为“所有访问日期 < 当前访问日期”的连接,但没能让它正常工作。直到发帖时我仍未成功,所以任何帮助我都会非常感激。我在下面附上了示例数据和预期结果。
编辑:某种递归是否可行?
-- Sample data: one question ('ABC') with several VALUEs on each of four visit dates.
-- Dates are written in the 'YYYYMMDD' form, which SQL Server parses the same way under
-- every SET LANGUAGE / SET DATEFORMAT setting; the original 'm/d/yyyy' strings would be
-- read as day/month under a dmy setting. The intended dates are 1 Jan, 1 Mar, 1 Jul and
-- 1 Oct 2016.
-- VALUE is an INT column, so the values are integer literals instead of quoted strings
-- that rely on implicit conversion.
-- The leading semicolons terminate whatever statement precedes each statement.
;CREATE TABLE #TEST (QUESTION VARCHAR(15), VISIT_DATE DATE, VALUE INT)
;INSERT INTO #TEST (QUESTION, VISIT_DATE, VALUE)
VALUES
    ('ABC', '20160101', 80),
    ('ABC', '20160101', 90),
    ('ABC', '20160101', 100),
    ('ABC', '20160301', 70),
    ('ABC', '20160301', 80),
    ('ABC', '20160301', 90),
    ('ABC', '20160301', 100),
    ('ABC', '20160701', 50),
    ('ABC', '20160701', 60),
    ('ABC', '20160701', 70),
    ('ABC', '20161001', 10),
    ('ABC', '20161001', 20),
    ('ABC', '20161001', 30),
    ('ABC', '20161001', 40)
-- Returns every row of #TEST together with one median per QUESTION: the median of all
-- VALUEs recorded before the latest visit date in the table. The same median is attached
-- to every row of the question, whatever its own visit date.
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH MAX_VISITDATE AS (
    -- Latest visit date in #TEST; values on this date are left out of the median.
    SELECT MAX(VISIT_DATE) AS MAX_VISITDATE
    FROM #TEST
), MEDIAN AS (
    SELECT
        RN.QUESTION,
        -- Cast before averaging: AVG over INT returns INT, which would truncate the
        -- median whenever the two middle values have an odd sum (e.g. 80 and 85).
        AVG(CAST(RN.VALUE AS DECIMAL(18, 2))) AS GroupMedianPastQtrs
    FROM (
        SELECT
            T.QUESTION,
            T.VALUE,
            ROW_NUMBER() OVER (PARTITION BY T.QUESTION ORDER BY T.VALUE) AS ROWNUMBER,
            COUNT(*) OVER (PARTITION BY T.QUESTION) AS QuestionCount
        FROM #TEST AS T
        -- NULL values are dropped here so they do not take up row numbers or inflate
        -- the count; AVG would ignore them anyway, which shifted the median position.
        WHERE T.VALUE IS NOT NULL
          -- Strictly earlier than the latest date (replaces NOT IN over a subquery).
          AND T.VISIT_DATE < (SELECT MAX_VISITDATE FROM MAX_VISITDATE)
    ) AS RN
    -- Integer division: for an odd count both expressions name the single middle row,
    -- for an even count they name the two middle rows.
    WHERE RN.ROWNUMBER IN (RN.QuestionCount / 2 + 1, (RN.QuestionCount + 1) / 2)
    GROUP BY RN.QUESTION
)
-- Explicit column list: SELECT * returned the QUESTION column twice (once per side of the join).
SELECT
    T.QUESTION,
    T.VISIT_DATE,
    T.VALUE,
    MEDIAN.GroupMedianPastQtrs
FROM #TEST AS T
INNER JOIN MEDIAN
    ON T.QUESTION = MEDIAN.QUESTION
--Expected Results:
Question|Visit_Date |Value|GroupMedian |
--------|-----------|-----|-------------|
'ABC' |'1/1/2016' |'80' |'' |--No Median, no previous values
'ABC' |'1/1/2016' |'90' |'' |--No Median, no previous values
'ABC' |'1/1/2016' |'100'|'' |--No Median, no previous values
'ABC' |'3/1/2016' |'70' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'80' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'90' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'100'|'90' |--Median value from date 1/1/16
'ABC' |'7/1/2016' |'50' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'7/1/2016' |'60' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'7/1/2016' |'70' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'10/1/2016'|'10' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'20' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'30' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'40' |'80' |--Median value from date range 1/1/16 to 7/1/16
我手头没有 SQL Server 2008 环境可以测试,所以我尽力核对了下面用到的每个功能在 SQL Server 2008 版本中都可用:
-- For every row of #TEST, returns the median of all VALUEs of the same QUESTION recorded
-- on strictly earlier visit dates. Rows on a question's earliest date have no earlier
-- values, so their MedianValue is NULL (LEFT JOIN with no match).
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH
tmp AS
(
    -- Pair each distinct (QUESTION, VISIT_DATE) with every earlier row of that question,
    -- number the paired rows by VALUE, and compute which row number(s) hold the median.
    SELECT      a.QUESTION
            ,   a.VISIT_DATE
            ,   b.VALUE
            ,   ROW_NUMBER() OVER (PARTITION BY a.QUESTION, a.VISIT_DATE ORDER BY b.VALUE)
                    AS RowNumber
                -- Integer division: (n + 1) / 2 and (n + 2) / 2 equal FLOOR((n + 1) / 2.0)
                -- and CEILING((n + 1) / 2.0) without a round trip through float.
            ,   (COUNT(*) OVER (PARTITION BY a.QUESTION, a.VISIT_DATE) + 1) / 2
                    AS LowerMedianRowNumber
            ,   (COUNT(*) OVER (PARTITION BY a.QUESTION, a.VISIT_DATE) + 2) / 2
                    AS UpperMedianRowNumber
    FROM        (
                    SELECT  DISTINCT
                            QUESTION
                        ,   VISIT_DATE
                    FROM    #TEST
                ) a
    INNER JOIN  #TEST b     ON  a.QUESTION = b.QUESTION
                            AND a.VISIT_DATE > b.VISIT_DATE
                            -- NULL values are excluded so the row numbering and the count
                            -- cover the same rows; previously COUNT(b.Value) skipped NULLs
                            -- while ROW_NUMBER() still numbered them.
                            AND b.VALUE IS NOT NULL
),
GroupMedian AS
(
    SELECT      QUESTION
            ,   VISIT_DATE
                -- Cast before averaging: AVG over INT returns INT, which would truncate the
                -- median whenever the two middle values have an odd sum (e.g. 80 and 85).
            ,   AVG(CAST(VALUE AS DECIMAL(18, 2))) AS MedianValue
    FROM        tmp
    WHERE       RowNumber IN (LowerMedianRowNumber, UpperMedianRowNumber)
    GROUP BY    QUESTION
            ,   VISIT_DATE
)
SELECT      a.QUESTION
        ,   a.VISIT_DATE
        ,   a.VALUE
        ,   b.MedianValue
FROM        #TEST a
LEFT JOIN   GroupMedian b   ON  a.QUESTION = b.QUESTION
                            AND a.VISIT_DATE = b.VISIT_DATE
ORDER BY    a.QUESTION
        ,   a.VISIT_DATE
        ,   a.VALUE
我正在使用 SQL Server 2008 并尝试计算每个日期范围的中值。
示例: 共有 4 个日期(1/1/16、3/1/16、7/1/16、10/1/16),每个日期有多个值。日期 10/1/16 的中位数应根据日期范围 1/1/16 – 7/1/16 中的值计算得出;日期 7/1/16 的中位数应根据日期范围 1/1/16 – 3/1/16 中的值计算得出。
例如,对于日期 10/1/16(2016 年 10 月 1 日),该日期自身的值不应计入中位数(此规则适用于所有日期;此外,晚于该日期的值也不应计入计算)。
下面的查询只计算了最大访问日期(MAX Visit Date)的中位数。但是,我还需要它计算其他 3 个访问日期的中位数。我尝试过删除 MAX CTE,并添加一个条件为“所有访问日期 < 当前访问日期”的连接,但没能让它正常工作。直到发帖时我仍未成功,所以任何帮助我都会非常感激。我在下面附上了示例数据和预期结果。
编辑:某种递归是否可行?
-- Sample data: one question ('ABC') with several VALUEs on each of four visit dates.
-- Dates are written in the 'YYYYMMDD' form, which SQL Server parses the same way under
-- every SET LANGUAGE / SET DATEFORMAT setting; the original 'm/d/yyyy' strings would be
-- read as day/month under a dmy setting. The intended dates are 1 Jan, 1 Mar, 1 Jul and
-- 1 Oct 2016.
-- VALUE is an INT column, so the values are integer literals instead of quoted strings
-- that rely on implicit conversion.
-- The leading semicolons terminate whatever statement precedes each statement.
;CREATE TABLE #TEST (QUESTION VARCHAR(15), VISIT_DATE DATE, VALUE INT)
;INSERT INTO #TEST (QUESTION, VISIT_DATE, VALUE)
VALUES
    ('ABC', '20160101', 80),
    ('ABC', '20160101', 90),
    ('ABC', '20160101', 100),
    ('ABC', '20160301', 70),
    ('ABC', '20160301', 80),
    ('ABC', '20160301', 90),
    ('ABC', '20160301', 100),
    ('ABC', '20160701', 50),
    ('ABC', '20160701', 60),
    ('ABC', '20160701', 70),
    ('ABC', '20161001', 10),
    ('ABC', '20161001', 20),
    ('ABC', '20161001', 30),
    ('ABC', '20161001', 40)
-- Returns every row of #TEST together with one median per QUESTION: the median of all
-- VALUEs recorded before the latest visit date in the table. The same median is attached
-- to every row of the question, whatever its own visit date.
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH MAX_VISITDATE AS (
    -- Latest visit date in #TEST; values on this date are left out of the median.
    SELECT MAX(VISIT_DATE) AS MAX_VISITDATE
    FROM #TEST
), MEDIAN AS (
    SELECT
        RN.QUESTION,
        -- Cast before averaging: AVG over INT returns INT, which would truncate the
        -- median whenever the two middle values have an odd sum (e.g. 80 and 85).
        AVG(CAST(RN.VALUE AS DECIMAL(18, 2))) AS GroupMedianPastQtrs
    FROM (
        SELECT
            T.QUESTION,
            T.VALUE,
            ROW_NUMBER() OVER (PARTITION BY T.QUESTION ORDER BY T.VALUE) AS ROWNUMBER,
            COUNT(*) OVER (PARTITION BY T.QUESTION) AS QuestionCount
        FROM #TEST AS T
        -- NULL values are dropped here so they do not take up row numbers or inflate
        -- the count; AVG would ignore them anyway, which shifted the median position.
        WHERE T.VALUE IS NOT NULL
          -- Strictly earlier than the latest date (replaces NOT IN over a subquery).
          AND T.VISIT_DATE < (SELECT MAX_VISITDATE FROM MAX_VISITDATE)
    ) AS RN
    -- Integer division: for an odd count both expressions name the single middle row,
    -- for an even count they name the two middle rows.
    WHERE RN.ROWNUMBER IN (RN.QuestionCount / 2 + 1, (RN.QuestionCount + 1) / 2)
    GROUP BY RN.QUESTION
)
-- Explicit column list: SELECT * returned the QUESTION column twice (once per side of the join).
SELECT
    T.QUESTION,
    T.VISIT_DATE,
    T.VALUE,
    MEDIAN.GroupMedianPastQtrs
FROM #TEST AS T
INNER JOIN MEDIAN
    ON T.QUESTION = MEDIAN.QUESTION
--Expected Results:
Question|Visit_Date |Value|GroupMedian |
--------|-----------|-----|-------------|
'ABC' |'1/1/2016' |'80' |'' |--No Median, no previous values
'ABC' |'1/1/2016' |'90' |'' |--No Median, no previous values
'ABC' |'1/1/2016' |'100'|'' |--No Median, no previous values
'ABC' |'3/1/2016' |'70' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'80' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'90' |'90' |--Median value from date 1/1/16
'ABC' |'3/1/2016' |'100'|'90' |--Median value from date 1/1/16
'ABC' |'7/1/2016' |'50' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'7/1/2016' |'60' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'7/1/2016' |'70' |'90' |--Median value from date range 1/1/16 to 3/1/16
'ABC' |'10/1/2016'|'10' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'20' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'30' |'80' |--Median value from date range 1/1/16 to 7/1/16
'ABC' |'10/1/2016'|'40' |'80' |--Median value from date range 1/1/16 to 7/1/16
我手头没有 SQL Server 2008 环境可以测试,所以我尽力核对了下面用到的每个功能在 SQL Server 2008 版本中都可用:
-- For every row of #TEST, returns the median of all VALUEs of the same QUESTION recorded
-- on strictly earlier visit dates. Rows on a question's earliest date have no earlier
-- values, so their MedianValue is NULL (LEFT JOIN with no match).
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH
tmp AS
(
    -- Pair each distinct (QUESTION, VISIT_DATE) with every earlier row of that question,
    -- number the paired rows by VALUE, and compute which row number(s) hold the median.
    SELECT      a.QUESTION
            ,   a.VISIT_DATE
            ,   b.VALUE
            ,   ROW_NUMBER() OVER (PARTITION BY a.QUESTION, a.VISIT_DATE ORDER BY b.VALUE)
                    AS RowNumber
                -- Integer division: (n + 1) / 2 and (n + 2) / 2 equal FLOOR((n + 1) / 2.0)
                -- and CEILING((n + 1) / 2.0) without a round trip through float.
            ,   (COUNT(*) OVER (PARTITION BY a.QUESTION, a.VISIT_DATE) + 1) / 2
                    AS LowerMedianRowNumber
            ,   (COUNT(*) OVER (PARTITION BY a.QUESTION, a.VISIT_DATE) + 2) / 2
                    AS UpperMedianRowNumber
    FROM        (
                    SELECT  DISTINCT
                            QUESTION
                        ,   VISIT_DATE
                    FROM    #TEST
                ) a
    INNER JOIN  #TEST b     ON  a.QUESTION = b.QUESTION
                            AND a.VISIT_DATE > b.VISIT_DATE
                            -- NULL values are excluded so the row numbering and the count
                            -- cover the same rows; previously COUNT(b.Value) skipped NULLs
                            -- while ROW_NUMBER() still numbered them.
                            AND b.VALUE IS NOT NULL
),
GroupMedian AS
(
    SELECT      QUESTION
            ,   VISIT_DATE
                -- Cast before averaging: AVG over INT returns INT, which would truncate the
                -- median whenever the two middle values have an odd sum (e.g. 80 and 85).
            ,   AVG(CAST(VALUE AS DECIMAL(18, 2))) AS MedianValue
    FROM        tmp
    WHERE       RowNumber IN (LowerMedianRowNumber, UpperMedianRowNumber)
    GROUP BY    QUESTION
            ,   VISIT_DATE
)
SELECT      a.QUESTION
        ,   a.VISIT_DATE
        ,   a.VALUE
        ,   b.MedianValue
FROM        #TEST a
LEFT JOIN   GroupMedian b   ON  a.QUESTION = b.QUESTION
                            AND a.VISIT_DATE = b.VISIT_DATE
ORDER BY    a.QUESTION
        ,   a.VISIT_DATE
        ,   a.VALUE