如何在 mysql 5.7 中正确计算中位数
how to count median properly in mysql 5.7
这是我的fiddlehttps://dbfiddle.uk/?rdbms=mysql_5.7&fiddle=7946871d9c25cd8914353c70fde1fe8d
所以这是我的查询
select count(user_id) as itung, user_Id 来自
(SELECT t1.user_id,
t1.createdAt cretecompare1,
t2.createdAt cretecompare2,
DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id
AND t1.createdAt < t2.createdAt
AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
FROM test t3
WHERE t3.status_id != 7
GROUP BY t3.user_id
HAVING SUM(t3.createdAt < '2020-04-01') > 1
AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
FROM test t5
WHERE t1.user_id = t5.user_id
AND t5.status_id != 7
AND t1.createdAt < t5.createdAt
AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2 BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id
output table:
+--------+---------+
| itung | user_Id |
+--------+---------+
| 1 | 13 |
| 2 | 14 |
+--------+---------+
基于此 table 我想通过此查询找出最大值 (itung)、最小值 (itung) 和中值 (itung)
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',')
, ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*)), ',', -1) AS DECIMAL)) / 2)) as median from
(select count(user_id) as itung, user_Id from
(SELECT t1.user_id,
t1.createdAt cretecompare1,
t2.createdAt cretecompare2,
DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id
AND t1.createdAt < t2.createdAt
AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
FROM test t3
WHERE t3.status_id != 7
GROUP BY t3.user_id
HAVING SUM(t3.createdAt < '2020-04-01') > 1
AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
FROM test t5
WHERE t1.user_id = t5.user_id
AND t5.status_id != 7
AND t1.createdAt < t5.createdAt
AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2 BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id) ab
output table:
+------------+------------+-----------------------+--------+
| max(itung) | min(itung) | format(avg(itung), 2) | median |
+------------+------------+-----------------------+--------+
| 2 | 1 | 1.50 | 2 |
+------------+------------+-----------------------+--------+
你知道中位数查询是错误的,因为中位数应该是 1,5 而不是 2。我的中位数查询错在哪里?
您可以使用 ROUND() 将报告的中位数四舍五入为整数。如果您不想要它,请将其删除:
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), (CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2) as median
或者在舍入后加上小数位舍入,这里是3:
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2, 3)) as median
请注意,从所有值的 GROUP_CONCAT comma-separated 列表中查找中值仅在行数不多的情况下有效,因为 GROUP_CONCAT 将被截断@@group_concat_max_len,在 MySQL 或 10.2.
之前的 MariaDB 上默认为 1024 个字符
这是我的fiddlehttps://dbfiddle.uk/?rdbms=mysql_5.7&fiddle=7946871d9c25cd8914353c70fde1fe8d
所以这是我的查询 select count(user_id) as itung, user_Id 来自
(SELECT t1.user_id,
t1.createdAt cretecompare1,
t2.createdAt cretecompare2,
DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id
AND t1.createdAt < t2.createdAt
AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
FROM test t3
WHERE t3.status_id != 7
GROUP BY t3.user_id
HAVING SUM(t3.createdAt < '2020-04-01') > 1
AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
FROM test t5
WHERE t1.user_id = t5.user_id
AND t5.status_id != 7
AND t1.createdAt < t5.createdAt
AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2 BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id
output table:
+--------+---------+
| itung | user_Id |
+--------+---------+
| 1 | 13 |
| 2 | 14 |
+--------+---------+
基于此 table 我想通过此查询找出最大值 (itung)、最小值 (itung) 和中值 (itung)
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',')
, ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*)), ',', -1) AS DECIMAL)) / 2)) as median from
(select count(user_id) as itung, user_Id from
(SELECT t1.user_id,
t1.createdAt cretecompare1,
t2.createdAt cretecompare2,
DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id
AND t1.createdAt < t2.createdAt
AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
FROM test t3
WHERE t3.status_id != 7
GROUP BY t3.user_id
HAVING SUM(t3.createdAt < '2020-04-01') > 1
AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
FROM test t5
WHERE t1.user_id = t5.user_id
AND t5.status_id != 7
AND t1.createdAt < t5.createdAt
AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2 BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id) ab
output table:
+------------+------------+-----------------------+--------+
| max(itung) | min(itung) | format(avg(itung), 2) | median |
+------------+------------+-----------------------+--------+
| 2 | 1 | 1.50 | 2 |
+------------+------------+-----------------------+--------+
你知道中位数查询是错误的,因为中位数应该是 1,5 而不是 2。我的中位数查询错在哪里?
您可以使用 ROUND() 将报告的中位数四舍五入为整数。如果您不想要它,请将其删除:
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), (CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2) as median
或者在舍入后加上小数位舍入,这里是3:
select max(itung), min(itung), format(avg(itung), 2), IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2, 3)) as median
请注意,从所有值的 GROUP_CONCAT comma-separated 列表中查找中值仅在行数不多的情况下有效,因为 GROUP_CONCAT 将被截断@@group_concat_max_len,在 MySQL 或 10.2.
之前的 MariaDB 上默认为 1024 个字符