MySQL large table query optimization
I have a chat application with an API that returns the list of users the current user has talked with. But once the messages table reaches around 100,000 rows, MySQL takes a very long time to return that list.
Here is my messages table:
CREATE TABLE IF NOT EXISTS `messages` (
`_id` int(11) NOT NULL AUTO_INCREMENT,
`fromid` int(11) NOT NULL,
`toid` int(11) NOT NULL,
`message` text NOT NULL,
`attachments` text NOT NULL,
`status` tinyint(1) NOT NULL DEFAULT '0',
`date` datetime NOT NULL,
`delete` varchar(50) NOT NULL,
`uuid_read` varchar(250) NOT NULL,
PRIMARY KEY (`_id`),
KEY `fromid` (`fromid`,`toid`,`status`,`delete`,`uuid_read`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=118561 ;
Here is my users table (simplified):
CREATE TABLE IF NOT EXISTS `users` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`login` varchar(50) DEFAULT '',
`sex` tinyint(1) DEFAULT '0',
`status` varchar(255) DEFAULT '',
`avatar` varchar(30) DEFAULT '0',
`last_active` datetime DEFAULT NULL,
`active` tinyint(1) DEFAULT '1',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=15523 ;
Here is my query (for the user with ID 1930):
select SQL_CALC_FOUND_ROWS `u_id`, `id`, `login`, `sex`, `birthdate`, `avatar`, `online_status`, SUM(`count`) as `count`, SUM(`nr_count`) as `nr_count`, `date`, `last_mesg` from
(
(select `m`.`fromid` as `u_id`, `u`.`id`, `u`.`login`, `u`.`sex`, `u`.`birthdate`, `u`.`avatar`, `u`.`last_active` as online_status, COUNT(`m`.`_id`) as `count`, (COUNT(`m`.`_id`)-SUM(`m`.`status`)) as `nr_count`, `tm`.`date` as `date`, `tm`.`message` as `last_mesg` from `messages` as m inner join `messages` as tm on `tm`.`_id`=(select MAX(`_id`) from `messages` as `tmz` where `tmz`.`fromid`=`m`.`fromid`) left join `users` as u on `u`.`id`=`m`.`fromid` where `m`.`toid`=1930 and `m`.`delete` not like '%1930;%' group by `u`.`id`)
UNION
(select `m`.toid as `u_id`, `u`.`id`, `u`.`login`, `u`.`sex`, `u`.`birthdate`, `u`.`avatar`, `u`.`last_active` as online_status, COUNT(`m`.`_id`) as `count`, 0 as `nr_count`, `tm`.`date` as `date`, `tm`.`message` as `last_mesg` from `messages` as m inner join `messages` as tm on `tm`.`_id`=(select MAX(`_id`) from `messages` as `tmz` where `tmz`.`toid`=`m`.`toid`) left join `users` as u on `u`.`id`=`m`.`toid` where `m`.`fromid`=1930 and `m`.`delete` not like '%1930;%' group by `u`.`id`)
order by `date` desc ) as `f` group by `u_id` order by `date` desc limit 0,10
Please help me optimize this query.
What I need:
- who the user has talked with (name, sex, etc.)
- the last message in each conversation (sent by me or to me)
- the total message count
- the unread message count (for me only)
The query works fine and the output is exactly what I need, but it takes far too long.
There are some design issues with your query and your database:
- You should avoid keywords as column names, such as the `delete` column or the `count` alias;
- You should avoid selecting columns that are neither listed in the GROUP BY nor wrapped in an aggregate function... MySQL allows it, but it is not standard and you have no control over which value gets picked;
- Your NOT LIKE construct can give the query unwanted behaviour, because '%1930;%' also matches 11930; and 11930 is not 1930;
- You should avoid LIKE patterns that start and end with the % wildcard, as they force longer text processing (and cannot use an index);
- You should design a better way to represent message deletion, perhaps a proper flag and/or another table holding whatever data matters for that action (see the sketch after this list);
- Try to LIMIT your result before the join conditions (with a derived table) so that less processing is done;
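A minimal sketch of that deletion idea, assuming a message can be hidden independently by either participant; the `message_deletions` table and its column names are invented for the example:

-- Hypothetical table replacing the `delete` varchar column
CREATE TABLE IF NOT EXISTS `message_deletions` (
  `message_id` int(11) NOT NULL,
  `user_id` int(11) NOT NULL,
  `deleted_at` datetime NOT NULL,
  PRIMARY KEY (`message_id`,`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- The NOT LIKE '%1930;%' wildcard filter then becomes an exact, indexable anti-join
SELECT m.*
FROM `messages` AS m
LEFT JOIN `message_deletions` AS d
  ON d.`message_id` = m.`_id`
 AND d.`user_id` = 1930
WHERE d.`message_id` IS NULL
  AND (m.`fromid` = 1930 OR m.`toid` = 1930);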
I tried to rewrite your query the best way I understood it. I ran my version against a messages table with ~200,000 rows and no indexes, and it executed in 0.15 seconds. But you should definitely create proper indexes to help it keep performing well as the volume of data grows.
SELECT SQL_CALC_FOUND_ROWS
u.id,
u.login,
u.sex,
u.birthdate,
u.avatar,
u.last_active AS online_status,
g._count,
CASE WHEN m.toid = 1930
THEN g.nr_count
ELSE 0
END AS nr_count,
m.`date`,
m.message AS last_mesg
FROM
(
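    -- g: for each conversation partner, the id of the latest message,
    -- the total message count and the unread count,
    -- limited to the 10 most recently active conversations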
SELECT
MAX(_id) AS _id,
COUNT(*) AS _count,
COUNT(*) - SUM(m.status) AS nr_count
FROM messages m
WHERE 1=1
AND m.`delete` NOT LIKE '%1930;%'
AND
(0=1
OR m.fromid = 1930
OR m.toid = 1930
)
GROUP BY
CASE WHEN m.fromid = 1930
THEN m.toid
ELSE m.fromid
END
ORDER BY MAX(`date`) DESC
LIMIT 0, 10
) g
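-- fetch the latest message row itself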
INNER JOIN messages AS m ON 1=1
AND m._id = g._id
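-- resolve the conversation partner (the side that is not user 1930)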
LEFT JOIN users AS u ON 0=1
OR (m.fromid <> 1930 AND u.id = m.fromid)
OR (m.toid <> 1930 AND u.id = m.toid)
ORDER BY m.`date` DESC
;
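On the index side, this is only a starting point to verify with EXPLAIN against your own data (the index name is made up): the existing `fromid` key already serves the `m.fromid = 1930` branch of the OR, so an index that starts with `toid` gives MySQL a way to resolve the other branch, possibly via an index merge, instead of scanning the whole table:

ALTER TABLE `messages`
  ADD INDEX `idx_toid_fromid_date` (`toid`, `fromid`, `date`);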