今天无意中听到有同事在讨论,distinct和group by谁的速度会更快一点,意件不一,其实我也不知道那个好,下午有时间做了一下测试。
1,测试前的准备
//准备一张测试表 mysql> CREATE TABLE `test_test` ( -> `id` int(11) NOT NULL auto_increment, -> `num` int(11) NOT NULL default '0', -> PRIMARY KEY (`id`) -> ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ; Query OK, 0 rows affected (0.05 sec) mysql> delimiter || //改变mysql命令结束符为|| //建个储存过程向表中插入10W条数据 mysql> create procedure p_test(pa int(11)) -> begin -> -> declare max_num int(11) default 100000; -> declare i int default 0; -> declare rand_num int; -> -> select count(id) into max_num from test_test; -> -> while i < pa do -> if max_num < 100000 then -> select cast(rand()*100 as unsigned) into rand_num; -> insert into test_test(num)values(rand_num); -> end if; -> set i = i +1; -> end while; -> end|| Query OK, 0 rows affected (0.00 sec) mysql> call p_test(100000)|| Query OK, 1 row affected (5.66 sec) mysql> delimiter ;//改变mysql命令结束符为; mysql> select count(id) from test_test; //数据都进去了 +-----------+ | count(id) | +-----------+ | 100000 | +-----------+ 1 row in set (0.00 sec) mysql> show variables like "%pro%"; //查看一下,记录执行的profiling是不是开启动了,默认是不开启的 +---------------------------+-------+ | Variable_name | Value | +---------------------------+-------+ | profiling | OFF | | profiling_history_size | 15 | | protocol_version | 10 | | slave_compressed_protocol | OFF | +---------------------------+-------+ 4 rows in set (0.00 sec) mysql> set profiling=1; //开启 Query OK, 0 rows affected (0.00 sec)
2,测试
//做了4组测试 mysql> select distinct(num) from test_test; mysql> select num from test_test group by num; mysql> show profiles; //查看结果 +----------+------------+-------------------------------------------+ | Query_ID | Duration | Query | +----------+------------+-------------------------------------------+ | 1 | 0.07298225 | select distinct(num) from test_test | | 2 | 0.07319975 | select num from test_test group by num | | 3 | 0.07313525 | select num from test_test group by num | | 4 | 0.07317725 | select distinct(num) from test_test | | 5 | 0.07275200 | select distinct(num) from test_test | | 6 | 0.07298600 | select num from test_test group by num | | 7 | 0.07500700 | select num from test_test group by num | | 8 | 0.07331325 | select distinct(num) from test_test | | 9 | 0.57831575 | create index num_index on test_test (num) | //在这儿的时候,我加了索引 | 10 | 0.00243550 | select distinct(num) from test_test | | 11 | 0.00121975 | select num from test_test group by num | | 12 | 0.00116550 | select distinct(num) from test_test | | 13 | 0.00107650 | select num from test_test group by num | +----------+------------+-------------------------------------------+ 13 rows in set (0.00 sec)
上面的1-8是4组数据,并且是没有加索引的,从中我们可以看出,distinct比group by 会好一点点
10-13是2组数据,是加了索引以后的,从中我们可以看出,group by 比distinct 会好一点点
一般情况,数据量比较大的表,关联字段都会加索引的,,并且加索引后检索时间只有以前的六分之一左右。
转载请注明
作者:海底苍鹰
地址:http://blog.51yip.com/mysql/1105.html
存储过程
DELIMITER $$
DROP PROCEDURE IF EXISTS `test`.`dis`$$
CREATE DEFINER=`root`@`localhost` PROCEDURE `dis`(num int(10))
begin
declare i int(10);
set i = 1;
while i <= num do
insert into dis_group (num) values (floor(rand()*100));
set i=i+1;
end while;
end$$
DELIMITER ;
| Query_ID | Duration | Query |
+----------+------------+------------------------------------------+
| 1 | 0.00099400 | show variables like "%pro%" |
| 2 | 0.09483400 | select distinct(num) from dis_group |
| 3 | 0.09505400 | select num from dis_group group by num |
| 4 | 0.00004600 | select num from dis_group group by num |
| 5 | 0.00004200 | select distinct(num) from dis_group |
| 6 | 0.00004400 | select distinct(num) from dis_group |
| 7 | 0.00004400 | select num from dis_group group by num |
| 8 | 0.00004700 | select distinct(num) from dis_group |
| 9 | 0.00004400 | select num from dis_group group by num |
| 10 | 1.00069100 | create index num_index on dis_group(num) |
| 11 | 0.00362300 | select num from dis_group group by num |
| 12 | 0.00200000 | select distinct(num) from dis_group |
| 13 | 0.00091000 | select distinct(num) from dis_group |
| 14 | 0.00098700 | select num from dis_group group by num |
| 15 | 0.00090600 | select num from dis_group group by num
我的结果中,没加索引前速度更快。估计我的机子太烂了~~
索引就好比是一本书的目录,如果这本书很厚的话,找起东西来比较麻烦的,比如汉语字典,找字的时候,我们会先去找目录,因为这样找比较快一点。如果一本书,只有十几页,这个时候,我们找东西,根本不要去看目录了,因为这样更浪费时间。
从你贴出来的数据看,你测试的数据表里面,数据量比较小,这个时候加索引反而会影响速度的。