遇到一个关于优化问题,大数据量的数据 in 和 exists 的区别
特地在自己机器上实验了一把
1.数据库版本
Oracle Database 11g Enterprise Edition Release 11.2.0.1.0
2.表
SCOTT.EMP
SCOTT.DEPT
--SCOTT.DEPT.DEPTNO是SCOTT.EMP.DEPTNO的外键
CREATE TABLE SCOTT.EMP1 AS SELECT * FROM SCOTT.EMP
CREATE TABLE SCOTT.DEPT1 AS SELECT * FROM SCOTT.DEPT
3.执行计划统计
select * from table(dbms_xplan.display_cursor('sql_id'))
--------------------------------------------------------------------------------------------------------------
1.RBO+主外键关系的执行计划
select /*+ rule */* from emp where emp.deptno in (select deptno from dept) --6sk9v0wuwzbct
-=====================================================SQL_ID 6sk9v0wuwzbct, child number 0-------------------------------------select /*+ rule */* from emp where emp.deptno in (select deptno from dept) Plan hash value: 3956160932 ----------------------------------| Id | Operation | Name |----------------------------------| 0 | SELECT STATEMENT | ||* 1 | TABLE ACCESS FULL| EMP |---------------------------------- Predicate Information (identified by operation id):--------------------------------------------------- 1 - filter("EMP"."DEPTNO" IS NOT NULL) Note----- - rule based optimizer used (consider using cbo)
/*
基于规则的优化器如果存在主外键的时候会忽略掉dept表的扫描
*/
2.主外键 + RBO 的主外键exists操作
select /*+ rule */* from emp where exists (select 1 from dept where dept.deptno=emp.deptno) --1b9t1auw0zpnc
--===============================================================
SQL_ID 1b9t1auw0zpnc, child number 0
-------------------------------------
select /*+ rule */* from emp where exists (select 1 from dept where
dept.deptno=emp.deptno)
Plan hash value: 1783302997
--------------------------------------
| Id | Operation | Name |
--------------------------------------
| 0 | SELECT STATEMENT | |
|* 1 | FILTER | |
| 2 | TABLE ACCESS FULL| EMP |
|* 3 | INDEX UNIQUE SCAN| PK_DEPT |
--------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter( IS NOT NULL)
3 - access("DEPT"."DEPTNO"=:B1)
Note
-----
- rule based optimizer used (consider using cbo)
/*
可以看到比in写法多了一次扫描
*/
3.无索引+RBO in操作的执行计划
select /*+ rule */* from emp1 where deptno in (select deptno from dept1) --fwtdq5hj1w5v7
--===================================================
SQL_ID fwtdq5hj1w5v7, child number 0
-------------------------------------
select /*+ rule */* from emp1 where deptno in (select deptno from dept1)
Plan hash value: 1261695393
------------------------------------------
| Id | Operation | Name |
------------------------------------------
| 0 | SELECT STATEMENT | |
| 1 | MERGE JOIN | |
| 2 | SORT JOIN | |
| 3 | TABLE ACCESS FULL | EMP1 |
|* 4 | SORT JOIN | |
| 5 | VIEW | VW_NSO_1 |
| 6 | SORT UNIQUE | |
| 7 | TABLE ACCESS FULL| DEPT1 |
------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("DEPTNO"="DEPTNO")
filter("DEPTNO"="DEPTNO")
Note
-----
- rule based optimizer used (consider using cbo)
/*
就这个sql来看,是先把in中的子查询结果集计算完成并取唯一,然后在与EMP1表排序合并连接
会有大量的排序时间
*/
4.无索引+RBO exists操作执行计划
select /*+ rule */* from emp1 where exists (select 1 from dept1 where dept1.deptno=emp1.deptno) --7cd23zjzhmyja
SQL_ID 7cd23zjzhmyja, child number 0
-------------------------------------
select /*+ rule */* from emp1 where exists (select 1 from dept1 where
dept1.deptno=emp1.deptno)
Plan hash value: 4169263234
------------------------------------
| Id | Operation | Name |
------------------------------------
| 0 | SELECT STATEMENT | |
|* 1 | FILTER | |
| 2 | TABLE ACCESS FULL| EMP1 |
|* 3 | TABLE ACCESS FULL| DEPT1 |
------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter( IS NOT NULL)
3 - filter("DEPT1"."DEPTNO"=:B1)
Note
-----
- rule based optimizer used (consider using cbo)
/*
会用emp1的值去匹配dept1中是否有此值,对dept1会有大量的扫描
dept1 的deptno 字段有索引+ emp1的值比较少 适合用此方法
*/
5.存在主外键+CBO in操作的执行计划
select * from emp where emp.deptno in (select deptno from dept) --9hs3bpna0nun4
--============================================
SQL_ID 9hs3bpna0nun4, child number 0
-------------------------------------
select * from emp where emp.deptno in (select deptno from dept)
Plan hash value: 3956160932
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 3 (100)| |
|* 1 | TABLE ACCESS FULL| EMP | 14 | 532 | 3 (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("EMP"."DEPTNO" IS NOT NULL)
/*
类似 CBO 下in操作
*/
6.CBO+主外键的 exists 操作
select * from emp where exists (select 1 from dept where dept.deptno=emp.deptno) --6t5nqxmnu0sa5
--========================================================
SQL_ID 6t5nqxmnu0sa5, child number 0
-------------------------------------
select * from emp where exists (select 1 from dept where
dept.deptno=emp.deptno)
Plan hash value: 3956160932
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 3 (100)| |
|* 1 | TABLE ACCESS FULL| EMP | 14 | 532 | 3 (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("EMP"."DEPTNO" IS NOT NULL)
/*
</pre><pre name="code" class="sql">*/
7.CBO+无索引 in操作
select * from emp1 where deptno in (select deptno from dept1) --d4gg9u75dsdd6
--==============================================================
SQL_ID d4gg9u75dsdd6, child number 0
-------------------------------------
select * from emp1 where deptno in (select deptno from dept1)
Plan hash value: 1645276872
----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 7 (100)| |
|* 1 | HASH JOIN SEMI | | 14 | 1400 | 7 (15)| 00:00:01 |
| 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL| DEPT1 | 4 | 52 | 3 (0)| 00:00:01 |
----------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("DEPTNO"="DEPTNO")
Note
-----
- dynamic sampling used for this statement (level=2)
/*
</pre><pre name="code" class="sql">*/
8.无索引+CBO exists操作
select * from emp1 where exists (select 1 from dept1 where dept1.deptno=emp1.deptno) --5cxf3nqy3rctb
--==========================================================================
SQL_ID 5cxf3nqy3rctb, child number 0
-------------------------------------
select * from emp1 where exists (select 1 from dept1 where
dept1.deptno=emp1.deptno)
Plan hash value: 1645276872
----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 7 (100)| |
|* 1 | HASH JOIN SEMI | | 14 | 1400 | 7 (15)| 00:00:01 |
| 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL| DEPT1 | 4 | 52 | 3 (0)| 00:00:01 |
----------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("DEPT1"."DEPTNO"="EMP1"."DEPTNO")
Note
-----
- dynamic sampling used for this statement (level=2)
--创建索引
CREATE INDEX EMP1_DEPTNO ON EMP1(DEPTNO)
CREATE INDEX DEPT1_DEPTNO ON DEPT1(DEPTNO)
9.有索引+RBO in操作
select /* +RULE */* from emp1 where emp1.deptno in (select deptno from dept1) --38jnq9n32jjt4
---=====================================================
SQL_ID 38jnq9n32jjt4, child number 0
-------------------------------------
select /* +RULE */* from emp1 where emp1.deptno in (select deptno from
dept1)
Plan hash value: 4046719091
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 3 (100)| |
| 1 | NESTED LOOPS SEMI | | 14 | 1400 | 3 (0)| 00:00:01 |
| 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | DEPT1_DEPTNO | 4 | 52 | 0 (0)| |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("EMP1"."DEPTNO"="DEPTNO")
Note
-----
- dynamic sampling used for this statement (level=2)
/*
走的嵌套循环联结
*/
10.有索引+RBO exists操作
select /* +RULE */* from emp1 where exists (select 1 from dept1 where dept1.deptno=emp1.deptno) --2uajdy6rsmgdu
--==========================================================
SQL_ID 2uajdy6rsmgdu, child number 0
-------------------------------------
select /* +RULE */* from emp1 where exists (select 1 from dept1 where
dept1.deptno=emp1.deptno)
Plan hash value: 4046719091
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 3 (100)| |
| 1 | NESTED LOOPS SEMI | | 14 | 1400 | 3 (0)| 00:00:01 |
| 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | DEPT1_DEPTNO | 4 | 52 | 0 (0)| |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("DEPT1"."DEPTNO"="EMP1"."DEPTNO")
Note
-----
- dynamic sampling used for this statement (level=2)
</pre>11.有索引+CBO in操作<p></p><p></p><pre name="code" class="sql"> select /* CHENJ */* from emp1 where emp1.deptno in (select deptno from dept1) --0zztfa0s5agkh SQL_ID 0zztfa0s5agkh, child number 0-------------------------------------select /* CHENJ */* from emp1 where emp1.deptno in (select deptno from dept1) Plan hash value: 4046719091 -----------------------------------------------------------------------------------| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |-----------------------------------------------------------------------------------| 0 | SELECT STATEMENT | | | | 3 (100)| || 1 | NESTED LOOPS SEMI | | 14 | 1400 | 3 (0)| 00:00:01 || 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 ||* 3 | INDEX RANGE SCAN | DEPT1_DEPTNO | 4 | 52 | 0 (0)| |----------------------------------------------------------------------------------- Predicate Information (identified by operation id):--------------------------------------------------- 3 - access("EMP1"."DEPTNO"="DEPTNO") Note----- - dynamic sampling used for this statement (level=2)
12.有索引+CBO exists操作
select /* CHENJ */* from emp1 where exists (select 1 from dept1 where dept1.deptno=emp1.deptno) --gq143uhy91jy6
SQL_ID gq143uhy91jy6, child number 0
-------------------------------------
select /* CHENJ */* from emp1 where exists (select 1 from dept1 where
dept1.deptno=emp1.deptno)
Plan hash value: 4046719091
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 3 (100)| |
| 1 | NESTED LOOPS SEMI | | 14 | 1400 | 3 (0)| 00:00:01 |
| 2 | TABLE ACCESS FULL| EMP1 | 14 | 1218 | 3 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | DEPT1_DEPTNO | 4 | 52 | 0 (0)| |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("DEPT1"."DEPTNO"="EMP1"."DEPTNO")
Note
-----
- dynamic sampling used for this statement (level=2)
不完全结论:测试数据+一些条件限制可能不完善,但是初步结论用CBO优化器时,in 与 exists 最后的执行计划并无太大区别
RBO优化器时,in 与 exists 要看具体应用场景了