如何优化大数据表的LEFTJOIN

时间:2021-01-17 14:32:09
有两个表 分别是A和B  A表是字典表有2500条记录 B表是日志表每天有300万记录.
现在从A表取出700条记录 去跟B表匹配 主要是检查这700条记录可有日志.通过4个ID来匹配.

B表量大的话要130分钟,小的话也要20分钟. 我想提高速度 把B表按时间来划分6个子部分
with tmp as (select * from  a where xxxx=xxx)
select a.*
from tmp a
left join
(
 select * from b 
 where inserttime >= trunc(sysdate)+0/24
   and inserttime  < trunc(sysdate)+4/24
) b on 
where b.id is null

union all

select a.*
from tmp a
left join
(
 select * from b 
 where inserttime >= trunc(sysdate)+4/24
   and inserttime  < trunc(sysdate)+8/24
) b on 
where b.id is null


性能也没提供很多跟使用一条语句相比.我把它分成24个部分也没提高

请问各位想这样有何提供呢? 只能有SQL语句 ,不能使用PL/SQL 因为它使用在其他程序中

5 个解决方案

#1


1、你这样拆分不会提搞性能
2、相关条件列上有没建立索引
3、修改成not exists试一下
select * from a where exists(
select 1 from b where b.xx=a.xx and ...)
4、贴出相关的执行计划看一下 

#2


是not exists
select * from a where  not exists(
select 1 from b where b.xx=a.xx and ...)

#3


inserttime 的有索引

#4


SELECT STATEMENT, GOAL = ALL_ROWS		Cost=90	CPU cost=252284361	IO cost=61	Cardinality=7716	Bytes=100308	
 TEMP TABLE TRANSFORMATION
  LOAD AS SELECT
   TABLE ACCESS FULL Object name=ETL_RULE Cost=37 CPU cost=207190251 IO cost=13 Cardinality=1286 Bytes=136316
  FILTER
   HASH GROUP BY Cost=53 CPU cost=45094111 IO cost=48 Cardinality=7716 Bytes=100308
    VIEW Cost=52 CPU cost=32037059 IO cost=48 Cardinality=7716 Bytes=100308
     UNION-ALL
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316

#5


      select * 
        from tmp_actionid_lst a
        where not exists
        (
         select 1
         from lars01.OP_MAIL_VISIT_03_1105@oss139.localdomain   b
         where  a.action_id=b.actionid and a.opcontent_id=b.opcontentid
          and   b.inserttime >=trunc(to_date('20110510000000','yyyymmddhh24miss'))
          and   b.inserttime <trunc(to_date('20110510000000','yyyymmddhh24miss'))+1
         )


这个不错

#1


1、你这样拆分不会提搞性能
2、相关条件列上有没建立索引
3、修改成not exists试一下
select * from a where exists(
select 1 from b where b.xx=a.xx and ...)
4、贴出相关的执行计划看一下 

#2


是not exists
select * from a where  not exists(
select 1 from b where b.xx=a.xx and ...)

#3


inserttime 的有索引

#4


SELECT STATEMENT, GOAL = ALL_ROWS		Cost=90	CPU cost=252284361	IO cost=61	Cardinality=7716	Bytes=100308	
 TEMP TABLE TRANSFORMATION
  LOAD AS SELECT
   TABLE ACCESS FULL Object name=ETL_RULE Cost=37 CPU cost=207190251 IO cost=13 Cardinality=1286 Bytes=136316
  FILTER
   HASH GROUP BY Cost=53 CPU cost=45094111 IO cost=48 Cardinality=7716 Bytes=100308
    VIEW Cost=52 CPU cost=32037059 IO cost=48 Cardinality=7716 Bytes=100308
     UNION-ALL
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316
      FILTER
       HASH JOIN RIGHT OUTER Cost=9 CPU cost=5339510 IO cost=8 Cardinality=1286 Bytes=111882
        TABLE ACCESS FULL Object name=OP_MAIL_VISIT_17_1104 Cost=2 CPU cost=7121 IO cost=2 Cardinality=1 Bytes=48
        VIEW Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=50154
         TABLE ACCESS FULL Object name=SYS_TEMP_0FD9D660B_D73396C9 Cost=6 CPU cost=919767 IO cost=6 Cardinality=1286 Bytes=136316

#5


      select * 
        from tmp_actionid_lst a
        where not exists
        (
         select 1
         from lars01.OP_MAIL_VISIT_03_1105@oss139.localdomain   b
         where  a.action_id=b.actionid and a.opcontent_id=b.opcontentid
          and   b.inserttime >=trunc(to_date('20110510000000','yyyymmddhh24miss'))
          and   b.inserttime <trunc(to_date('20110510000000','yyyymmddhh24miss'))+1
         )


这个不错