Batch-Updating Large Tables

Date: 2021-12-13 14:51:20

Below are several approaches to bulk table updates. For a small table of a few tens of thousands of rows, a plain SQL UPDATE is enough.
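For reference, that small-table baseline is just a straight UPDATE plus COMMIT (a minimal sketch, shown against the t_test table used in the examples below):

update t_test set object_id = object_id + 1;
commit;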

1. create/rename table
SQL> drop table t_update_test;

Table dropped.

SQL> create table t_update_test as select * from dba_objects where 1=0;

Table created.

SQL> set timing on;
SQL> insert /*+ append */ into t_update_test(object_name,object_id,data_object_id)
     select object_name,data_object_id+1,data_object_id from dba_objects;

135248 rows created.

Elapsed: 00:00:00.77
SQL> commit;

Commit complete.

Elapsed: 00:00:00.01
SQL> alter table t_update_test rename to t_test;

Table altered.

Elapsed: 00:00:00.33
Finally, recreate the constraints and indexes.
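Note that /*+ append */ alone only requests a direct-path insert; redo is minimized only if the table itself is NOLOGGING (and the database is not in FORCE LOGGING mode). A minimal sketch of the surrounding steps, with an illustrative index name:

alter table t_update_test nologging;
-- ... direct-path insert and rename as shown above, then:
create index idx_t_test_obj_id on t_test(object_id) nologging;
alter table t_test logging;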

2. FORALL
declare
  type rowid_list_type is table of urowid index by binary_integer;
  rowid_list rowid_list_type;
  cursor c_rowids is select rowid from t_test;
begin
  open c_rowids;
  loop
    fetch c_rowids bulk collect into rowid_list limit 2000;
    forall i in 1..rowid_list.count
      update t_test set object_id = object_id + 1 where rowid = rowid_list(i);
    commit;
    exit when rowid_list.count < 2000;
  end loop;
  close c_rowids;
end;
/

PL/SQL procedure successfully completed.

Elapsed: 00:00:06.66
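If individual rows may fail, the same loop can be hardened with SAVE EXCEPTIONS so one bad row does not abort the whole 2000-row batch. A sketch of that variant (ORA-24381 is the standard bulk-errors exception):

declare
  type rowid_list_type is table of urowid index by binary_integer;
  rowid_list rowid_list_type;
  bulk_errors exception;
  pragma exception_init(bulk_errors, -24381);
  cursor c_rowids is select rowid from t_test;
begin
  open c_rowids;
  loop
    fetch c_rowids bulk collect into rowid_list limit 2000;
    begin
      forall i in 1..rowid_list.count save exceptions
        update t_test set object_id = object_id + 1 where rowid = rowid_list(i);
    exception
      when bulk_errors then
        -- report each failed row; the rest of the batch has already gone through
        for j in 1..sql%bulk_exceptions.count loop
          dbms_output.put_line('row ' || sql%bulk_exceptions(j).error_index ||
                               ': ' || sqlerrm(-sql%bulk_exceptions(j).error_code));
        end loop;
    end;
    commit;
    exit when rowid_list.count < 2000;
  end loop;
  close c_rowids;
end;
/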

3. DBMS_PARALLEL_EXECUTE
-- 1. Environment setup
SQL> create table t_update_test as select * from dba_objects;

Table created.

SQL> select count(1) from dba_objects;

COUNT(1)
----------
135287

-- 2. Test
DECLARE
  l_sql    VARCHAR2(1000);
  l_try    INT;
  l_status NUMBER;
BEGIN
  -- Create the task
  DBMS_PARALLEL_EXECUTE.CREATE_TASK('mytask');

  -- Chunk the table by ROWID
  DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_ROWID('mytask', 'SYSTEM', 'T_UPDATE_TEST', true, 100);

  -- Execute the DML in parallel
  l_sql := 'update /*+ ROWID(t) */ T_UPDATE_TEST t
               SET t.object_id = t.data_object_id + 1
             WHERE rowid BETWEEN :start_id AND :end_id';
  DBMS_PARALLEL_EXECUTE.RUN_TASK('mytask', l_sql, DBMS_SQL.NATIVE,
                                 parallel_level => 10);

  -- If there is an error, resume it at most 2 times
  l_try := 0;
  l_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('mytask');
  WHILE (l_try < 2 AND l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
  LOOP
    l_try := l_try + 1;
    DBMS_PARALLEL_EXECUTE.RESUME_TASK('mytask');
    l_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('mytask');
  END LOOP;

  -- Done with processing; drop the task
  DBMS_PARALLEL_EXECUTE.DROP_TASK('mytask');
END;
/
-- Timing

PL/SQL procedure successfully completed.

Elapsed: 00:00:03.72
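The same task can also be chunked by a numeric column instead of ROWID. A minimal sketch, assuming object_id serves as the chunk column (the 1000-row chunk size and the task name mytask2 are illustrative); note it updates a column other than the chunk column, so a row cannot drift into another chunk's range mid-run:

BEGIN
  DBMS_PARALLEL_EXECUTE.CREATE_TASK('mytask2');
  -- chunk boundaries are derived from the min/max of the numeric column
  DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_NUMBER_COL('mytask2', 'SYSTEM',
      'T_UPDATE_TEST', 'OBJECT_ID', 1000);
  -- :start_id/:end_id are now bound to object_id values, not rowids
  DBMS_PARALLEL_EXECUTE.RUN_TASK('mytask2',
      'update T_UPDATE_TEST set data_object_id = data_object_id + 1
        where object_id between :start_id and :end_id',
      DBMS_SQL.NATIVE, parallel_level => 10);
  DBMS_PARALLEL_EXECUTE.DROP_TASK('mytask2');
END;
/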

-- 3. Check the task status and the per-chunk completion status:
SQL> select * from user_parallel_execute_chunks;
SQL> select * from user_parallel_execute_tasks;
SQL> select object_id,data_object_id from t_update_test where rownum<=5;

OBJECT_ID DATA_OBJECT_ID
---------- --------------
3 2
47 46
29 28
16 15
30 29
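Since the block above drops the task at the end, these views only show rows while the task still exists. Queried in time, a per-status summary plus the error details of any failed chunks is usually the most useful view, e.g.:

select status, count(*)
  from user_parallel_execute_chunks
 where task_name = 'mytask'
 group by status;

select chunk_id, status, error_message
  from user_parallel_execute_chunks
 where task_name = 'mytask'
   and status = 'PROCESSED_WITH_ERROR';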
4. Conclusions
With the first approach (create/rename table), NOLOGGING plus a direct-path APPEND insert generates too little redo for recovery: the database cannot be rolled forward through the operation, so a backup must be taken immediately afterwards. This approach is best left to operations/DBA staff, and it handles gigabyte-scale data very quickly.
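The redo claims here are easy to verify: sample the session statistic 'redo size' before and after each approach and compare the delta, e.g.:

select sn.name, ms.value
  from v$mystat ms
  join v$statname sn on sn.statistic# = ms.statistic#
 where sn.name = 'redo size';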
FORALL is slower than the first approach and generates a large amount of redo. A batch size of up to a few tens of thousands of rows works well; I capped it at 2000 here.
The third approach, DBMS_PARALLEL_EXECUTE, splits the working data set into chunks and then launches multiple background jobs to process them. Chunking can be done by rowid, by column value, or by a user-supplied SQL; rowid chunking is the most generally applicable. This method has a clear advantage when processing very large data volumes.