1.创建常规的企业信息表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
create table t_centerprises(
objectid bigint not null , /*唯一编号(6位行政区号+6位sn)*/
divid uuid not null , /*行政区唯一代码*/
name text not null , /*企业名称*/
address text not null , /*企业地址*/
post text, /*企业邮编*/
contacts text, /*联系人*/
tel text, /*联系电话*/
fax text, /*传真*/
describe text, /*企业备注*/
date timestamp default now() not null , /*创建日期*/
constraint pk_centerprisess_objectid primary key (objectid),
constraint fk_centerprises_divid foreign key (divid) references ts_divisions(objectid) on delete cascade
);
create index idx_centerprises_divid on t_centerprises(divid);
|
2.需要使用的函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
/*转换16进制到字符*/
drop function if exists hex_to_string(text);
create or replace function hex_to_string( text)
returns text as
$$
declare
result text;
begin
execute 'select U&' '\' || $1 || ' '' ' INTO result;
return result;
end;
$$ language plpgsql;
/*随机生成汉字
汉字范围U+4E00..U+9FA5
*/
drop function if exists gen_random_zh(int,int);
create or replace function gen_random_zh(imin int,imax int)
returns text as
$$
declare
vlen integer;
result text;
begin
result := ' ';
vlen = floor(random()*(imax-imin)+imin);
for i in 1..vlen loop
result := result || hex_to_string(to_hex(floor(random()*(42191-19968)+19968):: integer ));
end loop;
return result;
end ;
$$ language plpgsql;
|
1
2
3
4
5
6
7
|
insert into t_centerprises(objectid,divid, name ,address,post,contacts,tel,fax,describe)
select (vdivid|| lpad(id::text,6, '0' )):: bigint as objectid, '110101' ,
gen_random_zh(5,25) as name ,gen_random_zh(10,50) as address,
floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts,
floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax,
gen_random_zh(32,128) as describe
from generate_series(1,5000000) as id;
|
在普通pc机上插入,大概完成时间约8小时,过程不可监控,并且cpu/内存占用率高,磁盘基本满负荷动作,读写率基本上都是100%.
4.改进后的方法, 插入(10000000条)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
do $$
declare vStart bigint ;
declare vEnd bigint ;
declare MAXVALE bigint ;
declare INTERVAL bigint ;
declare vprovince integer ;
declare vprefecture integer ;
declare vcounty integer ;
declare vdivid text;
declare vdividex uuid;
begin
vprovince := 10;vprefecture := 1;vcounty := 1;
MAXVALE := 1000000;
INTERVAL := 1000; vStart := 1 ;vEnd := INTERVAL;
vdivid := (lpad(vprovince::text,2, '0' ) || lpad(vprefecture::text,2, '0' ) || lpad(vcounty::text,2, '0' ))::text;
vdividex := ( select objectid from ts_divisions where province=vprovince and prefecture=vprefecture and county=vcounty);
loop
insert into t_centerprises(objectid,divid, name ,address,post,contacts,tel,fax,describe)
select (vdivid|| lpad(id::text,6, '0' )):: bigint as objectid,vdividex as divid,
gen_random_zh(5,25) as name ,gen_random_zh(10,50) as address,
floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts,
floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax,
gen_random_zh(32,128) as describe
from generate_series(vStart,vEnd) as id;
raise notice '%' , vEnd;
vStart := vEnd + 1; vEnd := vEnd + INTERVAL;
if( vEnd > MAXVALE ) then
return ;
elsif(vEnd = MAXVALE) then
vEnd := vEnd - 1;
end if;
end loop;
end $$;
|
因为运算原因, cpu/内存占用率仍然很高, 硬盘负荷较小,读写率也比较低,大概完成时间约1.5小时.
补充:postgreSQL数据库 向表中快速插入1000000条数据
不用创建函数,直接向表中快速插入1000000条数据
1
2
3
|
create table tbl_test (id int , info text, c_time timestamp );
insert into tbl_test select generate_series(1,100000),md5(random()::text),clock_timestamp();
select count (id) from tbl_test; --查看个数据条数
|
以上为个人经验,希望能给大家一个参考,也希望大家多多支持服务器之家。如有错误或未考虑完全的地方,望不吝赐教。
原文链接:https://blog.csdn.net/kmblack1/article/details/69666667