PostgreSQL 分区表创建及测试

时间:2024-04-07 11:34:42

1      建立分区

1.1.  创建主表

-- Parent table of the partition set. The child tables below inherit these
-- columns; logdate is the column their CHECK constraints partition on.
CREATE TABLE measurement (
    city_id   integer NOT NULL,
    logdate   date    NOT NULL,
    peaktemp  integer,
    unitsales integer
);

CREATE TABLE

这里date类型精确到天,如'2006-02-01',如果时间精确到秒,如'2013-01-09 00:00:00' 使用timestamp 类型

1.2.  创建分区表

-- Monthly child tables for 2006. Each one inherits every column of
-- measurement and carries a CHECK constraint that bounds logdate to the
-- half-open range [first day of month, first day of next month), so the
-- ranges of neighbouring partitions never overlap.
CREATE TABLE measurement_y2006m01 (
    CHECK (logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m02 (
    CHECK (logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m03 (
    CHECK (logdate >= DATE '2006-03-01' AND logdate < DATE '2006-04-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m04 (
    CHECK (logdate >= DATE '2006-04-01' AND logdate < DATE '2006-05-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m05 (
    CHECK (logdate >= DATE '2006-05-01' AND logdate < DATE '2006-06-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m06 (
    CHECK (logdate >= DATE '2006-06-01' AND logdate < DATE '2006-07-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m07 (
    CHECK (logdate >= DATE '2006-07-01' AND logdate < DATE '2006-08-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m08 (
    CHECK (logdate >= DATE '2006-08-01' AND logdate < DATE '2006-09-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m09 (
    CHECK (logdate >= DATE '2006-09-01' AND logdate < DATE '2006-10-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m10 (
    CHECK (logdate >= DATE '2006-10-01' AND logdate < DATE '2006-11-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m11 (
    CHECK (logdate >= DATE '2006-11-01' AND logdate < DATE '2006-12-01')
) INHERITS (measurement);

CREATE TABLE measurement_y2006m12 (
    CHECK (logdate >= DATE '2006-12-01' AND logdate < DATE '2007-01-01')
) INHERITS (measurement);

CREATE TABLE

1.3.  分区键上建索引

-- One index per child table on the partition key (logdate).
-- The January partition is included so that all twelve partitions are
-- indexed the same way.
CREATE INDEX measurement_y2006m01_logdate ON measurement_y2006m01 (logdate);
CREATE INDEX measurement_y2006m02_logdate ON measurement_y2006m02 (logdate);
CREATE INDEX measurement_y2006m03_logdate ON measurement_y2006m03 (logdate);
CREATE INDEX measurement_y2006m04_logdate ON measurement_y2006m04 (logdate);
CREATE INDEX measurement_y2006m05_logdate ON measurement_y2006m05 (logdate);
CREATE INDEX measurement_y2006m06_logdate ON measurement_y2006m06 (logdate);
CREATE INDEX measurement_y2006m07_logdate ON measurement_y2006m07 (logdate);
CREATE INDEX measurement_y2006m08_logdate ON measurement_y2006m08 (logdate);
CREATE INDEX measurement_y2006m09_logdate ON measurement_y2006m09 (logdate);
CREATE INDEX measurement_y2006m10_logdate ON measurement_y2006m10 (logdate);
CREATE INDEX measurement_y2006m11_logdate ON measurement_y2006m11 (logdate);
CREATE INDEX measurement_y2006m12_logdate ON measurement_y2006m12 (logdate);

CREATE INDEX

执行insert into measurement命令向表中插入数据,希望数据按一定的规则插入相应分区,需要前期人工处理下,这里有两种策略,建立触发器和RULE规则。建立触发器方法需要1.4、1.5两步操作,1.6是建立RULE规则方法。

1.4.  创建触发器函数

-- Trigger function that routes a row being inserted into measurement to the
-- monthly child table whose date range contains NEW.logdate.
--
-- Returns: NULL in every successful case (see the note at RETURN NULL).
-- Raises:  an exception when NEW.logdate matches none of the twelve 2006
--          ranges tested below.
CREATE OR REPLACE FUNCTION measurement_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN
    IF (NEW.logdate >= DATE '2006-01-01' AND
        NEW.logdate <  DATE '2006-02-01') THEN
        INSERT INTO measurement_y2006m01 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-02-01' AND
           NEW.logdate <  DATE '2006-03-01') THEN
        INSERT INTO measurement_y2006m02 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-03-01' AND
           NEW.logdate <  DATE '2006-04-01') THEN
        INSERT INTO measurement_y2006m03 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-04-01' AND
           NEW.logdate <  DATE '2006-05-01') THEN
        INSERT INTO measurement_y2006m04 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-05-01' AND
           NEW.logdate <  DATE '2006-06-01') THEN
        INSERT INTO measurement_y2006m05 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-06-01' AND
           NEW.logdate <  DATE '2006-07-01') THEN
        INSERT INTO measurement_y2006m06 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-07-01' AND
           NEW.logdate <  DATE '2006-08-01') THEN
        INSERT INTO measurement_y2006m07 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-08-01' AND
           NEW.logdate <  DATE '2006-09-01') THEN
        INSERT INTO measurement_y2006m08 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-09-01' AND
           NEW.logdate <  DATE '2006-10-01') THEN
        INSERT INTO measurement_y2006m09 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-10-01' AND
           NEW.logdate <  DATE '2006-11-01') THEN
        INSERT INTO measurement_y2006m10 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-11-01' AND
           NEW.logdate <  DATE '2006-12-01') THEN
        INSERT INTO measurement_y2006m11 VALUES (NEW.*);
    ELSIF (NEW.logdate >= DATE '2006-12-01' AND
           NEW.logdate <  DATE '2007-01-01') THEN
        INSERT INTO measurement_y2006m12 VALUES (NEW.*);
    ELSE
        -- No partition covers this date: fail loudly rather than drop the row.
        RAISE EXCEPTION 'Date out of range. Fix the measurement_insert_trigger() function!';
    END IF;

    -- The row has already been written to a child table. Returning NULL from
    -- a BEFORE ROW trigger makes PostgreSQL skip the insert into measurement
    -- itself, so the row is not stored twice.
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;

CREATE FUNCTION

说明:如果不想丢失数据,上面的 ELSE 分支可以改成 INSERT INTO measurement_error_logdate VALUES (NEW.*); 同时需要创建一张结构和 measurement 一样的表 measurement_error_logdate,这样,logdate 超出范围的数据就会被插入到这张表中,而不是报错。

1.5.  创建触发器

-- Calls measurement_insert_trigger() once for every row inserted into
-- measurement, before the row is stored, so the function can redirect it.
CREATE TRIGGER insert_measurement_trigger
    BEFORE INSERT ON measurement
    FOR EACH ROW EXECUTE PROCEDURE measurement_insert_trigger();

CREATE TRIGGER

1.6.  改用RULE规则

也可以不用触发器,改用RULE规则,为每个分表创建规则

-- Rewrites an INSERT on measurement whose logdate falls in January 2006 into
-- an INSERT on the matching child table. The target must be the child table:
-- inserting back into measurement would make the rule apply to itself.
CREATE RULE measurement_y2006m01 AS
ON INSERT TO measurement
WHERE (NEW.logdate >= DATE '2006-01-01' AND
       NEW.logdate <  DATE '2006-02-01')
DO INSTEAD
    INSERT INTO measurement_y2006m01 VALUES (NEW.*);

-- Rewrites an INSERT on measurement whose logdate falls in February 2006 into
-- an INSERT on the matching child table. The target must be the child table:
-- inserting back into measurement would make the rule apply to itself.
CREATE RULE measurement_y2006m02 AS
ON INSERT TO measurement
WHERE (NEW.logdate >= DATE '2006-02-01' AND
       NEW.logdate <  DATE '2006-03-01')
DO INSTEAD
    INSERT INTO measurement_y2006m02 VALUES (NEW.*);

……省略

2      测试

2.1.  查看所有表

mydb=# \d

List of relations

Schema |         Name         | Type | Owner

--------+----------------------+-------+-------

public | measurement          | table | super

public | measurement_y2006m01 | table | super

public | measurement_y2006m02 | table | super

public | measurement_y2006m03 | table | super

public | measurement_y2006m04 | table | super

public | measurement_y2006m05 | table | super

public | measurement_y2006m06 | table | super

public | measurement_y2006m07 | table | super

public | measurement_y2006m08 | table | super

public | measurement_y2006m09 | table | super

public | measurement_y2006m10 | table | super

public | measurement_y2006m11 | table | super

public | measurement_y2006m12 | table | super

(13 rows)

2.2.  查看主表结构

mydb=# \d measurement

Table "public.measurement"

Column   |  Type  | Modifiers

-----------+---------+-----------

city_id    | integer  | not null

logdate   | date     | not null

peaktemp | integer   |

unitsales   | integer   |

Triggers:

insert_measurement_trigger BEFORE INSERT ON measurement FOR EACH ROW EXECUTE PROCEDURE measurement_insert_trigger()

Number of child tables: 12 (Use \d+ to list them.)

2.3.  插入数据

mydb=# insert into measurement values(1,date '2006-02-10',1,1);

INSERT 0 0

mydb=# insert into measurement values(1,date '2006-03-10',1,1);

INSERT 0 0

mydb=# insert into measurement values(1,date '2006-04-10',1,1);

INSERT 0 0

2.4.  查看主表数据

mydb=# select * from measurement;

city_id | logdate   | peaktemp | unitsales

---------+------------+----------+-----------

1 | 2006-02-10 |        1 |         1

1 | 2006-03-10 |        1 |         1

1 | 2006-04-10 |        1 |         1

(3 rows)

2.5.  查看分表数据

mydb=# select * from measurement_y2006m03;

city_id | logdate   | peaktemp | unitsales

---------+------------+----------+-----------

1 | 2006-03-10 |        1 |         1

(1 row)

2.6.  分区排除和查询优化

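在关闭约束排除(constraint_exclusion = off)的情况下,不会进行分区排除,所以即使检索条件基于分区键,也要扫描所有的分区。(PostgreSQL 8.4 之前该参数默认为 off;自 8.4 起默认值为 partition,对继承子表的查询会自动启用约束排除。)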

mydb=# SET constraint_exclusion = off;

SET

mydb=# EXPLAIN SELECT count(*) FROM measurement WHERE logdate <= DATE '2006-02-01';

QUERY PLAN

-----------------------------------------------------------------------------------------------

Aggregate (cost=436.80..436.81 rows=1 width=0)

->  Append  (cost=0.00..417.62 rows=7670 width=0)

->  Seq Scan on measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m01 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m02 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m03 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m04 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m05 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m06 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m07 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m08 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m09 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m10 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m11 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m12 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

(28 rows)

2.7.  开启约束排除

mydb=# SET constraint_exclusion = on;

SET

mydb=> EXPLAIN SELECT count(*) FROM measurement WHERE logdate <= DATE '2006-02-01';

QUERY PLAN

-----------------------------------------------------------------------------------------------

Aggregate  (cost=100.80..100.81 rows=1 width=0)

->  Append (cost=0.00..96.38 rows=1770 width=0)

->  Seq Scan on measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m01 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

->  Seq Scan on measurement_y2006m02 measurement  (cost=0.00..32.12 rows=590 width=0)

Filter: (logdate <= '2006-02-01'::date)

(8 rows)

至此分区表建立完成