Greenplum中通过外部表进行数据导入导出

时间:2022-09-23 08:17:38
外部表创建服务器后台
gpfdist -p 8081 -d /var/data/staging -l /home/gpadmin/log &

创建以|为分隔符的外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gpfdist://etlhost-1:8081/*',
'gpfdist://etlhost-2:8081/*')
FORMAT 'TEXT' (DELIMITER '|');

创建分布式的外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gpfdist://etlhost-1:8081/*.txt',
'gpfdist://etlhost-2:8082/*.txt')
FORMAT 'TEXT' ( DELIMITER '|' NULL ' ') ;

创建ssl的外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gpfdists://etlhost-1:8081/*.txt',
'gpfdists://etlhost-2:8082/*.txt'

创建日志记录的外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gpfdist://etlhost-1:8081/*.txt',
'gpfdist://etlhost-2:8082/*.txt')
FORMAT 'TEXT' ( DELIMITER '|' NULL ' ')
LOG ERRORS INTO err_customer SEGMENT REJECT LIMIT 5;

创建csv格式的外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gpfdist://etlhost-1:8081/*.txt',
'gpfdist://etlhost-2:8082/*.txt')
FORMAT 'CSV' ( DELIMITER ',' )
LOG ERRORS INTO err_customer SEGMENT REJECT LIMIT 5;

创建hadoop分布式外部表
CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('gphdfs://hdfshost-1:8081/data/filename.txt')
FORMAT 'TEXT' (DELIMITER '|');

自定义的hadoop格式外部表
CREATE EXTERNAL TABLE ext_expenses
LOCATION ('gphdfs://hdfshost-1:8081/data/custdat*.dat')
FORMAT 'custom' (formatter='gphdfs_import');

带压缩的写hadoop外部表
CREATE WRITABLE EXTERNAL TABLE ext_expenses
LOCATION ('gphdfs://hdfshost-1:8081/data/?compress=true&compression_type=BLOCK&codec=org.apache.hadoop.io.compress.DefaultCodec')
FORMAT 'custom' (formatter='gphdfs_export');

使用默认的压缩格式hadoop的外部表
CREATE WRITABLE EXTERNAL TABLE ext_expenses
LOCATION ('gphdfs://hdfshost-1:8081/data?compress=true')
FORMAT 'custom' (formatter='gphdfs_export');

CREATE EXTERNAL TABLE ext_expenses ( name text,
date date, amount float4, category text, desc1 text )
LOCATION ('file://filehost:5432/data/international/*',
'file://filehost:5432/data/regional/*'
'file://filehost:5432/data/supplement/*.csv')
FORMAT 'CSV' (HEADER);

创建带表达式的外部表
CREATE EXTERNAL WEB TABLE log_output (linenum int, message text) EXECUTE '/var/load_scripts/get_log_data.sh' ON HOST
FORMAT 'TEXT' (DELIMITER '|');

创建可写的外部表
CREATE WRITABLE EXTERNAL TABLE sales_out (LIKE sales)
LOCATION ('gpfdist://etl1:8081/sales.out')
FORMAT 'TEXT' ( DELIMITER '|' NULL ' ')
DISTRIBUTED BY (txn_id);

创建带表达式的外部表
CREATE WRITABLE EXTERNAL WEB TABLE campaign_out
(LIKE campaign)
EXECUTE '/var/unload_scripts/to_adreport_etl.sh'
FORMAT 'TEXT' (DELIMITER '|');

插入外部表数据
INSERT INTO campaign_out SELECT * FROM campaign WHERE customer_id=123;

创建读取网页的外部表
CREATE READABLE EXTERNAL TABLE prices_readable (LIKE prices)
LOCATION ('gpfdist://127.0.0.1:8080/data/ \
prices.xml#transform=prices_input')
FORMAT 'text' (delimiter '|')
LOG ERRORS INTO prices_errortable SEGMENT REJECT LIMIT 10;

创建可写的网页外部表
CREATE WRITABLE EXTERNAL TABLE prices_readable (LIKE prices)
LOCATION ('gpfdist://127.0.0.1:8080/data/ \
prices.xml#transform=prices_input')
FORMAT 'text' (delimiter '|');

限制长度的外部表
CREATE READABLE EXTERNAL TABLE students (
name varchar(20), address varchar(30), age int)
LOCATION ('file://<host>/file/path/')
FORMAT 'CUSTOM' (formatter=fixedwidth_in,
name='20', address='30', age='4');

创建无限定符的外部表
CREATE READABLE EXTERNAL TABLE students (
name varchar(20), address varchar(30), age int)
LOCATION ('file://<host>/file/path/')
FORMAT 'CUSTOM' (formatter=fixedwidth_in,
name=20, address=30, age=4, line_delim='')

将数据拷贝出去
COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/home/gpadmin/a_list_countries.out';