1.在impala里建立好文本表:
-- External text tables over the pipe-delimited TPC-H files already in HDFS.
-- Each table only points at the directory named in LOCATION; every field is
-- separated by '|'.
-- All eight tables must read from the same /user/training/tpch_10g data set.

create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/customer';

create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/lineitem';

create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/nation';

create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/orders';

create external table part (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/part';

create external table partsupp (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/partsupp';

-- NOTE: region reads from tpch_10g like every other table in this set
-- (it previously pointed at a tpch_100g directory).
create external table region (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/region';

create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '/user/training/tpch_10g/supplier';
2.建立kudu-impala表:
-- Kudu-backed copies of the TPC-H text tables.
-- Each table is created through the Kudu storage handler, registered on the
-- Kudu master at node1:7051, and then loaded from the matching text table.
-- 'kudu.key_columns' names the primary-key column(s) of the Kudu table.
-- Every load lists its source columns explicitly so the insert does not depend
-- on the text table's column order.

-- customer
create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'customer_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'C_CUSTKEY'
);

insert into customer_kudu
select C_CUSTKEY, C_NAME, C_ADDRESS, C_NATIONKEY, C_PHONE, C_ACCTBAL, C_MKTSEGMENT, C_COMMENT
from customer;

-- nation
create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'nation_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'N_NATIONKEY'
);

insert into nation_kudu
select N_NATIONKEY, N_NAME, N_REGIONKEY, N_COMMENT
from nation;

-- part
create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'part_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'P_PARTKEY'
);

insert into part_kudu
select P_PARTKEY, P_NAME, P_MFGR, P_BRAND, P_TYPE, P_SIZE, P_CONTAINER, P_RETAILPRICE, P_COMMENT
from part;

-- supplier
create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'supplier_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'S_SUPPKEY'
);

insert into supplier_kudu
select S_SUPPKEY, S_NAME, S_ADDRESS, S_NATIONKEY, S_PHONE, S_ACCTBAL, S_COMMENT
from supplier;

-- partsupp (composite key: part + supplier)
create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'partsupp_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
);

insert into partsupp_kudu
select PS_PARTKEY, PS_SUPPKEY, PS_AVAILQTY, PS_SUPPLYCOST, PS_COMMENT
from partsupp;

-- region
create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'region_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'R_REGIONKEY'
);

insert into region_kudu
select R_REGIONKEY, R_NAME, R_COMMENT
from region;

-- orders
create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'orders_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'O_ORDERKEY'
);

insert into orders_kudu
select O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS, O_TOTALPRICE, O_ORDERDATE, O_ORDERPRIORITY, O_CLERK, O_SHIPPRIORITY, O_COMMENT
from orders;

-- lineitem (composite key: order + line number)
-- L_LINENUMBER is declared second here, unlike in the text table, so that both
-- key columns lead the column list (presumably a Kudu requirement -- confirm).
-- The select list below follows the Kudu table's column order.
create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'lineitem_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
);

insert into lineitem_kudu
select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT
from lineitem;
下面是日志信息:
[node2:] > use tpch_10g;
Query: use tpch_10g
[node2:] > show tables;
Query: show tables
+----------+
| name |
+----------+
| customer |
| lineitem |
| nation |
| orders |
| part |
| partsupp |
| region |
| supplier |
+----------+
Fetched row(s) in .01s
[node2:] > create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'customer_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'C_CUSTKEY'
> );
Query: create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'customer_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'C_CUSTKEY'
) Fetched row(s) in .68s
[node2:] > insert into customer_kudu select * from customer;
Query: insert into customer_kudu select * from customer
Inserted row(s) in .02s
[node2:] > show tables;
Query: show tables
+---------------+
| name |
+---------------+
| customer |
| customer_kudu |
| lineitem |
| nation |
| orders |
| part |
| partsupp |
| region |
| supplier |
+---------------+
Fetched row(s) in .01s
[node2:] > create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'nation_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'N_NATIONKEY'
> );
Query: create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'nation_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'N_NATIONKEY'
) Fetched row(s) in .72s
[node2:] > insert into nation_kudu select * from nation;
Query: insert into nation_kudu select * from nation
Inserted row(s) in .26s
[node2:] > create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'part_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'P_PARTKEY'
> );
Query: create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'part_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'P_PARTKEY'
) Fetched row(s) in .68s
[node2:] > insert into part_kudu select * from part;
Query: insert into part_kudu select * from part
Inserted row(s) in .71s
[node2:] > create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'supplier_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'S_SUPPKEY'
> );
Query: create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'supplier_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'S_SUPPKEY'
) Fetched row(s) in .73s
[node2:] > insert into supplier_kudu select * from supplier_kudu;
Query: insert into supplier_kudu select * from supplier_kudu
Inserted row(s) in .67s
[node2:] > create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'partsupp_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
> );
Query: create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'partsupp_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
) Fetched row(s) in .72s
[node2:] > insert into partsupp_kudu select * from partsupp_kudu;
Query: insert into partsupp_kudu select * from partsupp_kudu
Inserted row(s) in .07s
[node2:] > insert into supplier_kudu select * from supplier;
Query: insert into supplier_kudu select * from supplier
Inserted row(s) in .78s
[node2:] > insert into partsupp_kudu select * from partsupp;
Query: insert into partsupp_kudu select * from partsupp
Inserted row(s) in .44s
[node2:] > create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'region_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'R_REGIONKEY'
> );
Query: create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'region_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'R_REGIONKEY'
) Fetched row(s) in .72s
[node2:] > insert into region_kudu select * from region;
Query: insert into region_kudu select * from region
Inserted row(s) in .05s
[node2:] > create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'lineitem_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
> );
Query: create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'lineitem_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
) Fetched row(s) in .72s
[node2:] > insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;
Query: insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem
Inserted row(s) in .42s
[node2:] > create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
> TBLPROPERTIES(
> 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
> 'kudu.table_name' = 'orders_kudu',
> 'kudu.master_addresses' = 'node1:7051',
> 'kudu.key_columns' = 'O_ORDERKEY'
> );
Query: create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'orders_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'O_ORDERKEY'
) Fetched row(s) in .86s
[node2:] > insert into orders_kudu select * from orders;
Query: insert into orders_kudu select * from orders
Inserted row(s) in .11s
[node2:] >
3.计算表的统计信息:
-- Collect table and column statistics for every table, so the Impala planner
-- has row counts and column stats to work with.

-- Text (external) tables.
compute stats customer;
compute stats lineitem;
compute stats nation;
compute stats orders;
compute stats part;
compute stats partsupp;
compute stats region;
compute stats supplier;

-- Kudu tables.
compute stats customer_kudu;
compute stats lineitem_kudu;
compute stats nation_kudu;
compute stats orders_kudu;
compute stats part_kudu;
compute stats partsupp_kudu;
compute stats region_kudu;
compute stats supplier_kudu;