1.创建分区表
CREATE TABLE tbl1 (
ID VARCHAR ( 30 ) NOT NULL,
ROWKEY VARCHAR ( 100 ) NOT NULL,
.......
PASS_TIME DATETIME NOT NULL
) DUPLICATE KEY ( ID ) partition BY RANGE ( PASS_TIME ) ( ) DISTRIBUTED BY HASH ( ID ) BUCKETS 5 PROPERTIES (
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.start" = "-2147483648",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "P",
"dynamic_partition.buckets" = "10",
"replication_num" = "2"
);
2.查看分区情况以及每个分区对应的时间区间
show partitions from tbl1;
2.创建历史分区
动态分区,是说会随着时间推移自动创建后续的分区,例如今天是21号,你设置动态分区是自动创建后面三天的,那么会把22、23、24三天的分区创建。并不是数据过来后,自动创建数据对应的分区。
创建历史分区,可以先关掉动态分区属性,然后修改表的分区
参考:第2.4章:StarRocks表设计--分区分桶与副本数_流木的博客-****博客
ALTER TABLE tbl1 SET("dynamic_partition.enable"="false");
批量添加分区
PARTITION BY RANGE (PASS_TIME) (
START ("2021-10-01") END ("2021-10-03") EVERY (INTERVAL 1 day)
);
增加一个指定上下界的分区p20210312:
ALTER TABLE tbl1 ADD PARTITION p20210312 VALUES [("2021-03-12"), ("2021-03-13"));
左闭右开
PS:不创建历史分区导入数据报错:
Reason: there are 4096 rows couldn't find a partition.
3.DataX导入数据的文件
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "oraclereader",
"parameter": {
"username": "${r_username}",
"password": "${r_password}",
"column": [
"ID",
"ROWKEY",
......
"PASS_TIME"
],
"connection": [
{
"table": [
"tbl1"
],
"jdbcUrl": [
"jdbc:oracle:thin:@//${r_ip}:${r_port}/${r_dbname}"
]
}
]
}
},
"writer": {
"name": "starrockswriter",
"parameter": {
"username": "${w_username}",
"password": "",
"database": "${w_dbname}",
"table": "tbl1",
"column": [
"ID",
"ROWKEY",
......
"PASS_TIME"
],
"preSql": [
"truncate table ${w_dbname}.tbl1"
],
"postSql": [ ],
"jdbcUrl": "jdbc:mysql://${w_ip}:${w_port}/",
"loadUrl": [
"beip:8030",
"beip:8030"
],
"loadProps": {
"column_separator": "\\x01",
"row_delimiter": "\\x02"
}
}
}
}
]
}
}
PS:需要指定一下分隔符
报错
Reason: column count mismatch,expect=58 real=6. src line:
Reason: column count mismatch,expect=58 real=53. src line: