Sqoop与Shell脚本数据迁移实战
#!/bin/bash
DB_DATABASE=''
DB_TABLE=''
DB_HOST=''
DB_PORT=''
DB_CONNECTOR="jdbc:sqlserver://$BS_DB_HOST:$BS_DB_PORT;database=$BS_DB_DATABASE"
DB_USER=''
DB_PASSWORD=''
HIVE_DATABASE='ods_'
HIVE_TABLE="ods_${BS_DB_TABLE}_df"
IMPALA_CMD='impala-shell'
##########################################################################################
FIELDS=''
##########################################################################################
# 创建Impala表
create_impala_table() {
echo "Executing DDL for table $1.$2:"
# 构建Impala建表语句
create_table_sql="
CREATE TABLE IF NOT EXISTS $1.$2 (
${FIELDS//,/ STRING,} STRING
)
WITH SERDEPROPERTIES ('serialization.format'='1')
STORED AS PARQUET
TBLPROPERTIES ('DO_NOT_UPDATE_STATS'='true', 'parquet.compression'='snappy');
"
echo "$create_table_sql"
# 创建表
$IMPALA_CMD -q "$create_table_sql"
}
##########################################################################################
# 函数:清空表数据
truncate_table() {
$IMPALA_CMD -q "TRUNCATE TABLE $1.$2"
}
##########################################################################################
# 刷新hive元数据
hive_table_refresh() {
$IMPALA_CMD -q "REFRESH $1.$2"
}
##########################################################################################
# 获取hive表字段
get_hive_table_fields() {
$IMPALA_CMD --quiet -q "DESCRIBE $1.$2;" | awk 'NR>3{print $2}' | paste -sd, | sed '$ s/,$//'
}
##########################################################################################
# 把sqlserver数据导入hive
db_import_to_hive() {
local db_connector=$1
local db_user=$2
local db_pw=$3
local table_name=$4
local hive_dbname=$5
local hive_tbname=$6
sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "$db_connector" \
--username "$db_user" \
--password "$db_pw" \
--query "select $FIELDS from ${table_name} where \$CONDITIONS" \
--hcatalog-database "$hive_dbname" \
--hcatalog-table "$hive_tbname" \
--fields-terminated-by '\0001' \
--lines-terminated-by '\n' \
--hive-drop-import-delims \
--null-string '\\N' \
--null-non-string '\\N' \
-m 1
}
##########################################################################################
# 如果表不存在则创建表
create_impala_table $HIVE_DATABASE $HIVE_TABLE
# 比较Impala表的字段与脚本字段是否一样
# 开启不区分大小写
#hive_fields=$(get_hive_table_fields $HIVE_DATABASE $HIVE_TABLE)
#shopt -s nocasematch
#if [ "$FIELDS" == "$hive_fields" ]; then
# echo "字符串内容一致(忽略大小写)"
#else
# echo "字符串内容不一致"
#fi
# 关闭不区分大小写
#shopt -u nocasematch
# 清空表数据
truncate_table $HIVE_DATABASE $HIVE_TABLE
# 导入数据到hive
db_import_to_hive $DB_CONNECTOR $DB_USER $DB_PASSWORD $DB_TABLE $HIVE_DATABASE $HIVE_TABLE
# 刷新hive元数据
hive_table_refresh $HIVE_DATABASE $HIVE_TABLE
exit 0