Sqoop与Shell脚本数据迁移实战

时间:2024-06-03 11:28:06
#!/bin/bash DB_DATABASE='' DB_TABLE='' DB_HOST='' DB_PORT='' DB_CONNECTOR="jdbc:sqlserver://$BS_DB_HOST:$BS_DB_PORT;database=$BS_DB_DATABASE" DB_USER='' DB_PASSWORD='' HIVE_DATABASE='ods_' HIVE_TABLE="ods_${BS_DB_TABLE}_df" IMPALA_CMD='impala-shell' ########################################################################################## FIELDS='' ########################################################################################## # 创建Impala表 create_impala_table() { echo "Executing DDL for table $1.$2:" # 构建Impala建表语句 create_table_sql=" CREATE TABLE IF NOT EXISTS $1.$2 ( ${FIELDS//,/ STRING,} STRING ) WITH SERDEPROPERTIES ('serialization.format'='1') STORED AS PARQUET TBLPROPERTIES ('DO_NOT_UPDATE_STATS'='true', 'parquet.compression'='snappy'); " echo "$create_table_sql" # 创建表 $IMPALA_CMD -q "$create_table_sql" } ########################################################################################## # 函数:清空表数据 truncate_table() { $IMPALA_CMD -q "TRUNCATE TABLE $1.$2" } ########################################################################################## # 刷新hive元数据 hive_table_refresh() { $IMPALA_CMD -q "REFRESH $1.$2" } ########################################################################################## # 获取hive表字段 get_hive_table_fields() { $IMPALA_CMD --quiet -q "DESCRIBE $1.$2;" | awk 'NR>3{print $2}' | paste -sd, | sed '$ s/,$//' } ########################################################################################## # 把sqlserver数据导入hive db_import_to_hive() { local db_connector=$1 local db_user=$2 local db_pw=$3 local table_name=$4 local hive_dbname=$5 local hive_tbname=$6 sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \ --connect "$db_connector" \ --username "$db_user" \ --password "$db_pw" \ --query "select $FIELDS from ${table_name} where \$CONDITIONS" \ --hcatalog-database "$hive_dbname" \ --hcatalog-table "$hive_tbname" \ --fields-terminated-by '\0001' \ --lines-terminated-by '\n' \ --hive-drop-import-delims \ --null-string '\\N' \ --null-non-string '\\N' \ -m 1 } ########################################################################################## # 如果表不存在则创建表 create_impala_table $HIVE_DATABASE $HIVE_TABLE # 比较Impala表的字段与脚本字段是否一样 # 开启不区分大小写 #hive_fields=$(get_hive_table_fields $HIVE_DATABASE $HIVE_TABLE) #shopt -s nocasematch #if [ "$FIELDS" == "$hive_fields" ]; then # echo "字符串内容一致(忽略大小写)" #else # echo "字符串内容不一致" #fi # 关闭不区分大小写 #shopt -u nocasematch # 清空表数据 truncate_table $HIVE_DATABASE $HIVE_TABLE # 导入数据到hive db_import_to_hive $DB_CONNECTOR $DB_USER $DB_PASSWORD $DB_TABLE $HIVE_DATABASE $HIVE_TABLE # 刷新hive元数据 hive_table_refresh $HIVE_DATABASE $HIVE_TABLE exit 0