#!/bin/bash
#
# Schema drift check between Hive tables and their MySQL source tables.
#
# For each database pair listed in $db_list, the column names of every Hive
# table are compared with the column names of the corresponding MySQL table.
# Each mismatch appends one line to $runlog, and $err_line ends up holding the
# number of mismatch lines (presumably consumed by the alerting step that
# follows this block -- confirm against the rest of the file).
#
# $db_list holds one whitespace-separated record per line:
#   <hive_db> <mysql_db> <mysql_host> <mysql_port> [part]
# When the fifth field is "part", only Hive tables named *_part are checked and
# each is compared with the MySQL table of the same name minus that suffix.

source /etc/profile

runlog='/tmp/zewei/check_schema_log'
hive_database_schema=/tmp/hive_database_schema/hive/
mysql_database_schema=/tmp/hive_database_schema/mysql/
db_list=/root/script/DBlist
# Hour of day (24h clock) during which the Hive schema cache is purged, so
# that schema changes made in Hive are picked up at least once a day (1 PM).
cache_purge_hour=13

#######################################
# Reduce Hive `desc <table>` output to its non-partition column names.
# The first whitespace-separated field of each line is the column name. Every
# name listed after the first line whose first field contains '#' (the
# partition section header) is a partition column; it is dropped from the
# result because MySQL has no counterpart for it.
# Arguments: none
# Inputs:    `desc` output on stdin
# Outputs:   one column name per line on stdout, in original order
#######################################
strip_partition_columns() {
  awk '
    { name = $1 }
    name == ""           { next }
    index(name, "#") > 0 { in_partition_section = 1; next }
    in_partition_section { partition[name] = 1; next }
                         { columns[++count] = name }
    END {
      for (i = 1; i <= count; i++)
        if (!(columns[i] in partition))
          print columns[i]
    }
  '
}

# Start every run with an empty log; create its directory if needed.
mkdir -p -- "${runlog%/*}"
: > "$runlog"

# 10# forces base 10 so that hours such as "08"/"09" are not parsed as octal.
if (( 10#$(date +%H) == cache_purge_hour )); then
  rm -rf -- "${hive_database_schema:?}"/*
fi

# The record loop reads from fd 3 and the table loop from fd 4, so that
# hive/mysql cannot swallow pending input lines through an inherited stdin.
while read -r -u 3 HiveDB MysqlDB MysqlHost MysqlPort PartTag _ \
  || [[ -n "$HiveDB" ]]; do
  [[ -n "$HiveDB" ]] || continue # skip blank lines

  # NOTE(review): credentials are hard-coded and the password is visible in
  # the process list; consider a client option file (--defaults-extra-file).
  connect_mysql=(mysql -u TableSchemaCheck -pSchemacheck666
    -h "$MysqlHost" -P "$MysqlPort" "$MysqlDB" -BNe)

  # Make sure both cache directories exist (mkdir -p is a no-op if they do).
  mysql_cache_dir=$mysql_database_schema/$MysqlDB
  hive_cache_dir=$hive_database_schema/$HiveDB
  mkdir -p -- "$mysql_cache_dir" "$hive_cache_dir"

  # For databases tagged "part", only the *_part tables are checked.
  table_list=$(hive -S -e "use $HiveDB; show tables;")
  if [[ "$PartTag" == 'part' ]]; then
    table_list=$(grep '_part$' <<<"$table_list")
  fi

  while read -r -u 4 table _; do
    [[ -n "$table" ]] || continue

    hive_table_name=$table
    if [[ "$PartTag" == 'part' ]]; then
      mysql_table_name=${table%_part}
    else
      mysql_table_name=$table
    fi

    # Fetch the MySQL column list; tables that MySQL cannot describe are
    # skipped.
    if ! mysql_desc=$("${connect_mysql[@]}" "desc $mysql_table_name;"); then
      continue
    fi
    # Hive column names are case-insensitive, so fold MySQL names to lower
    # case before comparing.
    mysql_schema_file=$mysql_cache_dir/$mysql_table_name
    printf '%s\n' "$mysql_desc" | awk '{print $1}' | tr 'A-Z' 'a-z' \
      > "$mysql_schema_file"

    # Query Hive only when there is no usable cached column list. An empty
    # cache file is treated as missing so a previously failed fetch heals.
    hive_schema_file=$hive_cache_dir/$hive_table_name
    if [[ ! -s "$hive_schema_file" ]]; then
      if ! hive_desc=$(hive -S -e "use $HiveDB; desc ${hive_table_name};"); then
        # Do not cache (or report as a mismatch) the result of a failed query.
        printf 'warn: cannot describe Hive table %s.%s, skipped\n' \
          "$HiveDB" "$hive_table_name" >&2
        continue
      fi
      # tee keeps echoing the fetched column list to stdout, as before.
      printf '%s\n' "$hive_desc" | strip_partition_columns \
        | tee -- "$hive_schema_file"
    fi

    # Any byte difference between the two column lists is a schema mismatch.
    if ! cmp -s -- "$mysql_schema_file" "$hive_schema_file"; then
      printf 'HiveDB:\t%s\t\ttable:\t%s\t\tnot equal to MysqlDB table:\t%s\n' \
        "$HiveDB" "$hive_table_name" "$mysql_table_name" >> "$runlog"
    fi
  done 4<<<"$table_list"
done 3< "$db_list"

# Count the mismatches for alerting; the alert threshold itself is configured
# in the monitoring system.
err_line=$(awk 'END { print NR }' "$runlog")
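# [Alert] placeholder: the alerting step goes here (raise an alarm based on $err_line; the threshold is configured in the monitoring system).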