环境
虚拟机:VMware 10
Linux版本:CentOS-6.5-x86_64
客户端:Xshell4
FTP:Xftp4
jdk8
hadoop-3.1.1
apache-hive-3.1.1
一、Hive运行方式
1、命令行方式cli:控制台模式
--与hdfs交互
hive> dfs -ls /;
Found 3 items
drwxr-xr-x - root supergroup 0 2019-01-25 16:44 /root
drwxrwx--- - root supergroup 0 2019-01-25 16:18 /tmp
drwxr-xr-x - root supergroup 0 2019-02-01 09:46 /usr
hive> dfs -cat /root/hive_remote/warehouse/person/*;
1,小明1,18,lol-book-movie,beijing:shangxuetang-shanghai:pudong
2,小明2,20,lol-book-movie,beijing:shangxuetang-shanghai:pudong
3,小明3,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
4,小明4,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
5,小明5,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
6,小明6,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
hive>
--与Linux交互 :!开头
hive> !pwd;
/root
2、脚本运行方式(实际生产环境中用最多)
#直接按照入参执行 输出结果到linux控制台 [root@PCS102 ~]# hive -e "select * from psn2" which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = b243b1f6-0b67-416f-8b9a-3da0304cb88b Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.1-bin/lib/hive-common-3.1.1.jar!/hive-log4j2.properties Async: true Hive Session ID = 0a2ced87-5509-44bb-927e-17ab4d993b91 OK psn2.id psn2.name psn2.likes psn2.address psn2.age 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] 
{"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 Time taken: 2.416 seconds, Fetched: 12 row(s) #直接按照入参执行 输出结果重定向到文件 [root@PCS102 ~]# hive -e "select * from psn2" > aaa which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = 1ee55846-3df2-4fc0-8ce8-501d2202a617 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.1-bin/lib/hive-common-3.1.1.jar!/hive-log4j2.properties Async: true Hive Session ID = 7549c4cf-d416-406b-82f7-f5012c3f1173 OK Time taken: 2.59 seconds, Fetched: 12 row(s) [root@PCS102 ~]# cat aaa psn2.id psn2.name psn2.likes psn2.address psn2.age 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 
["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 #直接按照入参执行 输出结果重定向到文件 -S静默执行 [root@PCS102 ~]# hive -S -e "select * from psn2" > bbb which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = 991dd630-b1ae-448d-a43c-5870fb7508cc Hive Session ID = ed0b4ba8-c8ec-4c9b-acba-4815e3e5762a [root@PCS102 ~]# cat bbb psn2.id psn2.name psn2.likes psn2.address psn2.age 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 #直接按照入参执行 输出结果重定向到文件 如果sql有问题 会报错 报错信息输出到linux控制台 [root@PCS102 ~]# hive -e "select * from psn55" > ccc which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. 
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = 83a20df1-6f19-414a-a247-cf7dbc6ee58c Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.1-bin/lib/hive-common-3.1.1.jar!/hive-log4j2.properties Async: true Hive Session ID = 8d5bfc04-7e76-46b5-b2a2-13e8ccfc890a FAILED: SemanticException [Error 10001]: Line 1:14 Table not found 'psn55' [root@PCS102 ~]# cat ccc #-f 执行文件中的sql 结果输出到linux控制台 [root@PCS102 ~]# hive -f test which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = b5831035-da17-4260-95aa-10c68f729327 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.1-bin/lib/hive-common-3.1.1.jar!/hive-log4j2.properties Async: true Hive Session ID = 19738ea4-0c4b-473f-8f05-171a16f8ec04 OK psn2.id psn2.name psn2.likes psn2.address psn2.age 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 Time taken: 2.569 seconds, Fetched: 12 row(s) #-i 执行文件中的sql 会进入hive CLI [root@PCS102 ~]# hive -i test which: no hbase in (/usr/local/jdk1.8.0_65/bin:/home/cluster/subversion-1.10.3/bin:/home/cluster/apache-storm-0.9.2/bin:/usr/local/hadoop-3.1.1/bin:/usr/local/hadoop-3.1.1/sbin:/usr/local/apache-hive-3.1.1-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin) SLF4J: Class path contains multiple SLF4J bindings. 
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.1-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Hive Session ID = d720d685-5547-4469-a07d-f47d4d078bd7 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.1-bin/lib/hive-common-3.1.1.jar!/hive-log4j2.properties Async: true Hive Session ID = eb4cbb84-8174-4432-8be6-dd38bac70f2d 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. 
#在Hive CLI里执行外面的包含sql的文件 hive> source test; OK 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 10 1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 5 小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"} 20 Time taken: 0.151 seconds, Fetched: 12 row(s) hive>
3、JDBC方式:hiveserver2
4、web GUI接口 :hwi操作麻烦、基本不用,hue要好一些
(1)下载源码包apache-hive-*-src.tar.gz (注意:新版本里没有hwi,这里举例apache-hive-1.2.1-src.tar.gz)
(2)将hwi/web/*里面所有的文件打成war包
cd /usr/local/apache-hive-1.2.1-src/hwi/web && jar -cvf hive-hwi.war ./*
(3)将hwi war包放在$HIVE_HOME/lib/
cp /usr/local/apache-hive-1.2.1-src/hwi/web/hive-hwi.war /usr/local/apache-hive-3.1.1-bin/lib/
(4)复制tools.jar(在jdk的lib目录下)到$HIVE_HOME/lib下
cp /usr/local/jdk1.8.0_65/lib/tools.jar /usr/local/apache-hive-3.1.1-bin/lib
(5)修改hive-site.xml
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi.war</value>
</property>
(6)启动hwi服务(端口号9999)
hive --service hwi
(7)浏览器通过以下链接来访问
http://PCS102:9999/hwi/
二、Hive 权限管理
1、三种授权模型:
(1)Storage Based Authorization in the Metastore Server
基于存储的授权 - 可以对Metastore中的元数据进行保护,但是没有提供更加细粒度的访问控制(例如:列级别、行级别)。
(2)SQL Standards Based Authorization in HiveServer2
基于SQL标准的Hive授权 - 完全兼容SQL的授权模型,推荐使用该模式。
(3)Default Hive Authorization (Legacy Mode)
hive默认授权 - 设计目的仅仅只是为了防止用户产生误操作,而不是防止恶意用户访问未经授权的数据。
重点看一下第(2)种授权:Hive - SQL Standards Based Authorization in HiveServer2
--完全兼容SQL的授权模型
--除支持对于用户的授权认证,还支持角色role的授权认证
·role可理解为是一组权限的集合,通过role为用户授权
·一个用户可以具有一个或多个角色
·默认包含两种角色:public、admin
2、限制
(1)启用当前认证方式之后,dfs、add、delete、compile、reset等命令被禁用。
(2)通过set命令修改hive configuration的方式受到限制,只允许设置白名单内的安全参数。
(可通过修改配置文件hive-site.xml中的hive.security.authorization.sqlstd.confwhitelist来配置该白名单)
(3)添加、删除函数以及宏的操作,仅为具有admin角色的用户开放。
(4)用户自定义函数(开放支持永久的自定义函数),可通过具有admin角色的用户创建,其他用户都可以使用。
(5)Transform功能被禁用。
3、配置
在hive服务端修改配置文件hive-site.xml添加以下配置内容:
<property> <name>hive.security.authorization.enabled</name> <value>true</value> </property> <property> <name>hive.server2.enable.doAs</name> <value>false</value> </property> <property> <name>hive.users.in.admin.role</name> <value>root</value> </property> <property> <name>hive.security.authorization.manager</name> <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory</value> </property> <property> <name>hive.security.authenticator.manager</name> <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value> </property>
服务端启动hiveserver2;客户端通过beeline进行连接
4、角色的添加、删除、查看、设置
CREATE ROLE role_name; -- 创建角色
DROP ROLE role_name; -- 删除角色
SET ROLE (role_name|ALL|NONE); -- 设置角色
SHOW CURRENT ROLES; -- 查看当前具有的角色
SHOW ROLES; -- 查看所有存在的角色
角色的授予、移除、查看
#将角色授予某个用户、角色
GRANT role_name [, role_name] ...
TO principal_specification [, principal_specification] ...
[ WITH ADMIN OPTION ];

principal_specification
  : USER user
  | ROLE role

#移除某个用户、角色的角色
REVOKE [ADMIN OPTION FOR] role_name [, role_name] ...
FROM principal_specification [, principal_specification] ... ;

principal_specification
  : USER user
  | ROLE role

#查看授予某个用户、角色的角色列表
SHOW ROLE GRANT (USER|ROLE) principal_name;

#查看属于某种角色的用户、角色列表
SHOW PRINCIPALS role_name;
5、Hive权限管理
权限:
SELECT privilege – gives read access to an object. INSERT privilege – gives ability to add data to an object (table). UPDATE privilege – gives ability to run update queries on an object (table). DELETE privilege – gives ability to delete data in an object (table). ALL PRIVILEGES – gives all privileges (gets translated into all the above privileges).
权限的授予、移除、查看:
#将权限授予某个用户、角色:
GRANT
    priv_type [, priv_type ] ...
    ON table_or_view_name
    TO principal_specification [, principal_specification] ...
    [WITH GRANT OPTION];

#移除某个用户、角色的权限:
REVOKE [GRANT OPTION FOR]
    priv_type [, priv_type ] ...
    ON table_or_view_name
    FROM principal_specification [, principal_specification] ... ;

principal_specification
  : USER user
  | ROLE role

priv_type
  : INSERT | SELECT | UPDATE | DELETE | ALL

#查看某个用户、角色的权限:
SHOW GRANT [principal_name] ON (ALL | [TABLE] table_or_view_name);