时间:2022-03-17 21:15:40

  索引是标准的数据库技术,Hive 0.7 版本之后支持索引。Hive 提供的索引功能有限,它不像传统的关系型数据库那样有“键(key)”的概念,但用户可以在某些列上创建索引来加速某些操作,为一个表创建的索引数据会被保存在另外一张表中。Hive 的索引功能出现得相对较晚,目前提供的选项还较少。不过,索引被设计为可以使用内置的、可插拔的 Java 代码来定制,用户可以扩展这个功能来满足自己的需求。当然,并不是所有的查询都会受惠于 Hive 索引。用户可以使用 EXPLAIN 语法来分析 HiveQL 语句是否可以使用索引来提升查询性能。像 RDBMS 中的索引一样,需要评估索引的创建是否合理,毕竟索引需要占用更多的磁盘空间,并且创建和维护索引也会有一定的代价。用户必须权衡从索引得到的好处和付出的代价。

hive> create table user( id int, name string)
    > ROW FORMAT DELIMITED
    > FIELDS TERMINATED BY '\t'
    > STORED AS TEXTFILE;


hive> load data local inpath '/export1/tmp/wyp/row.txt'
    > overwrite into table user;


hive> select * from user where id =500000;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Cannot run job locally: Input Size (= 356888890) is larger than hive.exec.mode.local.auto.inputbytes.max (= 134217728)
Starting Job = job_1384246387966_0247, Tracking URL =  http://l-datalogm1.data.cn1:9981/proxy/application_1384246387966_0247/
Kill Command=/home/q/hadoop/bin/hadoop job -kill job_1384246387966_0247
Hadoop job information for Stage-1: number of mappers:2; number of reducers:0
2013-11-13 15:09:53,336 Stage-1 map = 0%,  reduce = 0%
2013-11-13 15:09:59,500 Stage-1 map=50%,reduce=0%, Cumulative CPU 2.0 sec
2013-11-13 15:10:00,531 Stage-1 map=100%,reduce=0%, Cumulative CPU 5.63 sec
2013-11-13 15:10:01,560 Stage-1 map=100%,reduce=0%, Cumulative CPU 5.63 sec
MapReduce Total cumulative CPU time: 5 seconds 630 msec
Ended Job = job_1384246387966_0247
MapReduce Jobs Launched:
Job 0: Map: 2   Cumulative CPU: 5.63 sec   HDFS Read: 361084006 HDFS Write: 357 SUCCESS
Total MapReduce CPU Time Spent: 5 seconds 630 msec
OK
500000 wyp.
Time taken: 14.107 seconds, Fetched: 1 row(s)


hive> create index user_index on table user(id)
    > as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
    > with deferred rebuild
    > IN TABLE user_index_table;
hive> alter index user_index on user rebuild;
hive> select * from user_index_table limit 5;
0       hdfs://mycluster/user/hive/warehouse/table02/000000_0   [0]
1       hdfs://mycluster/user/hive/warehouse/table02/000000_0   [352]
2       hdfs://mycluster/user/hive/warehouse/table02/000000_0   [704]
3       hdfs://mycluster/user/hive/warehouse/table02/000000_0   [1056]
4       hdfs://mycluster/user/hive/warehouse/table02/000000_0   [1408]
Time taken: 0.244 seconds, Fetched: 5 row(s)



hive> CREATE INDEX employees_index
    > ON TABLE employees (country)
    > AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
    > WITH DEFERRED REBUILD
    > IDXPROPERTIES ('creator' = 'me','created_at' = 'some_time')
    > IN TABLE employees_index_table
    > COMMENT 'Employees indexed by country and name.';
FAILED: Error in metadata: java.lang.RuntimeException:             \
Check the index columns, they should appear in the table being indexed.
FAILED: Execution Error, return code 1 from                       \
org.apache.hadoop.hive.ql.exec.DDLTask



