Hive table types and data import

Date: 2021-04-11 06:39:44

# Managed (internal) table (default)
create table emp_info(id bigint, name string, income double)
	row format delimited  
	fields terminated by '\t';
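
With a managed table, Hive owns both the metadata and the data files under its warehouse directory, so dropping the table also deletes the data. A quick way to check the storage location and table type (an inspection query only, nothing assumed beyond the table above):

// shows Location and Table Type: MANAGED_TABLE
describe formatted emp_info;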

# External table
create external table emp_info_ext(id bigint, name string, income double)
	row format delimited 
	fields terminated by '\t' 
	location '/external/emp_info_ext';
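
An external table only registers metadata; dropping it leaves the files under the LOCATION untouched on HDFS. The same inspection query confirms the type:

// shows Table Type: EXTERNAL_TABLE and Location: /external/emp_info_ext
describe formatted emp_info_ext;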

# Partitioned table
create table emp_info_part(id bigint, name string, income double)
	partitioned by (year string) 
	row format delimited  
	fields terminated by '\t';
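
The partition key behaves like a virtual column: it appears in query results and WHERE clauses, and filtering on it lets Hive scan only the matching partition directories. A small sketch, assuming some partitions have already been loaded:

// list the partitions that exist so far
show partitions emp_info_part;

// only the year=1990 partition directory is scanned
select id, name, income from emp_info_part where year = '1990';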


//Load data from the local filesystem into a Hive table (essentially, the file is uploaded into the table's Hive-managed directory on HDFS)

load data local inpath '/home/hadoop/data.txt' into table emp_info;

//Load data from HDFS into a Hive table (essentially, the file is moved from its original directory into the Hive-managed directory)

load data inpath 'hdfs://ns1/aa/bb/data.log' into table emp_info;
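
Either way the file ends up under the table's storage directory. Assuming the default warehouse location (/user/hive/warehouse) and the default database, this can be verified from the shell; the exact path depends on hive.metastore.warehouse.dir:

hadoop fs -ls /user/hive/warehouse/emp_info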

//Insert data in bulk using a select statement
insert overwrite table emp_info select * from emp_info2;
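
insert overwrite replaces whatever the table already holds, while insert into appends. The same pattern can also fill a specific partition of the partitioned table. A sketch, assuming emp_info2 has matching columns and the year value is just an example:

// append rather than replace
insert into table emp_info select * from emp_info2;

// populate a single partition from a query
insert overwrite table emp_info_part partition(year='2021')
	select id, name, income from emp_info2;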


# Loading data into an external table

hadoop fs -put data.log '/external/emp_info_ext'
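
Because the table's LOCATION already points at this directory, the file is queryable as soon as it is uploaded; no load statement is needed (assuming the file matches the declared tab-delimited columns):

select * from emp_info_ext limit 10;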


# Loading data into a partitioned table

The partition column (year) does not need to appear in the table's column list; it only needs to be specified in the partition clause when loading data.

load data local inpath '/home/hadoop/data.log' overwrite into table emp_info_part
     partition(year='1990');

load data local inpath '/home/hadoop/data2.log' overwrite into table emp_info_part
     partition(year='2000');
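
After the two loads, each partition lives in its own subdirectory (year=1990, year=2000) and can be checked with:

show partitions emp_info_part;
select count(*) from emp_info_part where year = '1990';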


