insertinto hive1.test2 partition (province='hebei',city='baoding') select * from hive1.test2; insert overwrite table test2 partition (province='hebei',city='shijiazhuang') selectid,name,age from test1;
动态分区
创建test3分区表
1 2 3 4 5
createtable test3(idint,namestring, age int ) partitioned by ( proince string,city string) rowformatDELIMITED FIELDSTERMINATEDby'\t' LINESTERMINATEDby'\n' STOREDas TEXTFILE ;
动态分区,复制一个表数据到分区表,动态创建分区 如果两个都是动态分区需要关闭严格模式
1
set hive.exec.dynamic.partition.mode=nonstrict //关闭动态分区的严格模式
1 2 3
insertinto hive1.test2 partition (province,city) selectid,name,age,'henan'as province, 'kaifeng'as city from table2;
使用分区动静混合
1 2 3
insertinto hive1.test2 partition (province='henan',city) selectid,name,age,'henan'as province, 'kaifeng'as city from table2;
查询期间,动态创建表,并将数据写入创建表中
1
createtable test3 asselectid ,namefrom test2 where province = 'hebei'and city = 'baoding' ;
导出hive数据到本地目录(下载)
1
insert overwrite localdirectory'/home/ubuntu/hive'select * from test2 where province = 'hebei' ;
导出hive数据到HDFS目录
1
insert overwrite directory'hdfs://s100:8020/user/ubuntu/xxx'select * from hive1.test2 where province = 'hebei' ;
查询数据向多个目录同时输出
1 2 3 4 5
from test2 t insert overwrite localdirectory'/home/ubuntu/hebei'select t.id,t.name,t.age,t.province,t.city ta where t.province = 'hebei' insert overwrite localdirectory'/home/ubuntu/henan'select t.id,t.name,t.age,t.province,t.city tb where t.province = 'henan'
查询
查询,投影查询,指定表的别名
1
select col1,col2,... fromtable t
查询,使用函数
1 2
selectupper(name) from test2 ; selectlower(name) from test2 ;
from (select * from test2 where province = 'hebei')e select e.id,e.name,e.age where city = 'baoding' select e.id,e.name,e.age where city = 'baoding' from (select * from test2 where province = 'hebei') e
**case..when..then** selectid,name, case when age <= 12then'young' when age >12and age <=13then'middle' when age >13and age <=15then'old' else'too old' endas yearState from test2 ;
load data local inpath '/home/ubuntu/customers.txt' into table hive1.customers; load data local inpath '/home/ubuntu/orders.txt' into table hive1.orders;
##内连接 JOIN … ON
1
select a.id,a.name,b.id,b.orderno,b.price from customers a join orders b on a.id = b.cid
a.id
a.name
b.id
b.orderno
b.price
1
tom1
1
No001
121.34
1
tom1
2
No002
121.35
1
tom1
3
No003
121.0
2
tom2
4
No004
22.66
2
tom2
5
No005
300.65
2
tom2
6
No006
800.56
##连接查询优化手段:查询表的大小从左到右是递增的
1
select c.id,c.name,c.age,o.orderno,o.price from customers c JOIN orders o on c.id = o.cid where ... ; //wright
1
select c.id,c.name,c.age,o.orderno,o.price from orders o JOIN customers c on c.id = o.cid where ... ; //wrong
###使用查询暗示 hint
1
select/*+STREAMTABLE (o)*/ c.id,c.name,c.age,o.orderno,o.price from orders o JOIN customers c on c.id = o.cid;
LEFT OUTER JOIN
1
select a.id,a.name,b.id,b.orderno,b.price from customers a LEFTOUTERJOIN orders b on a.id = b.cid;
RIGHT OUTER JOIN
1
select a.id,a.name,b.id,b.orderno,b.price from customers a RIGHTOUTERJOIN orders b on a.id = b.cid;
FULL OUTER JOIN
1
select a.id,a.name,b.id,b.orderno,b.price from customers a FULLOUTERJOIN orders b on a.id = b.cid;
左半连接,select 和where子句不能引用到右边表字段
左表的记录在右表中一旦找到对应的记录,右侧表即停止扫描。
1
select c.id,c.name from customers c LEFTsemiJOIN orders o on c.id = o.cid;
hive不支持右半连接操作,RIGHT SEMI JOIN 不存在
笛卡尔连接 m x n
1
select c.id,c.name,o.orderno from customers c JOIN orders o ;
map端连接,一张小表,通过mapper的时候,将小表完全载入内存。
暗示 MAPJOIN(c) 在0.7之前使用
1 2
select/*+MAPJOIN(c)*/ c.id,c.name,o.orderno from customers c JOIN orders o ; select/*+MAPJOIN(o)*/ c.id,c.name,o.orderno from customers c JOIN orders o ;
能否参与评论,且看个人手段。