ODPS SQL

时间:2023-08-04 22:58:19

基本操作:

查询:

SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY order_condition]
[DISTRIBUTE BY distribute_condition [SORT BY sort_condition] ]
[LIMIT number] 

更新:

INSERT OVERWRITE|INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] [(col1,col2 ...)]
select_statement
FROM from_statement;

表关联:

join_table:
table_reference join table_factor [join_condition]
| table_reference {left outer|right outer|full outer|inner} join table_reference join_condition
table_reference:
table_factor
| join_table
table_factor:
tbl_name [alias]
| table_subquery alias
| ( table_references )
join_condition:
on equality_expression ( and equality_expression )

演示内容:

使用DML:

*查询已有数据

*使用表连接查询数据

*覆盖更新

*追加更新

//上传演示数据
odps@ sdrtest>tunnel upload /root/.odpscmd/t_people.txt t_people;
//查看演示数据
odps@ sdrtest>select * from t_people;
+------------+------+
| id | name |
+------------+------+
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
+------------+------+
odps@ sdrtest>select count(*),id from t_people group by id having count(*) > 0; ID = 20190414051444876gfk3c692
Job Queueing.
----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP
M1_job_0 ................. TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP----------
M1_job_0 ................. TERMINATED 1 1 0 0 0----------------------------------------------------------------------------------
R2_1_job_0 ............... TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES: 02/02 [==========================>>] 100% ELAPSED TIME: 8.17 s
----------------------------------------------------------------------------------------------
Summary:
resource cost: cpu 0.00 Core * Min, memory 0.00 GB * Min
inputs:
sdrtest.t_people: 10 (776 bytes)
outputs:
Job run time: 5.000
Job run mode: fuxi job
Job run engine: execution engine
M1:
instance count: 1
run time: 3.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
TableScan1: 10 (min: 10, max: 10, avg: 10)
output records:
StreamLineWrite1: 10 (min: 10, max: 10, avg: 10)
writer dumps:
StreamLineWrite1: (min: 0, max: 0, avg: 0)
R2_1:
instance count: 1
run time: 5.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
StreamLineRead1: 10 (min: 10, max: 10, avg: 10)
output records:
AdhocSink1: 10 (min: 10, max: 10, avg: 10)
reader dumps:
StreamLineRead1: (min: 0, max: 0, avg: 0) +------------+------------+
| _c0 | id |
+------------+------------+
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| 1 | 4 |
| 1 | 5 |
| 1 | 6 |
| 1 | 7 |
| 1 | 8 |
| 1 | 9 |
| 1 | 10 |
+------------+------------+
//使用表连接查询数据
odps@ sdrtest>select t1.* from t_people t1 join t_people t2 on t1.id=t2.id; ID = 20190414052009767gcpxmnim Job Queueing.
----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP
M1_job_0 ................. TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP----------
M1_job_0 ................. TERMINATED 1 1 0 0 0----------------------------------------------------------------------------------
J2_1_job_0 ............... TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES: 02/02 [==========================>>] 100% ELAPSED TIME: 8.56 s
----------------------------------------------------------------------------------------------
Summary:
resource cost: cpu 0.00 Core * Min, memory 0.00 GB * Min
inputs:
sdrtest.t_people: 10 (776 bytes)
outputs:
Job run time: 6.000
Job run mode: fuxi job
Job run engine: execution engine
M1:
instance count: 1
run time: 3.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
TableScan1: 10 (min: 10, max: 10, avg: 10)
output records:
StreamLineWrite1: 10 (min: 10, max: 10, avg: 10)
StreamLineWrite2: 10 (min: 10, max: 10, avg: 10)
writer dumps:
StreamLineWrite1: (min: 0, max: 0, avg: 0)
StreamLineWrite2: (min: 0, max: 0, avg: 0)
J2_1:
instance count: 1
run time: 6.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
StreamLineRead1: 10 (min: 10, max: 10, avg: 10)
StreamLineRead2: 10 (min: 10, max: 10, avg: 10)
output records:
AdhocSink1: 10 (min: 10, max: 10, avg: 10)
reader dumps:
StreamLineRead1: (min: 0, max: 0, avg: 0)
StreamLineRead2: (min: 0, max: 0, avg: 0) +------------+------+
| id | name |
+------------+------+
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
+------------+------+
//创建新表用于演示追加更新:
odps@ sdrtest>create table t_people_new like t_people;
odps@ sdrtest>read t_people_new;
+------------+------------+
| id | name |
+------------+------------+
+------------+------------+
odps@ sdrtest>insert into table t_people_new select * from t_people;
//查看新建的表内写入的数据:
odps@ sdrtest>read t_people_new
>;
+------------+------------+
| id | name |
+------------+------------+
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
+------------+------------+
//演示追加更新 <into>
odps@ sdrtest>insert into table t_people_new select * from t_people;
Job Queueing.
----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP
M1_job_0 ................. TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES: 01/01 [==========================>>] 100% ELAPSED TIME: 5.26 s
----------------------------------------------------------------------------------------------
Summary:
resource cost: cpu 0.00 Core * Min, memory 0.00 GB * Min
inputs:
sdrtest.t_people: 10 (776 bytes)
outputs:
sdrtest.t_people_new: 10 (776 bytes)
Job run time: 3.000
Job run mode: fuxi job
Job run engine: execution engine
M1:
instance count: 1
run time: 3.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
TableScan1: 10 (min: 10, max: 10, avg: 10)
output records:
TableSink1: 10 (min: 10, max: 10, avg: 10) OK
odps@ sdrtest>read t_people_new;
+------------+------------+
| id | name |
+------------+------------+
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
+------------+------------+ //演示覆盖更新 <overwrite>
odps@ sdrtest>insert overwrite table t_people_new select * from t_people; ----------------------------------------------------------------------------------------------
STAGES STATUS TOTAL COMPLETED RUNNING PENDING BACKUP
M1_job_0 ................. TERMINATED 1 1 0 0 0
----------------------------------------------------------------------------------------------
STAGES: 01/01 [==========================>>] 100% ELAPSED TIME: 5.24 s
----------------------------------------------------------------------------------------------
Summary:
resource cost: cpu 0.00 Core * Min, memory 0.00 GB * Min
inputs:
sdrtest.t_people: 10 (776 bytes)
outputs:
sdrtest.t_people_new: 10 (776 bytes)
Job run time: 3.000
Job run mode: fuxi job
Job run engine: execution engine
M1:
instance count: 1
run time: 3.000
instance time:
min: 0.000, max: 0.000, avg: 0.000
input records:
TableScan1: 10 (min: 10, max: 10, avg: 10)
output records:
TableSink1: 10 (min: 10, max: 10, avg: 10) OK
odps@ sdrtest>read t_people_new;
+------------+------------+
| id | name |
+------------+------------+
| 1 | Michael Jordan |
| 2 | Angela Dorthea Merkel |
| 3 | Bruce Willis |
| 4 | Kim Kardashian |
| 5 | Jhon Knight |
| 6 | Maria Sharapova |
| 7 | Chiang Kai-shek |
| 8 | Jennifer Aniston |
| 9 | David Beckham |
| 10 | Dragon Lady |
+------------+------------+
odps@ sdrtest>

  

others....