基于Hadoop生态圈的数据仓库实践 —— 进阶技术(九)

时间:2022-05-30 14:14:21
九、退化维度
        本节讨论一种称为退化维度的技术。该技术减少维度的数量,简化维度数据仓库模式。简单的模式比复杂的更容易理解,也有更好的查询性能。当一个维度没有数据仓库需要的任何数据时就可以退化此维度,此时需要把退化维度的相关数据迁移到事实表中,然后删除退化的维度。
1. 退化订单维度
        本小节说明如何退化订单维度,包括对数据仓库模式和定期装载脚本的修改。使用维度退化技术时你首先要识别数据,分析从来不用的数据列。例如,订单维度的order_number列就可能是这样的一列。但如果用户想看事务的细节,还需要订单号。因此,在退化订单维度前,要把订单号迁移到sales_order_fact表。下图显示了迁移后的模式。
基于Hadoop生态圈的数据仓库实践 —— 进阶技术(九)
        按顺序执行下面的四步退化order_dim维度表:
(1)给sales_order_fact表添加order_number列
(2)把order_dim表里的订单号迁移到sales_order_fact表
(3)删除sales_order_fact表里的order_sk列
(4)删除order_dim表

        下面的脚本完成所有退化订单维度所需的步骤。
use dw;  alter table sales_order_fact rename to sales_order_fact_old;
create table sales_order_fact(
order_number int COMMENT 'order number',
customer_sk int COMMENT 'customer surrogate key',
product_sk int COMMENT 'product surrogate key',
order_date_sk int COMMENT 'order date surrogate key',
allocate_date_sk int COMMENT 'allocate date surrogate key',
allocate_quantity int COMMENT 'allocate quantity',
packing_date_sk int COMMENT 'packing date surrogate key',
packing_quantity int COMMENT 'packing quantity',
ship_date_sk int COMMENT 'ship date surrogate key',
ship_quantity int COMMENT 'ship quantity',
receive_date_sk int COMMENT 'receive date surrogate key',
receive_quantity int COMMENT 'receive quantity',
request_delivery_date_sk int COMMENT 'request delivery date surrogate key',
order_amount decimal(10,2) COMMENT 'order amount',
order_quantity int COMMENT 'order quantity')
clustered by (order_number) into 8 buckets
stored as orc tblproperties ('transactional'='true');

insert into table sales_order_fact
select t2.order_number,
t1.customer_sk,
t1.product_sk,
t1.order_date_sk,
t1.allocate_date_sk,
t1.allocate_quantity,
t1.packing_date_sk,
t1.packing_quantity,
t1.ship_date_sk,
t1.ship_quantity,
t1.receive_date_sk,
t1.receive_quantity,
t1.request_delivery_date_sk,
t1.order_amount,
t1.order_quantity
from sales_order_fact_old t1
inner join order_dim t2 on t1.order_sk = t2.order_sk;

drop table sales_order_fact_old;
drop table order_dim;
2. 修改定期装载脚本
        退化一个维度后需要做的另一件事就是修改定期装载脚本。修改后的脚本需要把订单号加入到销售订单事实表,而不再需要导入订单维度。下面显示了修改后的regular_etl.sql脚本文件内容。
-- 设置变量以支持事务    set hive.support.concurrency=true;    set hive.exec.dynamic.partition.mode=nonstrict;    set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;    set hive.compactor.initiator.on=true;    set hive.compactor.worker.threads=1;        USE dw;          -- 设置SCD的生效时间和过期时间    SET hivevar:cur_date = CURRENT_DATE();SET hivevar:pre_date = DATE_ADD(${hivevar:cur_date},-1);    SET hivevar:max_date = CAST('2200-01-01' AS DATE);          -- 设置CDC的上限时间    INSERT OVERWRITE TABLE rds.cdc_time SELECT last_load, ${hivevar:cur_date} FROM rds.cdc_time;        -- 装载customer维度    -- 设置已删除记录和地址相关列上SCD2的过期,用<=>运算符处理NULL值。    UPDATE customer_dim        SET expiry_date = ${hivevar:pre_date}       WHERE customer_dim.customer_sk IN      (SELECT a.customer_sk        FROM (SELECT customer_sk,                    customer_number,                    customer_street_address,                    customer_zip_code,                    customer_city,                    customer_state,                    shipping_address,                    shipping_zip_code,                    shipping_city,                    shipping_state               FROM customer_dim WHERE expiry_date = ${hivevar:max_date}) a LEFT JOIN                     rds.customer b ON a.customer_number = b.customer_number               WHERE b.customer_number IS NULL OR               (  !(a.customer_street_address <=> b.customer_street_address)              OR !(a.customer_zip_code <=> b.customer_zip_code)              OR !(a.customer_city <=> b.customer_city)              OR !(a.customer_state <=> b.customer_state)              OR !(a.shipping_address <=> b.shipping_address)              OR !(a.shipping_zip_code <=> b.shipping_zip_code)              OR !(a.shipping_city <=> b.shipping_city)              OR !(a.shipping_state <=> b.shipping_state)              ));         -- 处理customer_street_addresses列上SCD2的新增行      INSERT INTO customer_dim    SELECT        ROW_NUMBER() OVER (ORDER BY t1.customer_number) + t2.sk_max,        t1.customer_number,        t1.customer_name,        t1.customer_street_address,        t1.customer_zip_code,        t1.customer_city,        t1.customer_state,        t1.shipping_address,        t1.shipping_zip_code,        t1.shipping_city,        t1.shipping_state,        t1.version,        t1.effective_date,        t1.expiry_date    FROM      (      SELECT          t2.customer_number customer_number,        t2.customer_name customer_name,        t2.customer_street_address customer_street_address,        t2.customer_zip_code customer_zip_code,        t2.customer_city customer_city,        t2.customer_state customer_state,        t2.shipping_address shipping_address,        t2.shipping_zip_code shipping_zip_code,        t2.shipping_city shipping_city,        t2.shipping_state shipping_state,        t1.version + 1 version,        ${hivevar:pre_date} effective_date,          ${hivevar:max_date} expiry_date       FROM customer_dim t1     INNER JOIN rds.customer t2         ON t1.customer_number = t2.customer_number         AND t1.expiry_date = ${hivevar:pre_date}       LEFT JOIN customer_dim t3        ON t1.customer_number = t3.customer_number       AND t3.expiry_date = ${hivevar:max_date}      WHERE (!(t1.customer_street_address <=> t2.customer_street_address)       OR  !(t1.customer_zip_code <=> t2.customer_zip_code)       OR  !(t1.customer_city <=> t2.customer_city)       OR  !(t1.customer_state <=> t2.customer_state)       OR  !(t1.shipping_address <=> t2.shipping_address)       OR  !(t1.shipping_zip_code <=> t2.shipping_zip_code)       OR  !(t1.shipping_city <=> t2.shipping_city)       OR  !(t1.shipping_state <=> t2.shipping_state)       )      AND t3.customer_sk IS NULL) t1      CROSS JOIN      (SELECT COALESCE(MAX(customer_sk),0) sk_max FROM customer_dim) t2;        -- 处理customer_name列上的SCD1    -- 因为hive的update的set子句还不支持子查询,所以这里使用了一个临时表存储需要更新的记录,用先delete再insert代替update    -- 因为SCD1本身就不保存历史数据,所以这里更新维度表里的所有customer_name改变的记录,而不是仅仅更新当前版本的记录    DROP TABLE IF EXISTS tmp;    CREATE TABLE tmp AS    SELECT        a.customer_sk,        a.customer_number,        b.customer_name,        a.customer_street_address,        a.customer_zip_code,        a.customer_city,        a.customer_state,        a.shipping_address,        a.shipping_zip_code,        a.shipping_city,        a.shipping_state,        a.version,        a.effective_date,        a.expiry_date      FROM customer_dim a, rds.customer b       WHERE a.customer_number = b.customer_number AND !(a.customer_name <=> b.customer_name);      DELETE FROM customer_dim WHERE customer_dim.customer_sk IN (SELECT customer_sk FROM tmp);      INSERT INTO customer_dim SELECT * FROM tmp;        -- 处理新增的customer记录     INSERT INTO customer_dim    SELECT        ROW_NUMBER() OVER (ORDER BY t1.customer_number) + t2.sk_max,        t1.customer_number,        t1.customer_name,        t1.customer_street_address,        t1.customer_zip_code,        t1.customer_city,        t1.customer_state,        t1.shipping_address,        t1.shipping_zip_code,        t1.shipping_city,        t1.shipping_state,        1,        ${hivevar:pre_date},        ${hivevar:max_date}    FROM      (      SELECT t1.* FROM rds.customer t1 LEFT JOIN customer_dim t2 ON t1.customer_number = t2.customer_number       WHERE t2.customer_sk IS NULL) t1      CROSS JOIN      (SELECT COALESCE(MAX(customer_sk),0) sk_max FROM customer_dim) t2;        -- 重载PA客户维度    TRUNCATE TABLE pa_customer_dim;      INSERT INTO pa_customer_dim      SELECT        customer_sk      , customer_number      , customer_name      , customer_street_address      , customer_zip_code      , customer_city      , customer_state      , shipping_address      , shipping_zip_code      , shipping_city      , shipping_state      , version      , effective_date      , expiry_date      FROM customer_dim      WHERE customer_state = 'PA' ;         -- 装载product维度    -- 设置已删除记录和product_name、product_category列上SCD2的过期    UPDATE product_dim       SET expiry_date = ${hivevar:pre_date}       WHERE product_dim.product_sk IN      (SELECT a.product_sk        FROM (SELECT product_sk,product_code,product_name,product_category                FROM product_dim WHERE expiry_date = ${hivevar:max_date}) a LEFT JOIN                     rds.product b ON a.product_code = b.product_code               WHERE b.product_code IS NULL OR (a.product_name <> b.product_name OR a.product_category <> b.product_category));        -- 处理product_name、product_category列上SCD2的新增行      INSERT INTO product_dim    SELECT        ROW_NUMBER() OVER (ORDER BY t1.product_code) + t2.sk_max,        t1.product_code,        t1.product_name,        t1.product_category,        t1.version,        t1.effective_date,        t1.expiry_date    FROM      (      SELECT          t2.product_code product_code,        t2.product_name product_name,        t2.product_category product_category,            t1.version + 1 version,        ${hivevar:pre_date} effective_date,          ${hivevar:max_date} expiry_date       FROM product_dim t1     INNER JOIN rds.product t2         ON t1.product_code = t2.product_code        AND t1.expiry_date = ${hivevar:pre_date}       LEFT JOIN product_dim t3        ON t1.product_code = t3.product_code       AND t3.expiry_date = ${hivevar:max_date}      WHERE (t1.product_name <> t2.product_name OR t1.product_category <> t2.product_category) AND t3.product_sk IS NULL) t1      CROSS JOIN      (SELECT COALESCE(MAX(product_sk),0) sk_max FROM product_dim) t2;        -- 处理新增的product记录    INSERT INTO product_dim    SELECT        ROW_NUMBER() OVER (ORDER BY t1.product_code) + t2.sk_max,        t1.product_code,        t1.product_name,        t1.product_category,        1,        ${hivevar:pre_date},        ${hivevar:max_date}    FROM      (      SELECT t1.* FROM rds.product t1 LEFT JOIN product_dim t2 ON t1.product_code = t2.product_code       WHERE t2.product_sk IS NULL) t1      CROSS JOIN      (SELECT COALESCE(MAX(product_sk),0) sk_max FROM product_dim) t2;        -- 装载销售订单事实表 -- 前一天新增的销售订单   INSERT INTO sales_order_fact    SELECT        a.order_number,        customer_sk,        product_sk,        e.order_date_sk,    null,    null,    null,    null,    null,    null,    null,    null,    f.request_delivery_date_sk,    order_amount,        quantity      FROM        rds.sales_order a,         customer_dim c,        product_dim d,        order_date_dim e,      request_delivery_date_dim f,      rds.cdc_time g     WHERE     a.order_status = 'N'AND a.customer_number = c.customer_number    AND a.status_date >= c.effective_date    AND a.status_date < c.expiry_date    AND a.product_code = d.product_code    AND a.status_date >= d.effective_date    AND a.status_date < d.expiry_date    AND to_date(a.status_date) = e.order_date   AND to_date(a.request_delivery_date) = f.request_delivery_date   AND a.entry_date >= g.last_load AND a.entry_date < g.current_load ;    -- 处理分配库房、打包、配送和收货四个状态DROP TABLE IF EXISTS tmp;CREATE TABLE tmp ASselect t0.order_number order_number,       t0.customer_sk customer_sk,       t0.product_sk product_sk,       t0.order_date_sk order_date_sk,       t2.allocate_date_sk allocate_date_sk,       t1.quantity allocate_quantity,       t0.packing_date_sk packing_date_sk,       t0.packing_quantity packing_quantity,       t0.ship_date_sk ship_date_sk,       t0.ship_quantity ship_quantity,       t0.receive_date_sk receive_date_sk,       t0.receive_quantity receive_quantity,       t0.request_delivery_date_sk request_delivery_date_sk,       t0.order_amount order_amount,       t0.order_quantity order_quantity  from sales_order_fact t0,       rds.sales_order t1,       allocate_date_dim t2,       rds.cdc_time t4 where t0.order_number = t1.order_number and t1.order_status = 'A'    and to_date(t1.status_date) = t2.allocate_date   and t1.entry_date >= t4.last_load and t1.entry_date < t4.current_load;DELETE FROM sales_order_fact WHERE sales_order_fact.order_number IN (SELECT order_number FROM tmp); INSERT INTO sales_order_fact SELECT * FROM tmp;DROP TABLE IF EXISTS tmp;CREATE TABLE tmp ASselect t0.order_number order_number,       t0.customer_sk customer_sk,       t0.product_sk product_sk,       t0.order_date_sk order_date_sk,       t0.allocate_date_sk allocate_date_sk,       t0.allocate_quantity allocate_quantity,       t2.packing_date_sk packing_date_sk,       t1.quantity packing_quantity,       t0.ship_date_sk ship_date_sk,       t0.ship_quantity ship_quantity,       t0.receive_date_sk receive_date_sk,       t0.receive_quantity receive_quantity,       t0.request_delivery_date_sk request_delivery_date_sk,       t0.order_amount order_amount,       t0.order_quantity order_quantity  from sales_order_fact t0,       rds.sales_order t1,       packing_date_dim t2,       rds.cdc_time t4 where t0.order_number = t1.order_number and t1.order_status = 'P'    and to_date(t1.status_date) = t2.packing_date   and t1.entry_date >= t4.last_load and t1.entry_date < t4.current_load;    DELETE FROM sales_order_fact WHERE sales_order_fact.order_number IN (SELECT order_number FROM tmp); INSERT INTO sales_order_fact SELECT * FROM tmp;DROP TABLE IF EXISTS tmp;CREATE TABLE tmp ASselect t0.order_number order_number,       t0.customer_sk customer_sk,       t0.product_sk product_sk,       t0.order_date_sk order_date_sk,       t0.allocate_date_sk allocate_date_sk,       t0.allocate_quantity allocate_quantity,       t0.packing_date_sk packing_date_sk,       t0.packing_quantity packing_quantity,       t2.ship_date_sk ship_date_sk,       t1.quantity ship_quantity,       t0.receive_date_sk receive_date_sk,       t0.receive_quantity receive_quantity,       t0.request_delivery_date_sk request_delivery_date_sk,       t0.order_amount order_amount,       t0.order_quantity order_quantity  from sales_order_fact t0,       rds.sales_order t1,       ship_date_dim t2,       rds.cdc_time t4 where t0.order_number = t1.order_number and t1.order_status = 'S'    and to_date(t1.status_date) = t2.ship_date   and t1.entry_date >= t4.last_load and t1.entry_date < t4.current_load;   DELETE FROM sales_order_fact WHERE sales_order_fact.order_number IN (SELECT order_number FROM tmp); INSERT INTO sales_order_fact SELECT * FROM tmp;DROP TABLE IF EXISTS tmp;CREATE TABLE tmp ASselect t0.order_number order_number,       t0.customer_sk customer_sk,       t0.product_sk product_sk,       t0.order_date_sk order_date_sk,       t0.allocate_date_sk allocate_date_sk,       t0.allocate_quantity allocate_quantity,       t0.packing_date_sk packing_date_sk,       t0.packing_quantity packing_quantity,       t0.ship_date_sk ship_date_sk,       t0.ship_quantity ship_quantity,       t2.receive_date_sk receive_date_sk,       t1.quantity receive_quantity,       t0.request_delivery_date_sk request_delivery_date_sk,       t0.order_amount order_amount,       t0.order_quantity order_quantity  from sales_order_fact t0,       rds.sales_order t1,       receive_date_dim t2,       rds.cdc_time t4 where t0.order_number = t1.order_number and t1.order_status = 'R'    and to_date(t1.status_date) = t2.receive_date   and t1.entry_date >= t4.last_load and t1.entry_date < t4.current_load;   DELETE FROM sales_order_fact WHERE sales_order_fact.order_number IN (SELECT order_number FROM tmp); INSERT INTO sales_order_fact SELECT * FROM tmp;-- 更新时间戳表的last_load字段    INSERT OVERWRITE TABLE rds.cdc_time SELECT current_load, current_load FROM rds.cdc_time;
3. 测试修改后的定期装载 
(1)准备测试数据
        测试使用具有分配库房、打包、配送和收货里程碑的两个新订单。所以每个订单需要添加五行。下面的脚本向源数据库里的sales_order表新增十行。
USE source; DROP TABLE IF EXISTS temp_sales_order_data;  CREATE TABLE temp_sales_order_data AS SELECT * FROM sales_order WHERE 1=0;  SET @start_date := unix_timestamp('2016-07-25');  SET @end_date := unix_timestamp('2016-07-26');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (1, 131, 1, 1, @order_date, 'N', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-25');  SET @end_date := unix_timestamp('2016-07-26');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (2, 132, 2, 2, @order_date, 'N', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-26');  SET @end_date := unix_timestamp('2016-07-27');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (3, 131, 1, 1, @order_date, 'A', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-26');  SET @end_date := unix_timestamp('2016-07-27');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (4, 132, 2, 2, @order_date, 'A', '2016-08-01', @order_date, @amount, @quantity);SET @start_date := unix_timestamp('2016-07-27');  SET @end_date := unix_timestamp('2016-07-28');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (5, 131, 1, 1, @order_date, 'P', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-27');  SET @end_date := unix_timestamp('2016-07-28');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (6, 132, 2, 2, @order_date, 'P', '2016-08-01', @order_date, @amount, @quantity);SET @start_date := unix_timestamp('2016-07-28');  SET @end_date := unix_timestamp('2016-07-29');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (7, 131, 1, 1, @order_date, 'S', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-28');  SET @end_date := unix_timestamp('2016-07-29');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (8, 132, 2, 2, @order_date, 'S', '2016-08-01', @order_date, @amount, @quantity);SET @start_date := unix_timestamp('2016-07-29');  SET @end_date := unix_timestamp('2016-07-30');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (9, 131, 1, 1, @order_date, 'R', '2016-08-01', @order_date, @amount, @quantity); SET @start_date := unix_timestamp('2016-07-29');  SET @end_date := unix_timestamp('2016-07-30');  SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));  SET @amount := floor(1000 + rand() * 9000); SET @quantity := floor(10 + rand() * 90);    INSERT INTO temp_sales_order_data VALUES (10, 132, 2, 2, @order_date, 'R', '2016-08-01', @order_date, @amount, @quantity);INSERT INTO sales_order        select null,         order_number,         customer_number,         product_code,         status_date,         order_status,         request_delivery_date,         entry_date,         order_amount,         quantity    from temp_sales_order_data t1 order by t1.status_date;   COMMIT ;
(2)执行五次定期装载
use rds;  INSERT OVERWRITE TABLE rds.cdc_time SELECT '2016-07-25', '2016-07-26' FROM rds.cdc_time;
将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行改为SET hivevar:cur_date = '2016-07-26';
./regular_etl.sh

use rds;  INSERT OVERWRITE TABLE rds.cdc_time SELECT '2016-07-26', '2016-07-27' FROM rds.cdc_time;
将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行改为SET hivevar:cur_date = '2016-07-27';
./regular_etl.sh

use rds;  INSERT OVERWRITE TABLE rds.cdc_time SELECT '2016-07-27', '2016-07-28' FROM rds.cdc_time;
将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行改为SET hivevar:cur_date = '2016-07-28';
./regular_etl.sh

use rds;  INSERT OVERWRITE TABLE rds.cdc_time SELECT '2016-07-28', '2016-07-29' FROM rds.cdc_time;
将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行改为SET hivevar:cur_date = '2016-07-29';
./regular_etl.sh

use rds;  INSERT OVERWRITE TABLE rds.cdc_time SELECT '2016-07-29', '2016-07-30' FROM rds.cdc_time;
将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行改为SET hivevar:cur_date = '2016-07-30';
./regular_etl.sh

        查询sales_order_fact表的两条订单。
use dw;select t1.order_number orn,       t2.order_date od,       t1.order_quantity oq,       t3.allocate_date ad,       t1.allocate_quantity aq,       t4.packing_date pd,       t1.packing_quantity pq,       t5.ship_date sd,       t1.ship_quantity sq,       t6.receive_date rd,       t1.receive_quantity rq  from sales_order_fact t1       inner join order_date_dim t2 on t1.order_date_sk = t2.order_date_sk        left join allocate_date_dim t3 on t1.allocate_date_sk = t3.allocate_date_sk        left join packing_date_dim t4 on t1.packing_date_sk = t4.packing_date_sk        left join ship_date_dim t5 on t1.ship_date_sk = t5.ship_date_sk        left join receive_date_dim t6 on t1.receive_date_sk = t6.receive_date_sk where t1.order_number IN (131 , 132);
        查询结果如下图所示。
基于Hadoop生态圈的数据仓库实践 —— 进阶技术(九)        测试完将regular_etl.sql文件中的SET hivevar:cur_date = CURRENT_DATE();行恢复。