中国工信.三大运营商号段
正则匹配
SELECT
t.cellphone_num,
CASE
WHEN TRIM(t.cellphone_num) REGEXP '^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440' and length(TRIM(t.cellphone_num)) = 11 THEN '中国移动'
WHEN TRIM(t.cellphone_num) REGEXP '^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]' and length(TRIM(t.cellphone_num)) = 11 THEN '中国联通'
WHEN TRIM(t.cellphone_num) REGEXP '^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|19[139]' and length(TRIM(t.cellphone_num)) = 11 THEN '中国电信'
WHEN TRIM(t.cellphone_num) REGEXP '^1349' and length(TRIM(t.cellphone_num)) = 11 THEN '中国卫星电话'
ELSE '未知运营商'
END AS provider
FROM
temp tt
正则匹配测试
SELECT
"17002341234",
CASE
WHEN TRIM("17002341234") REGEXP '^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440' and length(TRIM("17002341234")) = 11 THEN '中国移动'
WHEN TRIM("17002341234") REGEXP '^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]' and length(TRIM("17002341234")) = 11 THEN '中国联通'
WHEN TRIM("17002341234") REGEXP '^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|19[139]' and length(TRIM("17002341234")) = 11 THEN '中国电信'
WHEN TRIM("17002341234") REGEXP '^1349' and length(TRIM("17002341234")) = 11 THEN '中国卫星电话'
ELSE '未知运营商'
END AS provider;
create table dw.dim_phone_segment_regexp(
vendor_id string comment '运营商,1:移动,2:联通,3:电信,4:卫星电话,5:其他',
vendor_name string comment '运营商名称',
vendor_regexp string comment '运营商号段正则表达式',
bi_update_datetime string comment "更新时间"
) TBLPROPERTIES ("comment"="手机号码段正则表达式 by zhangjiqiang");
insert into table dw.dim_phone_segment_regexp values
(1,"中国移动", "^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440", "2019-09-16 18:02:47"),
(2,"中国联通", "^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]", "2019-09-16 18:02:47"),
(3,"中国电信", "^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|19[139]", "2019-09-16 18:02:47"),
(4,"中国卫星电话", "^1349", "2019-09-16 18:02:47")
;
运营商号码段维表
表结构
-- hive
create table test.dim_mobile_phone_segment(
phone_num_segment string comment '手机号码段',
vendor_name string comment '运营商'
) TBLPROPERTIES ("comment"="手机号码段 by zhangjiqiang");
初始化数据
init date: 2019-09-16
insert into table test.dim_mobile_phone_segment values
(133,"中国电信"),
(149,"中国电信"),
(153,"中国电信"),
(173,"中国电信"),
(177,"中国电信"),
(180,"中国电信"),
(181,"中国电信"),
(189,"中国电信"),
(199,"中国电信"),
(130,"中国联通"),
(131,"中国联通"),
(132,"中国联通"),
(145,"中国联通"),
(155,"中国联通"),
(156,"中国联通"),
(166,"中国联通"),
(171,"中国联通"),
(175,"中国联通"),
(176,"中国联通"),
(185,"中国联通"),
(186,"中国联通"),
(166,"中国联通"),
(145,"中国联通"),
(1340,"中国移动"),
(1341,"中国移动"),
(1342,"中国移动"),
(1343,"中国移动"),
(1344,"中国移动"),
(1345,"中国移动"),
(1346,"中国移动"),
(1347,"中国移动"),
(1348,"中国移动"),
(135,"中国移动"),
(136,"中国移动"),
(137,"中国移动"),
(138,"中国移动"),
(139,"中国移动"),
(147,"中国移动"),
(150,"中国移动"),
(151,"中国移动"),
(152,"中国移动"),
(157,"中国移动"),
(158,"中国移动"),
(159,"中国移动"),
(172,"中国移动"),
(178,"中国移动"),
(182,"中国移动"),
(183,"中国移动"),
(184,"中国移动"),
(187,"中国移动"),
(188,"中国移动"),
(198,"中国移动"),
(147,"中国移动"),
(1700,"中国电信_虚拟运营商"),
(1701,"中国电信_虚拟运营商"),
(1702,"中国电信_虚拟运营商"),
(1703,"中国移动_虚拟运营商"),
(1705,"中国移动_虚拟运营商"),
(1706,"中国移动_虚拟运营商"),
(1704,"中国联通_虚拟运营商"),
(1707,"中国联通_虚拟运营商"),
(1708,"中国联通_虚拟运营商"),
(1709,"中国联通_虚拟运营商"),
(171,"中国联通_虚拟运营商"),
(1349,"卫星通信");