内容纲要
概要描述
oracle中可以通过connect by level伪列来实现,参考如下sql语句:
create table test (begin_date date,end_date date);
insert into test values(trunc(sysdate),trunc(sysdate+5));
commit;
select begin_date, end_date, begin_date + level - 1 as today
from test
connect by begin_date + level - 1 <= end_date;
而我们的inceptor不支持connect by level方式,可以参考下面的方法返回两个日期之间的日期和小时清单。
详细说明
1. 构建样例表
DROP TABLE IF EXISTS dtbetween;
CREATE TABLE dtbetween(id INT,start_date STRING ,end_date STRING ) STORED AS ORC;
--正常数据
INSERT INTO dtbetween SELECT '1', '2020-01-05','2020-01-09' FROM system.dual;
--异常数据
INSERT INTO dtbetween SELECT '2', '2020-02-09','2020-02-05' FROM system.dual;
2. 计算中间日期
SET character.literal.as.string=true;
SELECT
tmp.*,
date_add(tmp.start_date, pos) as mid_date,
t.pos
FROM dtbetween tmp lateral view posexplode(
split(space(datediff(end_date, start_date)+1), '')
) t AS pos,val
WHERE tmp.start_date<=tmp.end_date;
原理来说,就是通过datediff
函数计算两个日期之间相差的天数N,然后通过split
,space
函数构造一个N个字段的array
给lateral view posexplode
函数做转列,然后再通过date_add
函数将start_date和这个pos差相加,获取中间日期。
3. 拓展-小时粒度
DROP TABLE IF EXISTS dtbetween_hour;
CREATE TABLE dtbetween_hour(id INT,start_date STRING ,end_date STRING ) STORED AS ORC;
--正常数据
INSERT INTO dtbetween_hour select '1' ,'2020-01-01 20:00:00', '2020-01-02 03:00:00' FROM system.dual;
--异常数据
INSERT INTO dtbetween_hour select '2' , '2020-02-02 3:00:00','2020-02-01 20:00:00' FROM system.dual;
SET character.literal.as.string=TRUE;
SELECT
tmp.*,
from_unixtime(unix_timestamp(start_date) + pos * 3600) as mid_hour,
t.pos
FROM dtbetween_hour tmp lateral view posexplode(
split(space(
hour(end_date) - hour(start_date) + datediff(end_date, start_date)*24 + 1
), '')
) t AS pos,val
WHERE tmp.start_date<=tmp.end_date;
3. 拓展-月份粒度
SET character.literal.as.string=TRUE;
WITH tmp AS
(
SELECT 1 AS id ,date('20220615') AS start_date, date('20220905') AS end_date FROM system.dual UNION ALL
SELECT 2 AS id ,date('20220615') AS start_date, date('20220620') AS end_date FROM system.dual
)
SELECT
tmp.*,
add_months(trunc(end_date,'MM'), -pos) as mid_date,
t.pos
FROM tmp lateral view posexplode(
split(space(EXTRACT(MONTH FROM end_date)-EXTRACT(MONTH FROM start_date)+(EXTRACT(YEAR FROM end_date)-EXTRACT(YEAR FROM start_date))*12 +1), '')
) t AS pos,val
WHERE tmp.start_date<=tmp.end_date;
4. 拓展-统计两个日期之间的工作日
SET character.literal.as.string=true;
WITH tb AS
(
SELECT date('20221025') AS START_DATE,date('20221029') AS END_DATE FROM SYSTEM.DUAL UNION ALL
SELECT date('20221024') AS START_DATE,date('20221029') AS END_DATE FROM SYSTEM.DUAL UNION ALL
SELECT date('20221024') AS START_DATE,date('20221031') AS END_DATE FROM SYSTEM.DUAL
)
SELECT start_date,end_date,sum(ifweekend) FROM (
select
start_date,
end_date,
date_add(start_date,i) as day_,
case when dayofweek(date_add(start_date,i)) in (1,7) then 0 else 1 END AS ifweekend,
i
from tb tmp lateral view posexplode(split(space(datediff(end_date,start_date)+1),'')) pe as i,x )
GROUP BY start_date,end_date;