# 1. 计算平台同时在线的主播人数最大值为多少,以及出现高峰期的时间段
# 主播表
create table zhubo | |
( | |
id int, | |
stt string, | |
edt string | |
) | |
row format delimited fields terminated by '\t'; |
# 数据
insert overwrite table zhubo VALUES | |
(1001,'2021-09-04 12:12:12','2021-09-04 20:12:12'), | |
(1002,'2021-09-04 12:13:12','2021-09-04 19:12:12'), | |
(1003,'2021-09-04 13:12:12','2021-09-04 19:50:12'), | |
(1004,'2021-09-04 13:15:12','2021-09-04 18:12:12'), | |
(1005,'2021-09-04 14:12:12','2021-09-04 19:12:15'), | |
(1006,'2021-09-04 14:16:12','2021-09-04 20:12:23'), | |
(1003,'2021-09-04 20:12:12','2021-09-04 23:12:12'), | |
(1001,'2021-09-04 21:12:12','2021-09-04 22:12:12'); |
- 把时间段转成时间点,模拟流式数据,标记位上线 + 1,下线 - 1
select id, stt dt, 1 flag | |
from zhubo | |
union | |
select id, edt dt, -1 flag | |
from zhubo; t1 |
- 统计每个时间点的在线人数
select id, | |
dt, | |
sum(flag) over (order by dt) people | |
from (select id, stt dt, 1 flag | |
from zhubo | |
union | |
select id, edt dt, -1 flag | |
from zhubo) t1; t2 |
- 按照 时间流将下一个时间点移至当前行
select dt, | |
people, | |
lead(dt, 1) over (order by dt) | |
from (select id, dt, sum(flag) over (order by dt) people | |
from (select id, stt dt, 1 flag | |
from zhubo | |
union | |
select id, edt dt, -1 flag | |
from zhubo) t1) t2; |
- 求最大在线人数
select max(people) max_p | |
from (select id, dt, sum(flag) over (order by dt) people | |
from (select id, stt dt, 1 flag | |
from zhubo | |
union | |
select id, edt dt, -1 flag | |
from zhubo) t1) t2; |
- 最后将 3、4 做 join
同时在线人数的表多次被使用 | |
with t as ( | |
select id, | |
dt, | |
sum(flag) over (order by dt) people | |
from (select id, stt dt, 1 flag | |
from zhubo | |
union | |
select id, edt dt, -1 flag | |
from zhubo) t1 | |
) | |
select dt, people, dtt | |
from ( | |
select dt, people, lead(dt, 1) over (order by dt) dtt | |
from t | |
) t1 | |
join ( | |
select max(people) max_p | |
from t | |
) t2 on t1.people = t2.max_p |