# 1. 计算平台同时在线的主播人数最大值为多少,以及出现高峰期的时间段

# 主播表

create table zhubo
(
    id  int,
    stt string,
    edt string
)
    row format delimited fields terminated by '\t';

# 数据

insert overwrite table zhubo VALUES
(1001,'2021-09-04 12:12:12','2021-09-04 20:12:12'),
(1002,'2021-09-04 12:13:12','2021-09-04 19:12:12'),
(1003,'2021-09-04 13:12:12','2021-09-04 19:50:12'),
(1004,'2021-09-04 13:15:12','2021-09-04 18:12:12'),
(1005,'2021-09-04 14:12:12','2021-09-04 19:12:15'),
(1006,'2021-09-04 14:16:12','2021-09-04 20:12:23'),
(1003,'2021-09-04 20:12:12','2021-09-04 23:12:12'),
(1001,'2021-09-04 21:12:12','2021-09-04 22:12:12');
  1. 把时间段转成时间点,模拟流式数据,标记位上线 + 1,下线 - 1
select id, stt dt, 1 flag
from zhubo
union
select id, edt dt, -1 flag
from zhubo; t1
  1. 统计每个时间点的在线人数
select id,
       dt,
       sum(flag) over (order by dt) people
from (select id, stt dt, 1 flag
      from zhubo
      union
      select id, edt dt, -1 flag
      from zhubo) t1; t2
  1. 按照 时间流将下一个时间点移至当前行
select dt,
       people,
       lead(dt, 1) over (order by dt)
from (select id, dt, sum(flag) over (order by dt) people
      from (select id, stt dt, 1 flag
            from zhubo
            union
            select id, edt dt, -1 flag
            from zhubo) t1) t2;
  1. 求最大在线人数
select max(people) max_p
from (select id, dt, sum(flag) over (order by dt) people
      from (select id, stt dt, 1 flag
            from zhubo
            union
            select id, edt dt, -1 flag
            from zhubo) t1) t2;
  1. 最后将 3、4 做 join
同时在线人数的表多次被使用
with t as (
    select id,
           dt,
           sum(flag) over (order by dt) people
    from (select id, stt dt, 1 flag
          from zhubo
          union
          select id, edt dt, -1 flag
          from zhubo) t1
)
select dt, people, dtt
from (
         select dt, people, lead(dt, 1) over (order by dt) dtt
         from t
     ) t1
         join (
    select max(people) max_p
    from t
) t2 on t1.people = t2.max_p