本文通过简单易懂的业务场景,旨在提高大家的 SQL 水平。把文中所有的 SQL 全理解了,工作中遇到的各种 SQL 场景和面试中遇到的各种 SQL 问题都能灵活应对。文中所有的 SQL 都支持 Hive 语法,学会了 Hive 的 SQL,那么 MySQL 的 SQL 题也都可以 cover 住,下面的题目难度从简单到困难的都有。笔者在之前练习 SQL 过程中,发现了网上一些比较好的 SQL 题目,但是苦于好多博客提供的 SQL 可读性、规范性、执行效率并不高,因此才有了今天这篇博文,后续如果发现有可优化的的 SQL,笔者会持续更新本博文。
# 数据表介绍
# 学生表
create table study.student | |
( | |
student_id string ,-- 学生编号 | |
name string ,-- 学生姓名 | |
birthday string ,-- 学生生日 | |
sex string -- 学生性别 | |
) | |
row format delimited fields terminated by '\t'; |
# 教师表
create table if not exists study.teacher | |
( | |
teacher_id string ,-- 教师编号 | |
name string -- 教师姓名 | |
) | |
row format delimited fields terminated by '\t'; |
# 课程表
create table if not exists study.course | |
( | |
course_id string ,-- 课程编号 | |
name string ,-- 课程名 | |
teacher_id string -- 课程对应的教师编号 | |
) | |
row format delimited fields terminated by '\t'; |
# 成绩表
create table study.score | |
( | |
student_id string ,-- 学生编号 | |
course_id string ,-- 课程编号 | |
score int -- 对应的成绩 | |
) | |
row format delimited fields terminated by '\t'; |
# 答题时请注意:
- 这里认为课程可能是选修课,学生不一定学了所有课程
- 成绩表里的课程都应该在课程表里存在,而且每门课程都应该有对应的教师,且在教师表里可以找到
- 成绩表里的学生也应该在学生表里存在
- 下面的导入数据也是随机写的,读者也可以自己随机制造生成数据导入
# 导入数据
# 学生表
insert overwrite table study.student VALUES | |
('01' , '赵雷' , '1990-01-01' , '男'), | |
('02' , '钱电' , '1990-12-21' , '男'), | |
('03' , '孙风' , '1990-12-20' , '男'), | |
('04' , '李云' , '1990-12-06' , '男'), | |
('05' , '周梅' , '1991-12-01' , '女'), | |
('06' , '吴兰' , '1992-01-01' , '女'), | |
('07' , '郑竹' , '1989-01-01' , '女'), | |
('09' , '张三' , '2017-12-20' , '女'), | |
('10' , '李四' , '2017-12-25' , '女'), | |
('11' , '李四' , '2012-06-06' , '女'), | |
('12' , '赵六' , '2013-06-13' , '女'), | |
('13' , '孙七' , '2014-06-01' , '女'); |
# 教师表
insert overwrite table study.teacher VALUES | |
('01' , '张三'), | |
('02' , '李四'), | |
('03' , '王五'); |
# 课程表
insert overwrite table study.course VALUES | |
('01' , '语文' , '02'), | |
('02' , '数学' , '01'), | |
('03' , '英语' , '03'); |
# 成绩表
insert overwrite table study.score VALUES | |
('01' , '01' , 80), | |
('01' , '02' , 90), | |
('01' , '03' , 99), | |
('02' , '01' , 70), | |
('02' , '02' , 60), | |
('02' , '03' , 80), | |
('03' , '01' , 80), | |
('03' , '02' , 80), | |
('03' , '03' , 80), | |
('04' , '01' , 50), | |
('04' , '02' , 30), | |
('04' , '03' , 20), | |
('05' , '01' , 76), | |
('05' , '02' , 87), | |
('06' , '01' , 31), | |
('06' , '03' , 34), | |
('07' , '02' , 89), | |
('07' , '03' , 98); |
# 练习题
- 查询所有教师的全部信息(教师编号和姓名)
- 输出所有学生中男生的全部信息
- 查询所有学生中男生的全部信息,按照生日排降序
- 查询所有学生的全部信息,先按照性别排序,再按照生日排降序
- 求出学生总数
- 查询学生中男生、女生人数
- 参加考试的学生中,查出每个学生的学生编号、选了几门课
- 检索至少选修三门课程的学生学号
- 查询存在不及格的课程编号
- 输出所有课程的课程编号、课程名、对应的教师姓名
- 求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
- 求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
- 查询同时学习 "01" 课程和 "02" 课程的学生编号及 01 和 02 课程分数
- 查询 "01" 课程比 "02" 课程成绩高的学生编号及 01 和 02 课程分数
- 查询 "01" 课程比 "02" 课程成绩高的学生姓名及 01 和 02 课程分数
- 查询选择了 "01" 课程但没选择 "02" 课程的学生姓名
- 查询学过 ' 张三 ' 老师课程的所有同学姓名、生日、性别
- 查询同时学习 "01"、"02" 课程学生的学生编号以及 "01" 和 "02" 课程成绩
- 查询学习 "01" 课程但没有学习 "02" 课程学生的学生编号以及 "01" 课程成绩
- 查询学习 "02" 课程但没有学习 "01" 课程学生的学生编号以及 "02" 课程成绩
- 查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
- 查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
- 检索 "01" 课程分数小于 60 分的学生信息及 "01" 课程分数,按照分数排降序
- 查询两门及其以上不及格课程的同学的姓名及其平均成绩
- 查询没有学全所有课程的同学的编号 (包含无选课的同学)
- 查询 1990 年出生的学生名单
- 查询名字中含有「风」字的学生信息
- 查询「李」姓老师的数量
- 查询至少有两门课与学号为 "01" 的学生所学相同的学生 id
- 查询选修了全部课程的学生 id 的姓名和姓名
- 查询和 "01" 号的同学学习的课程完全相同的其他同学的学生 id
- 统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
- 查询学生的总成绩,并进行排名
- 查询各科成绩前三名的记录
- 查询出只选修两门课程的学生学号和姓名
- 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
- 查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
- 查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
- 查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
- 查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
- 统计每门课程的学生选修人数(超过 5 人的课程才统计)。
- 查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
# 答案(请先独立思考后,再参考答案)
- 查询所有教师的全部信息(教师编号和姓名)
select * | |
from teacher; |
- 输出所有学生中男生的全部信息
select * | |
from student | |
where sex = '男'; |
- 查询所有学生中男生的全部信息,按照生日排降序
select * | |
from student | |
where sex='男' | |
order by birthday desc; |
- 查询所有学生的全部信息,先按照性别排序,再按照生日排降序
select * | |
from student | |
order by sex,birthday desc ; |
- 求出学生总数
select count(*) | |
from student; |
- 查询学生中男生、女生人数
select sex, count(*) | |
from student | |
group by sex; |
- 参加考试的学生中,查出每个学生的学生编号、选了几门课
select student_id, count(course_id) | |
from score | |
group by student_id; |
- 检索至少选修三门课程的学生学号
select student_id, count(course_id) as course_ct | |
from score | |
group by student_id | |
having course_ct >= 3; |
- 查询存在不及格的课程编号
1. 写法一 | |
select distinct course_id, score | |
from score | |
where score < 60; | |
2. 写法二 | |
select course_id | |
from score | |
where score<60 | |
group by course_id; |
- 输出所有课程的课程编号、课程名、对应的教师姓名
1. 写法一 | |
select c.course_id,c.name,t.name | |
from | |
course c join teacher t on c.teacher_id = t.teacher_id; | |
2. 写法二 | |
select course_id, a.name as course_name, b.name as teacher_name | |
from (select course_id, name, teacher_id from study.course) a join ( | |
select teacher_id, name | |
from study.teacher ) b | |
on a.teacher_id = b.teacher_id; |
- 求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
select course_id,count(student_id),avg(score) | |
from score | |
group by course_id; |
- 求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
select c.name, count(student_id), avg(score) | |
from course c left join score s on s.course_id = c.course_id | |
group by c.name; |
- 查询同时学习 "01" 课程和 "02" 课程的学生编号及 01 和 02 课程分数
1. 写法一 | |
select s1.student_id,s1.score,s2.score | |
from score s1 | |
left join score s2 on s1.student_id = s2.student_id | |
where s1.course_id = '01' and s2.course_id= '02'; | |
2. 写法二 | |
select a.student_id, a.score as score_01, b.score as score_02 | |
from (select * from score where course_id = '01') a | |
join ( | |
select * | |
from score | |
where course_id = '02') b | |
on a.student_id = b.student_id; |
- 查询 "01" 课程比 "02" 课程成绩高的学生编号及 01 和 02 课程分数
1. 写法一 | |
select s1.student_id, s1.score, s2.score | |
from score s1 | |
left join score s2 on s1.student_id = s2.student_id | |
where s1.course_id = '01' | |
and s2.course_id = '02' | |
and s1.score > s2.score; | |
2. 写法二 | |
select a.student_id, a.score as score_01, b.score as score_02 | |
from (select * from score where course_id = '01') a | |
join ( | |
select * | |
from score | |
where course_id = '02') b | |
on a.student_id = b.student_id | |
where a.score > b.score; |
- 查询 "01" 课程比 "02" 课程成绩高的学生姓名及 01 和 02 课程分数
select s.name, a.score as score_01, b.score as score_02 | |
from (select * from score where course_id = '01') a | |
join ( | |
select * | |
from score | |
where course_id = '02') b | |
join student s | |
on a.student_id = b.student_id and a.student_id = s.student_id | |
where a.score > b.score; |
- 连续三道题都是有关联的,前两道题给这道题做铺垫,所以当我们拿到一个向这种比较复杂的需求时,可以进行拆分需求,先拆分成 1、在做 2,最后达到完整的需求 ------ 查询选择了 "01" 课程但没选择 "02" 课程的学生姓名
1. 找出选择了 "01"课程但没选择 "02"课程的学生编号 | |
select a.student_id | |
from (select * from score where course_id = '01') a | |
left outer join (select * from score where course_id = '02') b | |
on a.student_id = b.student_id | |
where b.student_id is null | |
2. 拿着学生编号关联学生表找到姓名 | |
select name | |
from (select a.student_id as student_id | |
from (select * from score where course_id = '01') a | |
left join (select * from score where course_id = '02') b on a.student_id = b.student_id | |
where b.student_id is null) a | |
join ( | |
select * | |
from student) b | |
on a.student_id = b.student_id |
- 查询学过 ' 张三 ' 老师课程的所有同学姓名、生日、性别
- 找出 ' 张三 ' 老师的教师编号
- 找出 ' 张三 ' 老师所有教授的课程编号
- 找出这些课程对应的学生编号
- 根据学生编号找到对应的学生信息
select * | |
from student s | |
join ( | |
select student_id | |
from score s | |
join ( | |
select c.course_id | |
from ( | |
select teacher_id, name | |
from teacher | |
where name = '张三' | |
) t1 | |
left join ( | |
select teacher_id, course_id | |
from course | |
) c on t1.teacher_id = c.teacher_id | |
) t2 on s.course_id = t2.course_id | |
) t3 on s.student_id = t3.student_id; |
- 查询同时学习 "01"、"02" 课程学生的学生编号以及 "01" 和 "02" 课程成绩
select a.student_id as student_id, a.score as score_01, b.score as score_02 | |
from (select * from score where course_id = '01') a | |
join ( | |
select * | |
from score | |
where course_id = '02') b | |
on a.student_id = b.student_id; |
- 查询学习 "01" 课程但没有学习 "02" 课程学生的学生编号以及 "01" 课程成绩
select a.student_id as student_id, a.score as score_01 | |
from (select * from score where course_id = '01') a | |
left join (select * from score where course_id = '02') b | |
on a.student_id = b.student_id | |
where b.student_id is null; |
- 查询学习 "02" 课程但没有学习 "01" 课程学生的学生编号以及 "02" 课程成绩
select b.student_id as student_id, b.score as score_02 | |
from (select * from score where course_id = '01') a | |
right join (select * from score where course_id = '02') b | |
on a.student_id = b.student_id | |
where a.student_id is null; |
- 查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_idorder by course_num desc ,score_sum desc |
- 查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_idwhere score_avg > 60order by course_num desc ,score_sum desc |
- 检索 "01" 课程分数小于 60 分的学生信息及 "01" 课程分数,按照分数排降序
select a.student_id as student_id ,name ,birthday ,sex ,score from ( select * from student ) ajoin ( select student_id ,score from score where course_id='01' and score < 60 ) b on a.student_id = b.student_idorder by score desc |
- 查询两门及其以上不及格课程的同学的姓名及其平均成绩
select name ,score_avg from ( select * from student ) ajoin ( select student_id ,score_avg from ( select student_id ,avg(score) as score_avg ,count(case when score < 60 then 1 end) as fail_count from score group by student_id ) a where fail_count > 1 ) b on a.student_id = b.student_id; |
查询没有学全所有课程的同学的编号 (包含无选课的同学)
- 在成绩表中找出不满足课程总数的学生 id - 找出学生表中无成绩的学生 id - 二者之和
select b.student_id ( select count(*) as course_num from score ) ajoin ( select student_id ,count(*) as course_num from score ) b on a.course_num = b.course_numunion ( select a.student_id from ( select * from student ) a left join ( select * from score ) b on a.student_id = b.student_id where course_id is null ) b |
- 查询 1990 年出生的学生名单
select * from studentwhere substr(birthday,1,4)='1990' |
- 查询名字中含有「风」字的学生信息
select * from studentwhere name like '%风%' |
- 查询「李」姓老师的数量
select count(*) from teacherwhere name like '李%' |
- 查询至少有两门课与学号为 "01" 的学生所学相同的学生 id
select a.student_id | |
from (select b.student_id as student_id, b.course_id as course_id | |
from (select course_id from score where student_id = '01') a | |
join (select student_id, course_id from score where student_id <> '01') b | |
on a.course_id = b.course_id) a | |
group by a.student_id | |
having count(course_id) > 1; |
- 查询选修了全部课程的学生 id 的姓名和姓名
select b.student_id as student_id ,b.name as name ,a.course_num as course_numfrom ( select student_id ,a.course_num as course_num from ( select student_id ,count(course_id) as course_num from course ) a join ( select student_id ,count(*) as course_num from score group by student_id ) b on a.course_num = b.course_num ) ajoin ( select * from student ) bon a.student_id = b.student_id |
- 查询和 "01" 号的同学学习的课程完全相同的其他同学的学生 id
select student_id | |
from ( | |
select student_id, ct, ct1 | |
from ( | |
select student_id, count(*) ct | |
from score as a | |
left join | |
(select course_id | |
from score | |
where student_id = '01') as b | |
on a.course_id = b.course_id | |
group by student_id | |
) t1 | |
join( | |
select count(*) ct1 | |
from score | |
where student_id = '01' | |
) t2 | |
) t3 | |
where ct = ct1 | |
and student_id != '01' |
- 先选出‘01’同学所学的课程 id
- 然后选出学过这些课程 id 的学生 id 及其选过这些课程数的课程总数
- 匹配 “01” 同学学的课程总数(存在某同学学的课程超过 “01” 同学学的课程)
- 匹配成绩表中学生的课程总数
统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
- 根据课程号在成绩表中查询分数段的个数除以该学习该课程的总数
select a.course_id as course_id ,b.name as name ,round(sum(case when score > 85 then 1 else 0 end)/count(*),2) as '100-85' ,round(sum(case when score between 70 and 84 then 1 else 0 end)/count(*),2) as '85-70' ,round(sum(case when score between 60 and 69 then 1 else 0 end)/count(*),2) as '70-60' ,round(sum(case when score < 60 then 1 else 0 end)/count(*),2) as '60-0' from ( select course_id ,score from score ) a join ( select course_id ,name from course ) b on a.course_id = b.course_id group by a.course_id ,b.name |
- 查询学生的总成绩,并进行排名
select student_id ,sum(score) score_sum ,row_number() over(order by sum(score) desc) as rank from scoregroup by student_id |
- 查询各科成绩前三名的记录
select course_id,student_id,rk | |
from( | |
select *, | |
rank() over (partition by course_id order by score) rk | |
from score | |
)t1 | |
where rk<=3; |
- 查询出只选修两门课程的学生学号和姓名
select a.student_id as student_id ,a.name as name from ( select student_id ,name from student ) ajoin ( select student_id from score group by student_id having count(course_id) = 2 ) b on a.student_id = b.student_id; |
- 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
select course_id ,avg(score) as score_avg from scoreorder by score_avg desc ,course_id asc |
- 查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
select a.student_id as student_id ,a.name as name ,b.score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,avg(score) as score_avg from score group by student_id having avg(score) >= 85 ) b on a.student_id = b.student_id |
- 查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
select name ,score from ( select student_id ,score from ( select course_id from course where name = '数学' ) a join ( select student_id ,course_id ,score from score where score < 60 ) b on a.course_id = b.course_id ) ajoin ( select student_id ,name from student ) b on a.student_id = b.student_id |
- 查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
select a.student_id as student_id ,name from ( select student_id from score where course_id = '01' and score >= 80 ) ajoin ( select student_id ,name from student ) b on a.student_id = b.student_id |
- 查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
select student_id ,a.course_id as course_id ,score from ( select course_id from ( select course_id ,teacher_id from course ) a join ( select teacher_id from teacher where name = '张三' ) b on a.teacher_id = b.teacher_id ) ajoin ( select student_id ,course_id ,score from score ) b on a.course_id = b.course_idorder by score desclimit 1; |
- 统计每门课程的学生选修人数(超过 5 人的课程才统计)。
select a.course_id as course_id ,b.name ,a.num as num from ( select course_id ,count(*) as num from score group by course_id having count(*) >= 5 ) ajoin ( select course_id ,name from course ) b on a.course_id =b.course_id |
- 查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
1. 写法一 | |
select a.student_id, a.course_id, a.score | |
from ( | |
select student_id | |
, course_id | |
, score | |
from score | |
) a | |
join | |
( | |
select student_id | |
, course_id | |
, score | |
from score | |
) b | |
on a.student_id = b.student_id | |
and a.score = b.score | |
and a.course_id <> b.course_id | |
group by a.student_id, a.course_id, a.score | |
2. 写法二 | |
select DISTINCT b.student_id, b.course_id, b.score | |
from score a, | |
score b | |
where a.course_id != b.course_id | |
and a.score = b.score | |
and a.student_id = b.student_id; | |
3. 写法三 | |
select * | |
from score as t | |
where not exists (select 1 | |
from score | |
where student_id = t.student_id | |
and course_id <> t.course_id | |
and score <> t.score) |