diff --git "a/03 \350\265\226\345\277\203\345\246\215/20231013 \347\252\227\345\217\243\345\207\275\346\225\260.md" "b/03 \350\265\226\345\277\203\345\246\215/20231013 \347\252\227\345\217\243\345\207\275\346\225\260.md" new file mode 100644 index 0000000000000000000000000000000000000000..22212b2fe5424aea70986dd13b369da16ab60052 --- /dev/null +++ "b/03 \350\265\226\345\277\203\345\246\215/20231013 \347\252\227\345\217\243\345\207\275\346\225\260.md" @@ -0,0 +1,256 @@ +## 窗口函数 + +mysql 8.0新增窗口函数(开窗函数)。 + +非聚合窗口函数:是相对于聚合函数来说的,特性为非聚合。一次只处理一行数据。 + +窗口聚合函数:窗口聚合函数在单元行上计算某个字段的结果时,可将窗口范围内的数据输入到聚合函数中,并且不会改变行数。 + +### 语法 + +```sql +窗口函数名称(参数) over(partition by ... order by... 窗口大小) +``` + +partition by 分组,等价于group by + +order by 排序 + +### 序号函数 + +row_number()、rank()、dense_rank() + +### 开窗聚合函数 + +```sql +sum()|avg()|min()|max()|count()| over (partition by ... order by...) +``` + +案例: + +```sql +# 获取各部门薪金总和(合计) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno) 'sum' +from emp; + +# 获取各部门薪金总和(累加) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno order by sal) 'sum' +from emp; +``` + +#### 窗口大小 + +```sql +# 开窗范围 +# 获取各部门薪金总和(范围:初始行至当前行) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno rows between unbounded preceding and current row) 'sum' +from emp; + +-- rows 启用窗口大小 +-- between ... and ... 范围区间 +-- unbounded preceding 起始行 +-- current row 当前行 +``` + +不同案例: + +```sql +# 范围:前3行 + 当前行 +sum(sal) over(partition by deptno rows between 3 preceding and current row) + +# 范围:前3行 + 当前行 + 后1行 +sum(sal) over(partition by deptno rows between 3 preceding and 1 following) +-- following指当前行之后的行 + +# 范围:当前行至最后一行 +sum(sal) over(partition by deptno rows between current row and unbounded following) +-- unbounded following 最终行 +``` + +### 分布函数 + +##### cume_dist() + +分组内 <= 或 >= 当前值的行数占比(包含当前值本身) + +```sql +# 查询各部门员工小于等于当前薪资的比例 + +select ename,deptno,sal, +cume_dist() over(partition by deptno order by sal) '占比' +from emp; + +-- order by 控制计算的列 +-- asc 小于等于 +-- desc 大于等于 +``` + +### 前后函数 + +返回当前行的前某一行的值,或者后某一行的值 + +lag(列名,前第n行) + +lead(列名,后第n行) + +```sql +select ename,deptno,sal, +lag(sal,1) over(partition by deptno order by sal) '前1', +lag(sal,2) over(partition by deptno order by sal) '前2' +from emp; +``` + +### 头尾函数 + +返回第一个或最后一个列的值 + +first_value(列名) + +last_value(列名) + +```sql +-- 查询各个部门中,第一个和最后一个入职的员工姓名 +select ename,deptno,sal,hiredate, +first_value(ename) over(partition by deptno order by hiredate) 'first', +last_value(ename) over(partition by deptno order by hiredate) 'last' +from emp; +``` + +### 其他函数 + +nth_value(列名,n) —— 返回截止到当前行,该列的第n个值 + +```sql +-- 查找出各个部门,薪资排名第2的员工姓名 +select ename,deptno,sal, +nth_value(ename) over(partition by deptno order by sal) '第二名' +from emp; + +-- 查询全公司,薪资排名第2的员工姓名 +-- 1 +select ename,deptno,sal, +nth_value(ename,2) over(order by sal) '第二名' +from emp; +-- 2 +select ename,deptno,sal, +nth_value(ename,2) over() '第二名' +from emp order by sal; +``` + +ntile(n) —— 将分区中的有序数据分为n个等级,记录等级数 + +```sql +-- 将每个部门员工按照入职日期分成3组 +select ename,deptno,sal, +ntile(3) over(partition by deptno order by hiredate) '组别' +from emp; + +-- 找出每个部门中的老员工?? +``` + +## 作业 + +```mysql + +create table if not exists `employee` +( + `eid` int not null auto_increment comment '员工id' primary key, + `ename` varchar(20) not null comment '员工名称', + `dname` varchar(50) not null comment '部门名称', + `hiredate` datetime not null comment '入职日期', + `birth` date not null comment '生日', + `salary` double null comment '基本薪资', + `start_sal` double null comment '入职薪资' +); + +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('傅嘉熙', '开发部', '2002-08-20 12:00:04','1980-12-10', 9000,6500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('武晟睿', '开发部', '2002-06-12 13:54:12', '1984-2-5',9500,6000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('孙弘文', '开发部', '2003-10-16 08:27:06','1979-8-7', 9400,8000); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('潘乐驹', '开发部', '2004-04-22 03:56:11','1980-5-12', 9500,6800); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('潘昊焱', '人事部', '2007-02-24 03:40:02','1987-2-12', 5000,4500); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('沈涛', '人事部', '2012-12-14 09:16:37','1993-4-30', 6000,5500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('江峻熙', '人事部', '2018-05-12 01:17:48','1990-6-8', 5000,3000); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('陆远航', '人事部', '2018-04-14 03:35:57','1989-11-13', 5500,5000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('姜煜祺', '销售部', '2020-03-23 03:21:05','1995-1-1', 6000,5500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('邹明', '销售部', '2015-11-23 23:10:06','1996-2-19', 6800,6000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('董擎苍', '销售部', '2012-02-12 07:54:32','1985-10-7', 6500,4800); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('钟俊驰', '销售部', '2010-04-10 12:17:06','1981-3-25', 6000,3500); + + +-- 员工按工龄,每年增加50元薪水。实发薪资 = 基本薪资 + 工龄 * 50 + +### -- 窗口函数 + +#求每个部门的员工总数 +select distinct dname,count(eid) over (partition by dname) from employee; + +#求每个部门的平均工资 +select distinct dname,avg(salary) over (partition by dname) from employee; + +#求每个部门的工资排名(从高到低,相同工资并列,并执行跳过排序) +select dname,ename,salary,rank() over (order by salary desc) from employee; + +#求公司所有员工的年龄排序(相同年龄并列,执行跳过排序) +select ename,birth,datediff(now(),birth) div 365 ,rank() over (order by datediff(now(),birth) div 365 desc) from employee; + +#求每个部门的员工工龄排序(相同年龄并列,执行顺序排序) +select ename,hiredate,datediff(now(),hiredate) div 365 ,rank() over (order by datediff(now(),hiredate) div 365 desc) from employee; + +#计算每个员工的工资与该部门平均工资的差额 +select dname,ename,(salary-avg(salary) over (partition by dname)) from employee; + +#按员工工资进行排序,比较相邻两个员工的工资,输出比较高的工资 +select ename,salary,max(salary) over (rows between 1 preceding and 1 following) from employee; + +#按员工工资进行排序,查询当前员工与前一位和后一位的工资平均值 +select ename,salary,avg(salary) over (rows between 1 preceding and 1 following) from employee; + +#按员工工资进行排序,查询当前员工至最后一位员工的工资总和 +select ename,sum(salary) over (rows between current row and unbounded following) from employee; + +#计算每个部门内最高薪资与平均薪资的差额 +select distinct dname,max(salary) over (partition by dname)-avg(salary) over (partition by dname) from employee; + +#找出各部门年薪第二高的员工 +select * from (select dname,eid,ename,salary,dense_rank() over (partition by dname order by salary desc) 排序 from employee) a where 排序=2; + +#查询各部门中小于等于当前员工实际薪资的比例 +select dname,ename,salary,cume_dist() over (partition by dname order by salary asc) from employee; + +#查询每个员工工资在全部员工中的排名比例 +select ename,salary,cume_dist() over (order by salary) from employee; from employee; + +#查询每个部门工资排名在前25%的员工记录数 +select * from (select *,ntile(4) over (partition by dname order by salary desc) a from employee) b where a=1; + +#每个部门按年龄进行排序,求当前员工与前一位员工的年龄差 +select dname,ename,birth,datediff(birth,lag(birth) over (partition by dname order by birth)) from employee; +#按入职日期进行排序,查询公司每个员工与后面一个员工的入职天数差异 + +#将每个部门的员工按工资平均分为2个组,组1为低工资,组2为高工资 +select dname,ename,salary,ntile(2) over (partition by dname order by salary) from employee; + +#将所有员工按照工龄分为4个组,并统计每个组的人数 +select b.a,count(b.a) from (select dname,ename,ntile(4) over (order by birth) a from employee) b group by b.a; + +#将员工按照工资分为3个组,并统计组别,每组平均工资,工资范围(first_value、last_value) +select num,avg(salary),min(salary),max(salary) from (select *,ntile(3) over (order by salary) num from employee) b group by num; + +### -- 非窗口函数 + +#按照工龄区分等级,小于5年为新员工,5-15年为老员工,大于15年为骨灰级员工,输出姓名,部门,工龄,级别 +select ename,dname,hiredate,floor(datediff(curdate(),hiredate)/365),case when floor(datediff(curdate(),hiredate)/365)<5 then '新员工' when floor(datediff(curdate(),hiredate)/365)>15 then '老员工' else '骨灰级员工' end '员工判定' from employee; + +#返回员工的实际年龄,如果小于当前日期则减1岁 +select ename,dname,birth,floor(datediff(curdate(),birth)/365)-case when date_format(curdate(),'%m%d')