diff --git "a/58 \351\231\210\350\203\234\346\235\260/2023-10-13.md" "b/58 \351\231\210\350\203\234\346\235\260/2023-10-13.md" new file mode 100644 index 0000000000000000000000000000000000000000..d60341db138a78844a788ea52721d82f7e54c2a5 --- /dev/null +++ "b/58 \351\231\210\350\203\234\346\235\260/2023-10-13.md" @@ -0,0 +1,311 @@ +# 窗口函数 + +mysql 8.0新增窗口函数(开窗函数)。 + +非聚合窗口函数:是相对于聚合函数来说的,特性为非聚合。一次只处理一行数据。 + +窗口聚合函数:窗口聚合函数在单元行上计算某个字段的结果时,可将窗口范围内的数据输入到聚合函数中,并且不会改变行数。 + + + +### 语法 + +```sql +窗口函数名称(参数) over(partition by ... order by... 窗口大小) +``` + +partition by 分组,等价于group by + +order by 排序 + + + +### 序号函数 + +row_number() :顺序排序,显示每条数据的行号,如:1 2 3 + +rank() :值相同的话,序号也会相同,会跳过,如:1 1 3 + +dense_rank() :值相同的话,序号也会相同,不会跳过,如:1 1 2 + + + +### 开窗聚合函数 + +```sql +sum()|avg()|min()|max()|count()| over (partition by ... order by...) +``` + +案例: + +```sql +# 获取各部门薪金总和(合计) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno) 'sum' +from emp; + +# 获取各部门薪金总和(累加) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno order by sal) 'sum' +from emp; +``` + + + +#### 窗口大小 + +```sql +# 开窗范围 +# 获取各部门薪金总和(范围:初始行至当前行) +select ename,hiredate,deptno,sal, +sum(sal) over(partition by deptno rows between unbounded preceding and current row) 'sum' +from emp; + +-- rows 启用窗口大小 +-- between ... and ... 范围区间 +-- unbounded preceding 起始行 +-- current row 当前行 +``` + +不同案例: + +```sql +# 范围:前3行 + 当前行 +sum(sal) over(partition by deptno rows between 3 preceding and current row) + +# 范围:前3行 + 当前行 + 后1行 +sum(sal) over(partition by deptno rows between 3 preceding and 1 following) +-- following指当前行之后的行 + +# 范围:当前行至最后一行 +sum(sal) over(partition by deptno rows between current row and unbounded following) +-- unbounded following 最终行 +``` + + + +### 分布函数 + +##### cume_dist() + +分组内 <= 或 >= 当前值的行数占比(包含当前值本身) + +```sql +# 查询各部门员工小于等于当前薪资的比例 + +select ename,deptno,sal, +cume_dist() over(partition by deptno order by sal) '占比' +from emp; + +-- order by 控制计算的列 +-- asc 小于等于 +-- desc 大于等于 +``` + + + +### 前后函数 + +返回当前行的前某一行的值,或者后某一行的值 + +lag(列名,前第n行) + +lead(列名,后第n行) + +```sql +select ename,deptno,sal, +lag(sal,1) over(partition by deptno order by sal) '前1', +lag(sal,2) over(partition by deptno order by sal) '前2' +from emp; +``` + + + +### 头尾函数 + +返回第一个或最后一个列的值 + +first_value(列名) + +last_value(列名) + +```sql +-- 查询各个部门中,第一个和最后一个入职的员工姓名 +select ename,deptno,sal,hiredate, +first_value(ename) over(partition by deptno order by hiredate) 'first', +last_value(ename) over(partition by deptno order by hiredate) 'last' +from emp; +``` + + + +### 其他函数 + +nth_value(列名,n) —— 返回截止到当前行,该列的第n个值 + +```sql +-- 查找出各个部门,薪资排名第2的员工姓名 +select ename,deptno,sal, +nth_value(ename) over(partition by deptno order by sal) '第二名' +from emp; + +-- 查询全公司,薪资排名第2的员工姓名 +-- 1 +select ename,deptno,sal, +nth_value(ename,2) over(order by sal) '第二名' +from emp; +-- 2 +select ename,deptno,sal, +nth_value(ename,2) over() '第二名' +from emp order by sal; +``` + +ntile(n) —— 将分区中的有序数据分为n个等级,记录等级数 + +```sql +-- 将每个部门员工按照入职日期分成3组 +select ename,deptno,sal, +ntile(3) over(partition by deptno order by hiredate) '组别' +from emp; + +-- 找出每个部门中的老员工?? +``` + +#### 作业 + +~~~~mysql + +create table if not exists `employee` +( + `eid` int not null auto_increment comment '员工id' primary key, + `ename` varchar(20) not null comment '员工名称', + `dname` varchar(50) not null comment '部门名称', + `hiredate` datetime not null comment '入职日期', + `birth` date not null comment '生日', + `salary` double null comment '基本薪资', + `start_sal` double null comment '入职薪资' +); + +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('傅嘉熙', '开发部', '2002-08-20 12:00:04','1980-12-10', 9000,6500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('武晟睿', '开发部', '2002-06-12 13:54:12', '1984-2-5',9500,6000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('孙弘文', '开发部', '2003-10-16 08:27:06','1979-8-7', 9400,8000); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('潘乐驹', '开发部', '2004-04-22 03:56:11','1980-5-12', 9500,6800); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('潘昊焱', '人事部', '2007-02-24 03:40:02','1987-2-12', 5000,4500); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('沈涛', '人事部', '2012-12-14 09:16:37','1993-4-30', 6000,5500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('江峻熙', '人事部', '2018-05-12 01:17:48','1990-6-8', 5000,3000); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('陆远航', '人事部', '2018-04-14 03:35:57','1989-11-13', 5500,5000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('姜煜祺', '销售部', '2020-03-23 03:21:05','1995-1-1', 6000,5500); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('邹明', '销售部', '2015-11-23 23:10:06','1996-2-19', 6800,6000); +insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('董擎苍', '销售部', '2012-02-12 07:54:32','1985-10-7', 6500,4800); +insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('钟俊驰', '销售部', '2010-04-10 12:17:06','1981-3-25', 6000,3500); + + +# 员工按工龄,每年增加50元薪水。实发薪资 = 基本薪资 + 工龄 * 50 + + + + +### -- 窗口函数 + + + +#求每个部门的员工总数 +select distinct dname,count(ename) over (partition by dname) 总人数 from employee; + +#求每个部门的平均工资 +select distinct dname,avg(salary) over (partition by dname) 平均工资 from employee; + +#求每个部门的工资排名(从高到低,相同工资并列,并执行跳过排序) +select ename,dname,salary,rank() over (partition by dname order by salary desc) 工资排名 from employee; + +#求公司所有员工的年龄排序(相同年龄并列,执行跳过排序) +select ename,birth,rank() over (order by birth) 年龄排名 from employee; + +#求每个部门的员工工龄排序(相同年龄并列,执行顺序排序) +select ename,row_number() over (order by hiredate) 工龄排名,datediff(now(),hiredate) 工作天数 from employee; + +#计算每个员工的工资与该部门平均工资的差额 +select ename,dname,salary,avg(salary) over (partition by dname) 部门平均工资,salary-avg(salary) over (partition by dname) 差额 from employee; + + + +select * from employee; +#按员工工资进行排序,比较相邻两个员工的工资,输出比较高的工资 +select ename,salary, + max(salary) over ( order by salary rows between current row and 1 following) 工资比较, +from employee; +#按员工工资进行排序,查询当前员工与前一位和后一位的工资平均值 +select ename,salary, + avg(salary) over (order by salary rows between 1 preceding and 1 following) +from employee; +#按员工工资进行排序,查询当前员工至最后一位员工的工资总和 +select ename,salary, + sum(salary) over (order by salary rows between current row and unbounded following ) +from employee; +#计算每个部门内最高薪资与平均薪资的差额 +select distinct dname, + max(salary) over (partition by dname) 最高薪资, + avg(salary) over (partition by dname) 平均薪资, + max(salary) over (partition by dname)-avg(salary) over (partition by dname) as 差额 +from employee; + +#找出各部门年薪第二高的员工 +select a.dname,a.salary,a.排名 from + (select dname,salary,dense_rank() over (partition by dname order by salary) 排名 from employee) a +where a.排名 =2; +#查询各部门中 小于等于当前员工实际薪资的比例 +select ename,salary,cume_dist() over (partition by dname order by salary) as 员工实际薪资的比例 from employee; + +#查询每个员工工资在全部员工中的排名比例 +select ename,salary,sum(salary) over (order by salary) 总工资,salary/sum(salary) over () as 排名比例 from employee; +#查询每个部门工资排名在前25%的员工记录数 +select * from employee where eid in( + select eid from + (select eid,dname,cume_dist() over (order by salary) 工资比例 from employee) as a +where a.工资比例 >= 0.25); +#每个部门按年龄进行排序,求当前员工与前一位员工的年龄差 +select ename,birth, + datediff(lag(birth,1) over (order by birth),birth) 年龄差 +from employee; +#按入职日期进行排序,查询公司每个员工与后面一个员工的入职天数差异 +select ename,hiredate, + datediff(lead(hiredate,1) over (order by hiredate),hiredate) 入职天数差异 +from employee; +#将每个部门的员工按工资平均分为2个组,组1为低工资,组2为高工资 +select ename,dname,salary, + ntile(2) over (partition by dname order by salary) 等级 +from employee; +#将所有员工按照工龄分为4个组,并统计每个组的人数 +select *,count(工龄组) over(partition by 工龄组) 组的人数 + from +(select ename,hiredate,ntile(4) over (order by hiredate) 工龄组 from employee) a; +#将员工按照工资分为3个组,并统计组别,每组平均工资,工资范围(first_value、last_value) +select *,avg(salary) over (partition by 组别) as 每组平均工资, + concat_ws(' - ',first_value(salary) over(partition by 组别),last_value(salary) over(partition by 组别)) as 每组工资范围 + from +(select ename,salary,ntile(3) over (order by salary ) as 组别 from employee) a; + +### -- 非窗口函数 + +#按照工龄区分等级,小于5年为新员工,5-15年为老员工,大于15年为骨灰级员工,输出姓名,部门,工龄,级别 +select ename,dname,hiredate,(datediff(now(),hiredate)/365) as 工龄, + case when (datediff(now(),hiredate)/365) < 5 then '新员工' + when (datediff(now(),hiredate)/365) < 15 then '老员工' + else '骨灰级员工' end as 级别 +from employee; +#返回员工的实际年龄,如果小于当前日期则减1岁 +select ename,dname,floor((datediff(now(),birth)/365)) as 年龄 from employee; + + +select * from employee; +#求每个员工还有多少天过生日,并返回下次生日是星期几 +select ename, + abs(datediff(concat_ws('-',year(curdate()),month(birth),day(birth)),curdate())) as 距离生日天数, + date_format(concat_ws('-',year(curdate())+1,month(birth),day(birth)),'%W') as 下次生日星期 +from employee; + +#求每个员工当前实发工资与入职时工资的增长率,输出员工姓名,部门,入职工资,实际工资,增长率 +select ename,dname,hiredate,salary,round((salary-start_sal) / start_sal,2) as 增长率 from employee + + +~~~~ \ No newline at end of file