# 窗口函数

窗口函数的语法结构是：

```mysql
函数 OVER ([PARTITION BY 字段名] [ORDER BY 字段名 ASC|DESC] [FRAME子句])
```

或者是：

```mysql
函数 OVER 窗口名 … WINDOW 窗口名 AS ([PARTITION BY 字段名] [ORDER BY 字段名 ASC|DESC] [FRAME子句])
```

* OVER 关键字指定函数窗口的范围。
    * 如果省略后面括号中的内容，则窗口会包含满足 WHERE 条件的所有记录，窗口函数会基于所有满足 WHERE 条件的记录进行计算。
    * 如果 OVER 关键字后面的括号不为空，则可以使用如下子句设置窗口（各子句都是可选的，可以单独使用）。
* 窗口名：为窗口设置一个别名，用来标识窗口。
* PARTITION BY 子句：指定窗口函数按照哪些字段进行分组。分组后，窗口函数可以在每个分组中分别执行。
* ORDER BY 子句：指定窗口函数按照哪些字段进行排序。执行排序操作使窗口函数按照排序后的数据记录的顺序进行编号。
* FRAME 子句：为分区中的某个子集定义规则，可以用来作为滑动窗口使用。

### 序号函数

row_number()、rank()、dense_rank()

* row_number()：按顺序连续编号，相同值不并列（1、2、3、4）。
* rank()：相同值并列，并跳过被占用的序号（1、1、3、4）。
* dense_rank()：相同值并列，但不跳过序号（1、1、2、3）。

### 开窗聚合函数

```sql
sum()|avg()|min()|max()|count() over (partition by ... order by...)
```

案例：

```sql
# 获取各部门薪金总和（合计）
select ename,hiredate,deptno,sal,
sum(sal) over(partition by deptno) 'sum'
from emp;

# 获取各部门薪金总和（累加）
select ename,hiredate,deptno,sal,
sum(sal) over(partition by deptno order by sal) 'sum'
from emp;
```

#### 窗口大小

```sql
# 开窗范围
# 获取各部门薪金总和（范围：初始行至当前行）
# 注意：这里没有指定 order by，分区内行的先后顺序不确定；实际使用 rows 时应同时指定 order by。
select ename,hiredate,deptno,sal,
sum(sal) over(partition by deptno rows between unbounded preceding and current row) 'sum'
from emp;

-- rows 启用窗口大小
-- between ... and ... 
-- 范围区间
-- unbounded preceding 起始行
-- current row 当前行
```

### 公用表表达式

公用表表达式（或通用表表达式）简称为 CTE（Common Table Expressions）。CTE 是一个命名的临时结果集，作用范围是当前语句。CTE 可以理解成一个可以复用的子查询，当然跟子查询还是有点区别的，CTE 可以引用其他 CTE，但子查询不能引用其他子查询。所以，可以考虑用它代替子查询。

依据语法结构和执行方式的不同，公用表表达式分为普通公用表表达式和递归公用表表达式 2 种。

#### 1. 普通公用表表达式

```mysql
WITH CTE名称
AS (子查询)
SELECT|DELETE|UPDATE 语句;
```

```mysql
with
  a as ( select department_id, min(hire_date) a1 from employees GROUP BY department_id ),
  b as ( select department_id, max(hire_date) b1 from employees GROUP BY department_id )
select a.department_id,a1 最早入职,b1 最晚入职 from a,b where a.department_id = b.department_id;
```

普通公用表表达式类似于子查询，不过，跟子查询不同的是，它可以被多次引用，而且可以被其他的普通公用表表达式所引用。

#### 2. 递归公用表表达式

递归公用表表达式也是一种公用表表达式，只不过，除了普通公用表表达式的特点以外，它还有自己的特点，就是可以调用自己。它的语法结构是：

```mysql
WITH RECURSIVE
CTE名称 AS (子查询)
SELECT|DELETE|UPDATE 语句;
```

递归公用表表达式由 2 部分组成，分别是种子查询和递归查询，中间通过关键字 UNION [ALL] 进行连接。这里的种子查询，意思就是获得递归的初始值。这个查询只会运行一次，以创建初始数据集，之后递归查询会一直执行，直到没有任何新的查询数据产生，递归返回。

# 作业

```mysql
create table if not exists `employee`
(
    `eid` int not null auto_increment comment '员工id' primary key,
    `ename` varchar(20) not null comment '员工名称',
    `dname` varchar(50) not null comment '部门名称',
    `hiredate` datetime not null comment '入职日期',
    `birth` date not null comment '生日',
    `salary` double null comment '基本薪资',
    `start_sal` double null comment '入职薪资'
);

insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('傅嘉熙', '开发部', '2002-08-20 12:00:04','1980-12-10', 9000,6500);
insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('武晟睿', '开发部', '2002-06-12 13:54:12', '1984-2-5',9500,6000);
insert into `employee` (`ename`, `dname`, `hiredate`, `birth`,`salary`,`start_sal`) values ('孙弘文', '开发部', '2003-10-16 08:27:06','1979-8-7', 9400,8000);
insert into `employee` (`ename`, `dname`, `hiredate`,`birth`, `salary`,`start_sal`) values ('潘乐驹', '开发部', '2004-04-22 03:56:11','1980-5-12', 9500,6800);
insert into `employee`
(`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('潘昊焱', '人事部', '2007-02-24 03:40:02', '1987-2-12', 5000, 4500);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('沈涛', '人事部', '2012-12-14 09:16:37', '1993-4-30', 6000, 5500);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('江峻熙', '人事部', '2018-05-12 01:17:48', '1990-6-8', 5000, 3000);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('陆远航', '人事部', '2018-04-14 03:35:57', '1989-11-13', 5500, 5000);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('姜煜祺', '销售部', '2020-03-23 03:21:05', '1995-1-1', 6000, 5500);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('邹明', '销售部', '2015-11-23 23:10:06', '1996-2-19', 6800, 6000);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('董擎苍', '销售部', '2012-02-12 07:54:32', '1985-10-7', 6500, 4800);
INSERT INTO `employee` (`ename`, `dname`, `hiredate`, `birth`, `salary`, `start_sal`) VALUES ('钟俊驰', '销售部', '2010-04-10 12:17:06', '1981-3-25', 6000, 3500);


# Exercise: pay rises by 50 for every year of service.
# Actual pay = base salary + years of service * 50.
# TIMESTAMPDIFF(YEAR, ...) counts completed years only; YEAR(NOW()) - YEAR(hiredate)
# would credit a full year before the hire anniversary has been reached.
SELECT
    e.*,
    e.salary + TIMESTAMPDIFF(YEAR, e.hiredate, CURDATE()) * 50 AS 实发薪资
FROM employee AS e;


# Exercise: number of employees in each department.
# The window function yields one value per employee row; DISTINCT collapses the
# result to one row per department.
SELECT DISTINCT
    dname,
    COUNT(*) OVER (PARTITION BY dname) AS 员工总数
FROM employee;

# Exercise: average salary of each department.
SELECT DISTINCT
    dname,
    AVG(salary) OVER (PARTITION BY dname) AS 平均工资
FROM employee;

# Exercise: salary ranking inside each department, highest first; equal salaries
# share a rank and the following rank is skipped (RANK).
# DESC is required: the default ascending order ranks the lowest salary first.
SELECT
    e.*,
    RANK() OVER (PARTITION BY e.dname ORDER BY e.salary DESC) AS salary_rank
FROM employee AS e;

# Exercise: company-wide ranking by age, oldest first; equal ages share a rank and
# the following rank is skipped (RANK).
# Ordering by the age in whole years (not by the raw birth date) is what lets two
# employees of the same age tie.
SELECT
    e.*,
    RANK() OVER (ORDER BY TIMESTAMPDIFF(YEAR, e.birth, CURDATE()) DESC) AS age_rank
FROM employee AS e;

# Exercise: seniority ranking inside each department, longest-serving first; equal
# seniority shares a rank and no rank is skipped (DENSE_RANK).
SELECT
    e.*,
    DENSE_RANK() OVER (
        PARTITION BY e.dname
        ORDER BY TIMESTAMPDIFF(YEAR, e.hiredate, CURDATE()) DESC
    ) AS seniority_rank
FROM employee AS e;

# Exercise: difference between each employee's salary and the department average.
# A single window over the department replaces the self-join, so every employee
# column appears exactly once in the result.
SELECT
    e.*,
    AVG(e.salary) OVER dept AS avgsal,
    e.salary - AVG(e.salary) OVER dept AS 平均工资的差额
FROM employee AS e
WINDOW dept AS (PARTITION BY e.dname);

+#按员工工资进行排序,比较相邻两个员工的工资,输出比较高的工资 +SELECT *,ROW_NUMBER() over (ORDER BY salary) ,max(salary) over (rows BETWEEN 1 preceding and 1 following) FROM employee; +#按员工工资进行排序,查询当前员工与前一位和后一位的工资平均值 +SELECT *,ROW_NUMBER() over (ORDER BY salary) ,avg(salary) over (rows BETWEEN 1 preceding and 1 following) FROM employee; +#按员工工资进行排序,查询当前员工至最后一位员工的工资总和 +SELECT *,ROW_NUMBER() over (ORDER BY salary) ,sum(salary) over (rows BETWEEN current row and unbounded following) FROM employee; +#计算每个部门内最高薪资与平均薪资的差额 +SELECT DISTINCT dname,max(salary) over (PARTITION by dname)-avg(salary) over (PARTITION by dname) FROM employee; +#找出各部门年薪第二高的员工 + +with +a as(SELECT DISTINCT dname,max(salary) over (PARTITION by dname) msal FROM employee), +b as(SELECT * from employee), +c as(SELECT DISTINCT a.dname,max(salary) over (PARTITION by dname) msa from b,a WHERE a.dname=b.dname and b.salary != a.msal) +SELECT b.* from b,c WHERE b.dname=c.dname and salary=msa; + +#查询各部门中小于等于当前员工实际薪资的比例 +select ename,cume_dist() over (PARTITION by dname ORDER BY salary asc) as a from employee; +#查询每个员工工资在全部员工中的排名比例 +select ename,CUME_DIST() over (ORDER BY salary asc) as a from employee; +#查询每个部门工资排名在前25%的员工记录数 +with +b as(select *,CUME_DIST() over(PARTITION by dname ORDER BY salary asc) a from employee) +select b.dname,count(*) from employee,b where b.ename=employee.ename and a<=0.25; +#每个部门按年龄进行排序,求当前员工与前一位员工的年龄差 +SELECT *,(year(birth)-LAG(year(birth)) over (PARTITION by dname ORDER BY birth)) as a FROM employee ; + +#按入职日期进行排序,查询公司每个员工与后面一个员工的入职天数差异 +select *,DATEDIFF(hiredate,lead(hiredate) over (ORDER BY hiredate)) as a from employee ; +#将每个部门的员工按工资平均分为2个组,组1为低工资,组2为高工资 +select *,case + when ROW_NUMBER() over (PARTITION by dname ORDER BY salary) <=COUNT(*) over (PARTITION by dname)/2 then '低工资' +else '高工资' +end as salary_group +from employee +#将所有员工按照工龄分为4个组,并统计每个组的人数 +SELECT m.a,count(m.a) from (SELECT dname, ename, NTILE(4) OVER (ORDER BY birth) a +FROM employee)m GROUP BY m.a ; + +SELECT + *, + NTILE(4) OVER (ORDER 
BY TIMESTAMPDIFF(YEAR, hiredate, CURDATE())) AS group_number, + COUNT(*) OVER (PARTITION BY NTILE(4) ORDER BY NTILE(4)) AS group_count +FROM employee; +#将员工按照工资分为3个组,并统计组别,每组平均工资,工资范围(first_value、last_value) + +SELECT + group_number, + AVG(salary) AS average_salary, + MIN(salary) AS f, + MAX(salary) AS l +FROM ( + SELECT + *, + NTILE(3) OVER (ORDER BY salary) AS group_number + FROM employee +) AS grouped_employee +GROUP BY group_number; + +### -- 非窗口函数 + +#按照工龄区分等级,小于5年为新员工,5-15年为老员工,大于15年为骨灰级员工,输出姓名,部门,工龄,级别 +SELECT + ename, + dname, + hiredate, + FLOOR(DATEDIFF(CURDATE(), hiredate) / 365) , + CASE + WHEN FLOOR(DATEDIFF(CURDATE(), hiredate) / 365)<5 THEN '新员工' + WHEN FLOOR(DATEDIFF(CURDATE(), hiredate) / 365)<15 THEN '老员工' + ELSE '骨灰级员工' + END AS actual_age +FROM employee; + + + + +#返回员工的实际年龄,如果小于当前日期则减1岁 + +SELECT + ename, + dname, + birth, + FLOOR(DATEDIFF(CURDATE(), birth) / 365) - + CASE + WHEN DATE_FORMAT(CURDATE(), '%m%d') < DATE_FORMAT(birth, '%m%d') THEN 1 + ELSE 0 + END AS actual_age +FROM employee; + + +#求每个员工还有多少天过生日,并返回下次生日是星期几 + +SELECT + ename, + DATEDIFF(CONCAT(YEAR(CURDATE()), '-', MONTH(birth), '-', DAY(birth)), CURDATE()) AS 天, + DATE_FORMAT(CONCAT(YEAR(CURDATE()), '-', MONTH(`birth`), '-', DAY(`birth`)), '%W') AS 星期几 +FROM + employee; + + +#求每个员工当前实发工资与入职时工资的增长率,输出员工姓名,部门,入职工资,实际工资,增长率 +SELECT + employee.ename, + employee.dname, + employee.start_sal AS entry_salary, + employee.salary AS current_salary, + CONCAT(FORMAT(((employee.salary - employee.start_sal) / employee.start_sal) * 100, 2), '%') AS growth_rate +FROM + employee; +``` \ No newline at end of file