首页 > 代码库 > 分析函数

分析函数

分析函数
基本语法
函数名称([参数]) OVER (
[PARTITION BY 子句 字段,....]
[ORDER BY 子句 字段,..[ASC][DESC][NULLS FIRST][NULLS LAST]]
[WINDOWING 子句]);
使用PARTITION BY子句
-- Shows each employee together with the salary total of the employee's department.
SELECT
    deptno,
    ename,
    sal,
    job,
    SUM(sal) OVER (PARTITION BY deptno) AS sum部门工资总和
FROM emp;
如果不加PARTITION BY
-- Empty OVER(): the whole result set is a single partition, so every row
-- carries the salary total of all employees.
SELECT
    deptno,
    ename,
    sal,
    job,
    SUM(sal) OVER () AS sum全公司工资总和
FROM emp;
使用PARTITION BY子句加多个分区
-- Partitions on two columns: the total is computed per (deptno, job) pair.
SELECT
    deptno,
    ename,
    sal,
    job,
    SUM(sal) OVER (PARTITION BY deptno, job) AS sum
FROM emp;

ORDER BY 子句
查看ORDER BY子句
-- Ranks employees inside each department by salary ascending;
-- equal salaries are ordered by hire date descending.
SELECT
    deptno,
    ename,
    sal,
    hiredate,
    RANK() OVER (
        PARTITION BY deptno
        ORDER BY sal, hiredate DESC
    ) AS rk
FROM emp;
直接用ORDER BY排序
-- ORDER BY without PARTITION BY: with no explicit frame the default window
-- runs from the first row up to the current row, so this is a cumulative
-- salary total in ename order.
SELECT
    deptno,
    ename,
    sal,
    hiredate,
    SUM(sal) OVER (ORDER BY ename) AS sum
FROM emp;
NULLS FIRST 出现NULL值的数据放在前面
NULLS LAST 出现NULL值的数据放在后面
-- Orders by commission descending and forces rows with NULL comm to the end,
-- for both the rank and the cumulative salary total.
SELECT
    deptno,
    ename,
    sal,
    comm,
    RANK() OVER (ORDER BY comm DESC NULLS LAST) AS RK,
    SUM(sal) OVER (ORDER BY comm DESC NULLS LAST) AS sum
FROM emp;

WINDOWING子句
RANGE子句
在sal上设置偏移量

-- Value-based frame: for each row, sums the salaries in the same department
-- whose sal lies between (current sal - 300) and the current sal.
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE 300 PRECEDING
    ) AS sum
FROM emp;
向上N匹配
下面是向下N匹配
-- Value-based frame looking forward: sums the salaries in the same department
-- whose sal lies between the current sal and (current sal + 300).
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE BETWEEN 0 PRECEDING AND 300 FOLLOWING
    ) AS sum
FROM emp;
匹配当前行数据
-- Frame limited to the current value: only rows of the same department
-- with an equal sal are summed together.
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE BETWEEN 0 PRECEDING AND CURRENT ROW
    ) AS sum
FROM emp;
此处用了CURRENT ROW选项,并且向前偏移量为0,表示窗口中只包含与当前行sal值相同的数据,所以只有sal相同的数据才会一起参与SUM求和
使用UNBOUNDED不设置边界
-- UNBOUNDED PRECEDING removes the lower bound: cumulative salary total
-- per department from the lowest sal up to the current sal.
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS sum
FROM emp;

ROWS子句
设置两行偏移

-- Row-based frame: sums the current row and the two rows before it
-- (in sal order) within the same department.
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        ROWS 2 PRECEDING
    ) AS sum
FROM emp;
查询行的范围
-- Frame open on both ends: every row sees all rows of its department,
-- so each row carries the full department total.
SELECT
    deptno,
    ename,
    sal,
    SUM(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS sum
FROM emp;
分析函数的范例
数据统计函数
SUM
MIN
MAX
AVG
COUNT
这些是数据统计函数
查询编号是7369的员工姓名,职位,基本工资,部门编号,部门人数,平均工资,最高工资,最低工资,总工资
-- Department-level statistics for employee 7369.
-- The analytic functions are evaluated in the inline view over all employees;
-- the outer filter then keeps only the requested employee's row.
SELECT *
FROM (
    SELECT
        empno,
        ename,
        job,
        sal,
        deptno,
        COUNT(empno) OVER (PARTITION BY deptno) AS count,
        ROUND(AVG(sal) OVER (PARTITION BY deptno), 2) AS avg,
        MAX(sal) OVER (PARTITION BY deptno) AS max,
        MIN(sal) OVER (PARTITION BY deptno) AS min,
        SUM(sal) OVER (PARTITION BY deptno) AS sum
    FROM emp
) temp
WHERE temp.empno = 7369;
查询每个员工的编号,姓名,基本工资,所在部门名称,部门位置,以及此部门的平均工资,最高和最低工资
emp和dept表
-- Every employee with department name/location and the department's
-- average, highest, lowest and total salary.
-- Uses an explicit INNER JOIN instead of a comma join, and qualifies sal
-- with the emp alias so the column's source table is unambiguous.
-- The frame is unbounded on both sides, so each aggregate covers the
-- whole department regardless of the ORDER BY.
SELECT
    e.empno,
    e.ename,
    e.sal,
    d.dname,
    d.loc,
    ROUND(
        AVG(e.sal) OVER (
            PARTITION BY e.deptno
            ORDER BY e.sal
            RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ),
        2
    ) AS avg,
    MAX(e.sal) OVER (
        PARTITION BY e.deptno
        ORDER BY e.sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS max,
    MIN(e.sal) OVER (
        PARTITION BY e.deptno
        ORDER BY e.sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS min,
    SUM(e.sal) OVER (
        PARTITION BY e.deptno
        ORDER BY e.sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS sum
FROM emp e
INNER JOIN dept d
    ON e.deptno = d.deptno;

等级函数
记录标记函数
RANK()和DENSE_RANK()
观察RANK()和DENSE_RANK()函数
-- Side-by-side comparison of RANK() and DENSE_RANK() per department,
-- both ordered by salary ascending.
SELECT
    deptno,
    ename,
    sal,
    RANK() OVER (PARTITION BY deptno ORDER BY sal) AS rank_result,
    DENSE_RANK() OVER (PARTITION BY deptno ORDER BY sal) AS dense_rank_resut
FROM emp;
可以看出:出现重复值时,RANK()给重复行相同的名次,但后面的名次会跳号(例如1,1,3);DENSE_RANK()同样给重复行相同的名次,但后面的名次每次只+1,不会跳号(例如1,1,2)
行标记函数
ROW_NUMBER()自动生成行记录号,并且不管内容是否重复都会连续编号
-- Two row numberings ordered by salary: one restarting per department,
-- one running across the whole result set.
SELECT
    deptno,
    ename,
    sal,
    ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY sal) AS row_result_deptno,
    ROW_NUMBER() OVER (ORDER BY sal) AS row_result_sal
FROM emp;
使用KEEP语句保留满足条件的数据
-- Highest and lowest salary per department using KEEP (DENSE_RANK ...).
-- With ORDER BY sal ascending, DENSE_RANK LAST keeps the rows with the
-- highest sal and DENSE_RANK FIRST keeps the rows with the lowest sal.
-- The original had FIRST/LAST swapped, so max_sal returned the lowest
-- salary and min_sal the highest.
SELECT
    deptno,
    MAX(sal) KEEP (DENSE_RANK LAST ORDER BY sal) AS max_sal,
    MIN(sal) KEEP (DENSE_RANK FIRST ORDER BY sal) AS min_sal
FROM emp
GROUP BY deptno;
取出首行和尾行的记录
FIRST_VALUE() LAST_VALUE()
-- First and last salary (in ascending sal order) of department 10.
-- The frame is opened on both sides so LAST_VALUE sees the whole partition
-- rather than stopping at the current row.
SELECT
    deptno,
    empno,
    ename,
    sal,
    FIRST_VALUE(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS first_result,
    LAST_VALUE(sal) OVER (
        PARTITION BY deptno
        ORDER BY sal
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_result
FROM emp
WHERE deptno = 10;

比较相邻记录
LAG()取得之前所列数据行的第N行记录显示,如果没有则使用默认值,不设置默认值返回NULL
LEAD()取得之后所列数据的第N行记录显示,如果没有则使用默认值,不设置默认值返回NULL
-- For department 20, shows the salary two rows before and two rows after
-- the current row in sal order; 0 is returned when no such row exists.
SELECT
    deptno,
    empno,
    ename,
    sal,
    LAG(sal, 2, 0) OVER (PARTITION BY deptno ORDER BY sal) AS lag_result,
    LEAD(sal, 2, 0) OVER (PARTITION BY deptno ORDER BY sal) AS lead_result
FROM emp
WHERE deptno = 20;

报表函数
CUME_DIST()计算在分区中的相对位置
NTILE(数字)将一个分区中的有序数据划分为指定数量的小组,并为每一行分配所在小组的编号
RATIO_TO_REPORT(表达式)该函数计算expression/(sum(expression))的值,它给出的是当前值相对于总数据的百分比
验证CUME_DIST()函数
假设分区有5行,则按0.2,0.4,0.6,0.8,1.0进行划分
-- Cumulative distribution of each salary within departments 10 and 20,
-- ordered by sal ascending.
SELECT
    deptno,
    ename,
    sal,
    CUME_DIST() OVER (PARTITION BY deptno ORDER BY sal) AS cume
FROM emp
WHERE deptno IN (10, 20);

NTILE函数
对一个数据区有序结果进行划分,并为每个小组分配唯一的组编号
-- Splits each department's salary-ordered rows into 3 and into 6 groups,
-- next to the cumulative salary total of the department.
SELECT
    deptno,
    sal,
    SUM(sal) OVER (PARTITION BY deptno ORDER BY sal) AS sum,
    NTILE(3) OVER (PARTITION BY deptno ORDER BY sal) AS ntile_a,
    NTILE(6) OVER (PARTITION BY deptno ORDER BY sal) AS ntile_b
FROM emp;

RATIO_TO_REPORT函数可以将需要统计的数据按照整体数据的百分比进行显示

-- Salary total per department and its share of the total over all
-- departments, as a ratio and as a percentage string.
-- The '%' literal now uses ASCII single quotes; the typographic quotes in
-- the original are not valid string delimiters, and the alias was fused
-- to the literal.
SELECT
    deptno,
    SUM(sal),
    ROUND(RATIO_TO_REPORT(SUM(sal)) OVER (), 5) AS rate,
    ROUND(RATIO_TO_REPORT(SUM(sal)) OVER (), 5) * 100 || '%' AS precent
FROM emp
GROUP BY deptno;


行列转换

查询每个部门中各个职位的总工资
-- Salary total for every (department, job) combination, one row per pair.
SELECT
    deptno,
    job,
    SUM(sal)
FROM emp
GROUP BY
    deptno,
    job;
第2种实现
用DECODE显示
-- Row-to-column conversion with DECODE: one row per department, one salary
-- total column per job. DECODE yields sal when the job matches and 0
-- otherwise, so jobs absent from a department show 0.
-- Job names are written with ASCII single quotes; the typographic quotes
-- in the original are not valid string delimiters.
SELECT
    deptno,
    SUM(DECODE(job, 'PRESIDENT', sal, 0)) AS PRESIDENT_JOB,
    SUM(DECODE(job, 'MANAGER', sal, 0)) AS MANAGER_JOB,
    SUM(DECODE(job, 'ANALYST', sal, 0)) AS ANALYST_JOB,
    SUM(DECODE(job, 'CLERK', sal, 0)) AS CLERK_JOB,
    SUM(DECODE(job, 'SALESMAN', sal, 0)) AS SALESMAN_JOB
FROM emp
GROUP BY deptno;
不使用DECODE来实现
-- Row-to-column conversion without DECODE: each scalar subquery returns
-- the employee's own sal when the job matches and NULL otherwise; the
-- outer query then totals the columns per department.
-- A job absent from a department therefore shows NULL rather than 0.
-- Job names are written with ASCII single quotes (the typographic quotes
-- in the original are not valid string delimiters) and are separated
-- from AND by a space.
SELECT
    temp.dno,
    SUM(PRESIDENT_JOB),
    SUM(MANAGER_JOB),
    SUM(ANALYST_JOB),
    SUM(CLERK_JOB),
    SUM(SALESMANJOB)
FROM (
    SELECT
        deptno AS dno,
        (SELECT SUM(sal) FROM emp WHERE job = 'PRESIDENT' AND empno = e.empno) AS PRESIDENT_JOB,
        (SELECT SUM(sal) FROM emp WHERE job = 'MANAGER' AND empno = e.empno) AS MANAGER_JOB,
        (SELECT SUM(sal) FROM emp WHERE job = 'ANALYST' AND empno = e.empno) AS ANALYST_JOB,
        (SELECT SUM(sal) FROM emp WHERE job = 'CLERK' AND empno = e.empno) AS CLERK_JOB,
        (SELECT SUM(sal) FROM emp WHERE job = 'SALESMAN' AND empno = e.empno) AS SALESMANJOB
    FROM emp e
) temp
GROUP BY temp.dno
ORDER BY temp.dno DESC;

PIVOT函数和UNPIVOT函数
-- PIVOT: turns the job values into columns holding the salary total per
-- department. Job names are written with ASCII single quotes; the
-- typographic quotes in the original are not valid string delimiters.
SELECT *
FROM (
    SELECT deptno, job, sal
    FROM emp
)
PIVOT (
    SUM(sal)
    FOR job IN (
        'PRESIDENT' AS PRESIDENT_JOB,
        'MANAGER' AS MANAGER_JOB,
        'ANALYST' AS ANALYST_JOB,
        'CLERK' AS CLERK_JOB,
        'SALESMAN' AS SALESMANJOB
    )
)
ORDER BY deptno;
使用XML与ANY
如果在PIVOT中增加了XML显示,可以利用ANY设置所要操作的所有数据
-- PIVOT XML with ANY: no explicit job list is given; the pivoted values
-- for each department are returned as XML.
SELECT *
FROM (
    SELECT deptno, job, sal
    FROM emp
)
PIVOT XML (
    SUM(sal)
    FOR job IN (ANY)
)
ORDER BY deptno;
查询更多统计信息
-- PIVOT over a source that also carries per-department total, highest and
-- lowest salary; those extra columns are kept alongside the per-job
-- salary totals.
-- Job names are written with ASCII single quotes; the typographic quotes
-- in the original are not valid string delimiters.
SELECT *
FROM (
    SELECT
        deptno,
        job,
        sal,
        SUM(sal) OVER (PARTITION BY deptno) AS sumsal,
        MAX(sal) OVER (PARTITION BY deptno) AS maxsal,
        MIN(sal) OVER (PARTITION BY deptno) AS minsal
    FROM emp
)
PIVOT (
    SUM(sal)
    FOR job IN (
        'PRESIDENT' AS PRESIDENT_JOB,
        'MANAGER' AS MANAGER_JOB,
        'ANALYST' AS ANALYST_JOB,
        'CLERK' AS CLERK_JOB,
        'SALESMAN' AS SALESMANJOB
    )
)
ORDER BY deptno;

设置多个统计函数,查询每个部门不同职位的总工资,以及每个部门不同职位的最高工资
-- PIVOT with two aggregates: for every department, the salary total and
-- the highest salary of each job.
-- Job names are written with ASCII single quotes; the typographic quotes
-- in the original are not valid string delimiters.
SELECT *
FROM (
    SELECT deptno, job, sal
    FROM emp
)
PIVOT (
    SUM(sal) AS sum,
    MAX(sal) AS max
    FOR job IN (
        'PRESIDENT' AS PRESIDENT_JOB,
        'MANAGER' AS MANAGER_JOB,
        'ANALYST' AS ANALYST_JOB,
        'CLERK' AS CLERK_JOB,
        'SALESMAN' AS SALESMANJOB
    )
)
ORDER BY deptno;
使用UNPIVOT
-- UNPIVOT: first pivots job salary totals into columns (CTE temp), then
-- turns those columns back into (job, sal_sum) rows.
-- INCLUDE NULLS keeps the rows whose pivoted total is NULL.
-- Job names are written with ASCII single quotes; the typographic quotes
-- in the original are not valid string delimiters.
WITH temp AS (
    SELECT *
    FROM (
        SELECT deptno, job, sal
        FROM emp
    )
    PIVOT (
        SUM(sal)
        FOR job IN (
            'PRESIDENT' AS PRESIDENT_JOB,
            'MANAGER' AS MANAGER_JOB,
            'ANALYST' AS ANALYST_JOB,
            'CLERK' AS CLERK_JOB,
            'SALESMAN' AS SALESMANJOB
        )
    )
    ORDER BY deptno
)
SELECT *
FROM temp
UNPIVOT INCLUDE NULLS (
    sal_sum FOR job IN (
        PRESIDENT_JOB AS 'PRESIDENT',
        MANAGER_JOB AS 'MANAGER',
        ANALYST_JOB AS 'ANALYST',
        CLERK_JOB AS 'CLERK',
        SALESMANJOB AS 'SALESMAN'
    )
)
ORDER BY deptno;

分析函数