#title SQL Server Analysis Function [[TableOfContents]] 작성 중.. sql server 2011 ctp3 version ==== 테스트 데이터 ==== {{{ --drop table #temp create table #temp ( seq int identity(1,1) , std_dt date , val float ) go insert #temp values ('20110801', 600) , ('20110801', 200) , ('20110801', 800) , ('20110802', 700) , ('20110802', 500) , ('20110802', 100) , ('20110803', 700) , ('20110803', 300) , ('20110803', 900) go }}} ==== sum, accumulate ==== {{{ select std_dt , val , sum(val) over(partition by std_dt) 날짜별전체합 , sum(val) over(partition by std_dt order by seq) 날짜별누적합 , sum(val) over() 전체합 , sum(val) over(order by seq) SEQ로정렬후_누적합 from #temp /* std_dt val 날짜별전체합 날짜별누적합 전체합 SEQ로정렬후_누적합 ---------- ---------------------- ---------------------- ---------------------- ---------------------- ---------------------- 2011-08-01 600 1600 600 4800 600 2011-08-01 200 1600 800 4800 800 2011-08-01 800 1600 1600 4800 1600 2011-08-02 700 1300 700 4800 2300 2011-08-02 500 1300 1200 4800 2800 2011-08-02 100 1300 1300 4800 2900 2011-08-03 700 1900 700 4800 3600 2011-08-03 300 1900 1000 4800 3900 2011-08-03 900 1900 1900 4800 4800 */ --날짜별 가장 큰 값 select * from ( select std_dt , val , max(val) over(partition by std_dt) max_val from #temp ) t where val = max_val /* std_dt val max_val ---------- ---------------------- ---------------------- 2011-08-01 800 800 2011-08-02 700 700 2011-08-03 900 900 */ --값의 평균에 가장 근접한 값은? select * from ( select *, min(abs_val) over() min_val from ( select *, abs(val - avg(val) over()) abs_val from #temp ) t ) t where abs_val = min_val /* seq std_dt val abs_val min_val ----------- ---------- ---------------------- ---------------------- ---------------------- 5 2011-08-02 500 33.3333333333334 33.3333333333334 */ }}} ==== rank, dense_rank, row_number ==== {{{ --순위함수 select std_dt , val , rank() over(partition by std_dt order by val) [std_dt_rank] , rank() over(order by val) [rank] , dense_rank() over(order by val) [dense_rank] , row_number() over(order by val) [row_number] from #temp /* std_dt val std_dt_rank rank dense_rank row_number ---------- ---------------------- -------------------- -------------------- -------------------- -------------------- 2011-08-02 100 1 1 1 1 2011-08-01 200 1 2 2 2 2011-08-03 300 1 3 3 3 2011-08-02 500 2 4 4 4 2011-08-01 600 2 5 5 5 2011-08-02 700 3 6 6 6 2011-08-03 700 2 6 6 7 2011-08-01 800 3 8 7 8 2011-08-03 900 3 9 8 9 */ }}} ==== lead, lag ==== {{{ --앞 뒤행 가져오기 select seq , std_dt , val , lead(seq, 1) over(order by seq) 앞1행 , lag(seq, 1) over(order by seq) 뒤1행 , lead(seq, 2, 0) over(order by seq) 앞2행 --없으면 null대신할 값은 0 , lag(seq, 2, 0) over(order by seq) 뒤2행 --없으면 null대신할 값은 0 from #temp /* seq std_dt val 앞1행 뒤1행 앞2행 뒤2행 ----------- ---------- ---------------------- ----------- ----------- ----------- ----------- 1 2011-08-01 600 2 NULL 3 0 2 2011-08-01 200 3 1 4 0 3 2011-08-01 800 4 2 5 1 4 2011-08-02 700 5 3 6 2 5 2011-08-02 500 6 4 7 3 6 2011-08-02 100 7 5 8 4 7 2011-08-03 700 8 6 9 5 8 2011-08-03 300 9 7 0 6 9 2011-08-03 900 NULL 8 0 7 */ }}} ==== range, rows ==== {{{ --range select seq , std_dt , val , min(val) over(order by seq range between 100 preceding and 200 following) from #temp --메시지 4194, 수준 16, 상태 1, 줄 1 --RANGE는 UNBOUNDED 및 CURRENT ROW 창 프레임 구분 기호에서만 지원됩니다. select seq , std_dt , val , min(val) over(order by seq range between current row and unbounded following) 현재값에서_가장마지막값중_가장작은값 , sum(val) over(order by seq rows between 2 preceding and current row) 현재값에서_2건이전값의_합 , sum(val) over(order by seq rows between unbounded preceding and current row) 현재값에서_처음값의_합 from #temp order by 1 /* seq std_dt val 현재값에서_가장마지막값중_가장작은값 ----------- ---------- ---------------------- ---------------------- 1 2011-08-01 600 100 2 2011-08-01 200 100 3 2011-08-01 800 100 4 2011-08-02 700 100 5 2011-08-02 500 100 6 2011-08-02 100 100 7 2011-08-03 700 300 8 2011-08-03 300 300 9 2011-08-03 900 900 */ --rows select seq , std_dt , val , min(val) over(order by seq rows between 1 preceding and 1 following) 앞뒤로1건중_가장작은값 , min(val) over(order by seq rows between current row and 2 following) 현재행에서앞으로2건중_가장작은값 from #temp order by 1 /* seq std_dt val 앞뒤로1건중_가장작은값 현재행에서앞으로2건중_가장작은값 ----------- ---------- ---------------------- ---------------------- ---------------------- 1 2011-08-01 600 200 200 2 2011-08-01 200 200 200 3 2011-08-01 800 200 500 4 2011-08-02 700 500 100 5 2011-08-02 500 100 100 6 2011-08-02 100 100 100 7 2011-08-03 700 100 300 8 2011-08-03 300 300 300 9 2011-08-03 900 300 900 */ --row 구문에서 "current row"와 "0 preceding"는 동의어 select val , sum(val) over(order by val rows between 0 preceding and 0 following) 현재행 , sum(val) over(order by val rows between 1 preceding and 0 following) 이전1행 , sum(val) over(order by val rows between 0 preceding and 1 following) 이후1행 , sum(val) over(order by val rows between 1 preceding and 1 following) 전후1행 , sum(val) over(order by val range between current row and unbounded following) [현재~마지막] from (values (1),(2),(3),(4),(5),(6),(7)) t(val) order by 1 /* val 현재행 이전1행 이후1행 전후1행 현재~마지막 ----------- ----------- ----------- ----------- ----------- ----------- 1 1 1 3 3 28 2 2 3 5 6 27 3 3 5 7 9 25 4 4 7 9 12 22 5 5 9 11 15 18 6 6 11 13 18 13 7 7 13 7 13 7 */ }}} ==== cume_dist, percent_rank ==== {{{ select seq , std_dt , val , cume_dist() over(order by std_dt) --분포(전체의 몇%나 되나?) , rank() over(order by std_dt) [rank] , percent_rank() over(order by std_dt) [percent_rank] , convert(float, rank() over(order by std_dt)-1) / (count(*) over()-1) calc_percent_rank--percent_rank()계산 from #temp /* seq std_dt val rank percent_rank calc_percent_rank ----------- ---------- ---------------------- ---------------------- -------------------- ---------------------- ---------------------- 1 2011-08-01 600 0.333333333333333 1 0 0 2 2011-08-01 200 0.333333333333333 1 0 0 3 2011-08-01 800 0.333333333333333 1 0 0 4 2011-08-02 700 0.666666666666667 4 0.375 0.375 5 2011-08-02 500 0.666666666666667 4 0.375 0.375 6 2011-08-02 100 0.666666666666667 4 0.375 0.375 7 2011-08-03 700 1 7 0.75 0.75 8 2011-08-03 300 1 7 0.75 0.75 9 2011-08-03 900 1 7 0.75 0.75 */ }}} ==== first_value, last_value ==== {{{ select seq , std_dt , val , first_value(val) over(partition by std_dt order by val) first_val , last_value(val) over(partition by std_dt order by val) last_val--잘 안된다. --rows between unbounded preceding and unbounded following과 같이 해야 한다. from #temp /* seq std_dt val first_val last_val ----------- ---------- ---------------------- ---------------------- ---------------------- 2 2011-08-01 200 200 200 1 2011-08-01 600 200 600 3 2011-08-01 800 200 800 6 2011-08-02 100 100 100 5 2011-08-02 500 100 500 4 2011-08-02 700 100 700 8 2011-08-03 300 300 300 7 2011-08-03 700 300 700 9 2011-08-03 900 300 900 */ }}} ==== percentile_cont, percentile_disc ==== {{{ --백분위 함수 select seq , std_dt , val , cume_dist() over(order by val) cume_dist , percentile_cont(0.5) within group(order by val) over() median_cont--0.5이면 메디안이지.., 연속형 , percentile_disc(0.5) within group(order by val) over() median_disc --이산형 , percentile_disc(0.5) within group(order by std_dt) over() median_dt from #temp /* seq std_dt val cume_dist median_cont median_disc median_dt ----------- ---------- ---------------------- ---------------------- ---------------------- ---------------------- ---------- 2 2011-08-01 200 0.222222222222222 600 600 2011-08-02 1 2011-08-01 600 0.555555555555556 600 600 2011-08-02 3 2011-08-01 800 0.888888888888889 600 600 2011-08-02 4 2011-08-02 700 0.777777777777778 600 600 2011-08-02 6 2011-08-02 100 0.111111111111111 600 600 2011-08-02 5 2011-08-02 500 0.444444444444444 600 600 2011-08-02 8 2011-08-03 300 0.333333333333333 600 600 2011-08-02 7 2011-08-03 700 0.777777777777778 600 600 2011-08-02 9 2011-08-03 900 1 600 600 2011-08-02 */ }}} ==== 참고자료 ==== * [http://www.simple-talk.com/sql/learn-sql-server/window-functions-in-sql-server-part-2-the-frame/ Window Functions in SQL Server: Part 2-The Frame] * [http://www.sqlservercentral.com/articles/SQL+Server+2012/76704/ The new Analytic functions in SQL Server 2012]