#title Confusion Matrix (분류행렬)
[[TableOfContents]]

attachment:ConfusionMatrix/confusion_matrix2.xlsx --> 이걸 써라.

==== Confusion Matrix ====
|| || False(실제) || True(실제) ||
|| False(예측) || 4696 || 2766 ||
|| True(예측) || 304 || 2234 ||

|| 항목 || 계산 || 비율(%) ||
|| 정확도 (Accuracy) || (4696 +  2234) /  (4696 + 2766 + 304 + 2234) ||  69.30% ||
|| 민감도 (sensitivity) || 2234  /  (2766 + 2234) || 44.68% ||
|| 특이도 (specificity) || 4696 / (4696 + 304) || 93.92% ||
|| 오류율 (error) || (2766 + 304 ) /  (4696 + 2766 + 304 + 2234) || 30.70% ||
attachment:ConfusionMatrix/confusion_matrix.xlsx

 * 정확도: 전체 건수 중에서 예측이 실제와 일치한 비율
 * 민감도: 실제 True인 건 중에서 True로 맞게 예측한 비율
 * 특이도: 실제 False인 건 중에서 False로 맞게 예측한 비율
 * 오류율: 전체 건수 중에서 예측이 실제와 다른 비율 (= 1 - 정확도)

==== 참고: 민감도와 특이도 ====
특정 질병에 대한 조사결과..
민감도 (질병에 실제로 걸렸을 때 걸렸다는 검사 결과가 나올 확률)가 99.7%이고 특이도 (질병에 실제로 안 걸렸을 때 안 걸렸다는 검사 결과가 나올 확률)가 98.5%입니다. 이게 무슨 소리냐면, 실제로 병이 있는 사람 1000명을 검사했을 때 병이 없다고 오진받는 사람은 3명 정도이고, 실제로 병이 없는 사람 1000명을 검사했을 때 병이 있다고 오진되는 사람은 15명 정도라는 거죠.


==== t-sql ====
{{{
-- Builds a 2x2 confusion matrix from logistic regression results and prints
-- accuracy, sensitivity, specificity and error rate as percentages.
-- Assumes is_out holds the observed 0/1 outcome and pred the model's
-- predicted probability (TODO confirm against the definition of result_001).
-- NOTE(review): ansi_warnings is presumably switched off to silence the
-- "Null value is eliminated by an aggregate" warning raised by the
-- count(case ... end) expressions below -- confirm before removing.
set ansi_warnings off
set statistics io off
set nocount on 

-- Cell names read <predicted><actual>: F = 0 / below cut-off, T = 1 / at or above it.
-- The counters are float so the ratios below use floating-point division.
declare 
    @std float          -- cut-off applied to pred
,   @FF float           -- predicted 0, actual 0
,   @FT float           -- predicted 0, actual 1
,   @TF float           -- predicted 1, actual 0
,   @TT float           -- predicted 1, actual 1
,   @total float        -- all classified rows, NULL when there are none
,   @actual_t float     -- rows with is_out = 1, NULL when there are none
,   @actual_f float     -- rows with is_out = 0, NULL when there are none

set @std = 0.5

-- Rows matching none of the four cases (e.g. NULL pred or is_out) are not counted.
select 
    @FF = count(case when is_out = 0 and pred < @std then 1 end)
,   @FT = count(case when is_out = 1 and pred < @std then 1 end)
,   @TF = count(case when is_out = 0 and pred >= @std then 1 end)
,   @TT = count(case when is_out = 1 and pred >= @std then 1 end)
from reader.dbo.result_001

-- A zero denominator would either end the batch or yield NULL depending on the
-- session's ARITHABORT setting; nullif makes the outcome a predictable NULL.
set @total    = nullif(@FF + @FT + @TF + @TT, 0)
set @actual_t = nullif(@FT + @TT, 0)
set @actual_f = nullif(@FF + @TF, 0)

-- Matrix layout: rows are the predicted class, columns the actual class.
print 'confusion.matrix'
print '   ' + str(0) + '  ' + str(1)
print '0  ' + str(@FF) + '  ' + str(@FT)
print '1  ' + str(@TF) + '  ' + str(@TT)
print ''
-- An undefined rate prints as N/A instead of turning the whole line into NULL.
print '정확도 = ' + isnull(convert(varchar(30), convert(decimal(18,2), (@FF + @TT) / @total * 100)), 'N/A')
print '민감도 = ' + isnull(convert(varchar(30), convert(decimal(18,2), @TT / @actual_t * 100)), 'N/A')
print '특이도 = ' + isnull(convert(varchar(30), convert(decimal(18,2), @FF / @actual_f * 100)), 'N/A')
print '오류율 = ' + isnull(convert(varchar(30), convert(decimal(18,2), (@FT + @TF) / @total * 100)), 'N/A')

/*
Sample output:

confusion.matrix
            0           1
0        4426         971
1        2674        6129
 
정확도 = 74.33
민감도 = 86.32
특이도 = 62.34
오류율 = 25.67
*/
}}}

==== R ====
{{{
# Tally the four confusion-matrix cells from x3 with a 0.5 cut-off on pred.
# Column names read <predicted><actual>: F = below the cut-off / 0, T = at or above it / 1.
cf <- sqldf ("
select 
    count(case when t1 = 0 and pred < 0.5 then 1 end) FF
,   count(case when t1 = 1 and pred < 0.5 then 1 end) FT
,   count(case when t1 = 0 and pred >= 0.5 then 1 end) TF
,   count(case when t1 = 1 and pred >= 0.5 then 1 end) TT
from x3
")

# Print each rate as a raw fraction (not a percentage).
# local() keeps the helper names out of the calling environment.
invisible(local({
    n_all      <- cf$FF + cf$FT + cf$TF + cf$TT
    n_actual_t <- cf$FT + cf$TT
    n_actual_f <- cf$FF + cf$TF
    n_hit      <- cf$FF + cf$TT
    n_miss     <- cf$FT + cf$TF

    print(paste('정확도 = ', n_hit / n_all))
    print(paste('민감도 = ', cf$TT / n_actual_t))
    print(paste('특이도 = ', cf$FF / n_actual_f))
    print(paste('오류율 = ', n_miss / n_all))
}))
}}}