SHARE
TWEET

Scorecard validation tests

mjaniec Jan 9th, 2014 219 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # visit: http://www.reakkt.com/2014/01/basics-of-scorecard-model-validation.html
  2.  
  3. ### MODEL VALIDATION TESTS
  4.  
  5. library(compositions) # Gauss.test -> Spiegelhalter
  6. source("measures2.R") # calc.Sd - alternative Somer's D
  7.  
  8. ## DISCRIMINATORY POWER
  9.  
  10. ## GAUROC
  11. # s - test score
  12. # r - reference score
  13. GAUROC <- function(s,r) {
  14.  
  15.   n1 <- 0
  16.   n2 <- 0
  17.   n3 <- 0
  18.  
  19.   n <- length(s)
  20.  
  21.   for (i in 1:n) {
  22.    
  23.     Rless <- which(r[i]<r)
  24.    
  25.     n1    <- n1+length(intersect(which(s[i]<s),Rless))
  26.    
  27.     n2    <- n2+length(intersect(which(s[i]==s),Rless))
  28.    
  29.     n3    <- n3+length(Rless)  
  30.    
  31.   }
  32.  
  33.   return((n1+1/2*n2)/n3)
  34.  
  35. }
  36.  
  37. # for reference
  38. GAUROCslow <- function(scoreS,scoreR) {
  39.  
  40.   n1 <- 0
  41.   n2 <- 0
  42.   n3 <- 0
  43.  
  44.   n <- length(scoreS)
  45.  
  46.   for (i in 1:n) {
  47.    
  48.     for (j in 1:n) {
  49.      
  50.       if (scoreS[i]<scoreS[j]  && scoreR[i]<scoreR[j]) n1 <- n1+1
  51.      
  52.       if (scoreS[i]==scoreS[j] && scoreR[i]<scoreR[j]) n2 <- n2+1
  53.      
  54.       if (scoreR[i]<scoreR[j]) n3 <- n3+1        
  55.      
  56.     }
  57.    
  58.   }
  59.  
  60.   # n1;n2;n3
  61.  
  62.   return((n1+1/2*n2)/n3)
  63.  
  64. }
  65.  
  66. ## Somers'D + ASE (Asymetric Standard Error)
  67.  
  68. # companies - number of companies in portfolio
  69. SomersD <- function(companies,PD) {
  70.   # 2DO: needs cleaning
  71.  
  72.   PD    <- PD/100
  73.   total <- companies/sum(companies)
  74.  
  75.   defaulted    <- total*PD
  76.   nondefaulted <- total*(1-PD)
  77.  
  78.   n <- sum(total)
  79.  
  80.   portfolio <- rbind(defaulted,nondefaulted)/n
  81.  
  82.   x <- portfolio
  83.  
  84.   wr <- n^2-sum(sapply(1:nrow(x), function(i) sum(x[i,])^2))
  85.  
  86.   A <- function(x,i,j) {
  87.    
  88.     xr <- nrow(x)
  89.     xc <- ncol(x)
  90.    
  91.     sum(x[1:xr>i,1:xc>j])+sum(x[1:xr<i,1:xc<j])
  92.    
  93.   }
  94.  
  95.   D <- function(x,i,j) {
  96.    
  97.     xr <- nrow(x)
  98.     xc <- ncol(x)
  99.    
  100.     sum(x[1:xr>i,1:xc<j])+sum(x[1:xr<i,1:xc>j])
  101.    
  102.   }
  103.  
  104.   Pf <- function(x) {
  105.    
  106.     xr <- nrow(x)
  107.     xc <- ncol(x)
  108.    
  109.     tmp <- NULL
  110.    
  111.     for (i in 1:xr)
  112.       for (j in 1:xc)
  113.        
  114.         tmp <- c(tmp,x[i,j]*A(x,i,j))
  115.    
  116.     sum(tmp)
  117.    
  118.   }
  119.  
  120.   Qf <- function(x) {
  121.    
  122.     xr <- nrow(x)
  123.     xc <- ncol(x)
  124.    
  125.     tmp <- NULL
  126.    
  127.     for (i in 1:xr)
  128.       for (j in 1:xc)
  129.        
  130.         tmp <- c(tmp,x[i,j]*D(x,i,j))
  131.    
  132.     sum(tmp)
  133.    
  134.   }
  135.  
  136.   P <- Pf(x)
  137.   Q <- Qf(x)
  138.  
  139.   # ASE1
  140.   tmp2 <- NULL
  141.   for (i in 1:nrow(x))
  142.     for (j in 1:ncol(x))  
  143.       tmp2 <- c( tmp2, x[i,j] * ( wr*(A(x,i,j)-D(x,i,j)) - (P-Q)*(sum(x)-sum(x[i,])) )^2 )
  144.  
  145.   # ASE0
  146.   tmp <- NULL
  147.   for (i in 1:nrow(x))
  148.     for (j in 1:ncol(x))  
  149.       tmp <- c( tmp, x[i,j]*(A(x,i,j)-D(x,i,j))^2 )
  150.  
  151.   P <- Pf(x)
  152.   Q <- Qf(x)
  153.  
  154.   return(
  155.           list( SomersD=(P-Q)/wr,
  156.                 ASE1=2/wr^2*sqrt(sum(tmp2)),                # ASE1
  157.                 ASE0=2/wr*sqrt( sum(tmp)-(P-Q)^2/sum(x) ) ) # ASE0 - hypothesis of independence
  158.          )
  159.  
  160. }
  161.  
  162. SomersD.alt <- function(companies,PD) {
  163.  
  164.   PD        <- PD/100
  165.   companies <- companies/100
  166.  
  167.   defaulted    <- companies*PD
  168.  
  169.   nondefaulted <- companies*(1-PD)
  170.  
  171.   n <- sum(companies)
  172.  
  173.   portfolio <- rbind(defaulted,nondefaulted)/n
  174.  
  175.   calc.Sd(portfolio)
  176.  
  177. }
  178.  
  179. ## example:
  180. # PD        <- c(0.05,0.10,0.50,1,2,5,25)
  181. # companies <- c(5,10,20,25,20,15,5)
  182. # SomersD(companies,PD)
  183. # # verification:
  184. # PD <- PD/100
  185. # companies <- companies/100
  186. # defaulted    <- companies*PD
  187. # nondefaulted <- companies*(1-PD)
  188. # n <- sum(companies)
  189. # portfolio <- rbind(defaulted,nondefaulted)/n
  190. # calc.Sd(portfolio)
  191.  
  192. ### CALIBRATION
  193.  
  194. # PD - estimated/predicted PD for a particular risk class
  195. # defaults  - vector of actual defaults over time
  196. # companies - vector of companies over time
  197. NormalTest <- function(PD, defaulted, companies, alpha) {
  198.  
  199.   nT <- length(defaulted)
  200.  
  201.   default.rate <- defaulted/companies # annual defaults rate
  202.  
  203.   # unbiased estimator
  204.   dr.sum  <- sum(default.rate)
  205.   dr.sd2  <- 1/(nT-1) * sum( ( default.rate-1/nT*dr.sum )^2 )
  206.  
  207.   # reject? H0: realized Default Rate  <= q
  208.   #         H1: realized Default Rate  > q
  209.   reject <- round( 1/nT*dr.sum, digits=p) > round( PD+sqrt(dr.sd2/nT)*pnorm(1-alpha)^1, digits=p)
  210.  
  211.   return( list( reject  = reject,
  212.                 mean.dr = mean(default.rate)))
  213.  
  214. }
  215.  
  216. # companies - number of companies in portfolio
  217. HosmerLemeshow <- function (PD,defaulted,companies) {
  218.  
  219.   H <- sum( ((companies*PD-defaulted)^2) / (companies*PD*(1-PD)) )
  220.  
  221.   return( list(H=H, pvalue=1-pchisq(H,df=2)) )
  222.  
  223. }
  224.  
  225. # y  - vector 1/0 - defaulter/non-defaulter
  226. # PD -  PDs for counterparties
  227. # compositions -> Gauss.test
  228. Spiegelhalter <- function(y,PD) {
  229.  
  230.   n <- length(y)
  231.  
  232.   if (n!=length(PD)) error("y and PD sizes don't match.")
  233.  
  234.   # sd(MSE)
  235.   den <- sqrt( 1/n^2 * sum( (1-2*PD)^2*PD*(1-PD) ) )
  236.  
  237.   # [ MSE-E(MSE) ] / sd(MSE)
  238.   Z   <- (mean((y-PD)^2) - mean(PD*(1-PD))) / den
  239.  
  240.   return( list(Z=Z, Gauss=Gauss.test(Z)) )
  241.  
  242. }
  243.  
  244. ## Example:
  245. # PD <- c(0.01,0.02,0.01,0.05,0.20,0.05,0.33,0.02,0.05,0.20)
  246. # y  <- c(   0,   0,   0,   0,   0,   0,   1,   0,   0,   1)
  247. # Spiegelhalter(y,PD)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top