mjaniec

Scorecard validation tests

Jan 9th, 2014
312
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # visit: http://www.reakkt.com/2014/01/basics-of-scorecard-model-validation.html
  2.  
  3. ### MODEL VALIDATION TESTS
  4.  
  5. library(compositions) # Gauss.test -> Spiegelhalter
  6. source("measures2.R") # calc.Sd - alternative Somer's D
  7.  
  8. ## DISCRIMINATORY POWER
  9.  
  10. ## GAUROC
  11. # s - test score
  12. # r - reference score
  13. GAUROC <- function(s,r) {
  14.  
  15.   n1 <- 0
  16.   n2 <- 0
  17.   n3 <- 0
  18.  
  19.   n <- length(s)
  20.  
  21.   for (i in 1:n) {
  22.    
  23.     Rless <- which(r[i]<r)
  24.    
  25.     n1    <- n1+length(intersect(which(s[i]<s),Rless))
  26.    
  27.     n2    <- n2+length(intersect(which(s[i]==s),Rless))
  28.    
  29.     n3    <- n3+length(Rless)  
  30.    
  31.   }
  32.  
  33.   return((n1+1/2*n2)/n3)
  34.  
  35. }
  36.  
  37. # for reference
  38. GAUROCslow <- function(scoreS,scoreR) {
  39.  
  40.   n1 <- 0
  41.   n2 <- 0
  42.   n3 <- 0
  43.  
  44.   n <- length(scoreS)
  45.  
  46.   for (i in 1:n) {
  47.    
  48.     for (j in 1:n) {
  49.      
  50.       if (scoreS[i]<scoreS[j]  && scoreR[i]<scoreR[j]) n1 <- n1+1
  51.      
  52.       if (scoreS[i]==scoreS[j] && scoreR[i]<scoreR[j]) n2 <- n2+1
  53.      
  54.       if (scoreR[i]<scoreR[j]) n3 <- n3+1        
  55.      
  56.     }
  57.    
  58.   }
  59.  
  60.   # n1;n2;n3
  61.  
  62.   return((n1+1/2*n2)/n3)
  63.  
  64. }
  65.  
  66. ## Somers'D + ASE (Asymetric Standard Error)
  67.  
  68. # companies - number of companies in portfolio
  69. SomersD <- function(companies,PD) {
  70.   # 2DO: needs cleaning
  71.  
  72.   PD    <- PD/100
  73.   total <- companies/sum(companies)
  74.  
  75.   defaulted    <- total*PD
  76.   nondefaulted <- total*(1-PD)
  77.  
  78.   n <- sum(total)
  79.  
  80.   portfolio <- rbind(defaulted,nondefaulted)/n
  81.  
  82.   x <- portfolio
  83.  
  84.   wr <- n^2-sum(sapply(1:nrow(x), function(i) sum(x[i,])^2))
  85.  
  86.   A <- function(x,i,j) {
  87.    
  88.     xr <- nrow(x)
  89.     xc <- ncol(x)
  90.    
  91.     sum(x[1:xr>i,1:xc>j])+sum(x[1:xr<i,1:xc<j])
  92.    
  93.   }
  94.  
  95.   D <- function(x,i,j) {
  96.    
  97.     xr <- nrow(x)
  98.     xc <- ncol(x)
  99.    
  100.     sum(x[1:xr>i,1:xc<j])+sum(x[1:xr<i,1:xc>j])
  101.    
  102.   }
  103.  
  104.   Pf <- function(x) {
  105.    
  106.     xr <- nrow(x)
  107.     xc <- ncol(x)
  108.    
  109.     tmp <- NULL
  110.    
  111.     for (i in 1:xr)
  112.       for (j in 1:xc)
  113.        
  114.         tmp <- c(tmp,x[i,j]*A(x,i,j))
  115.    
  116.     sum(tmp)
  117.    
  118.   }
  119.  
  120.   Qf <- function(x) {
  121.    
  122.     xr <- nrow(x)
  123.     xc <- ncol(x)
  124.    
  125.     tmp <- NULL
  126.    
  127.     for (i in 1:xr)
  128.       for (j in 1:xc)
  129.        
  130.         tmp <- c(tmp,x[i,j]*D(x,i,j))
  131.    
  132.     sum(tmp)
  133.    
  134.   }
  135.  
  136.   P <- Pf(x)
  137.   Q <- Qf(x)
  138.  
  139.   # ASE1
  140.   tmp2 <- NULL
  141.   for (i in 1:nrow(x))
  142.     for (j in 1:ncol(x))  
  143.       tmp2 <- c( tmp2, x[i,j] * ( wr*(A(x,i,j)-D(x,i,j)) - (P-Q)*(sum(x)-sum(x[i,])) )^2 )
  144.  
  145.   # ASE0
  146.   tmp <- NULL
  147.   for (i in 1:nrow(x))
  148.     for (j in 1:ncol(x))  
  149.       tmp <- c( tmp, x[i,j]*(A(x,i,j)-D(x,i,j))^2 )
  150.  
  151.   P <- Pf(x)
  152.   Q <- Qf(x)
  153.  
  154.   return(
  155.           list( SomersD=(P-Q)/wr,
  156.                 ASE1=2/wr^2*sqrt(sum(tmp2)),                # ASE1
  157.                 ASE0=2/wr*sqrt( sum(tmp)-(P-Q)^2/sum(x) ) ) # ASE0 - hypothesis of independence
  158.          )
  159.  
  160. }
  161.  
  162. SomersD.alt <- function(companies,PD) {
  163.  
  164.   PD        <- PD/100
  165.   companies <- companies/100
  166.  
  167.   defaulted    <- companies*PD
  168.  
  169.   nondefaulted <- companies*(1-PD)
  170.  
  171.   n <- sum(companies)
  172.  
  173.   portfolio <- rbind(defaulted,nondefaulted)/n
  174.  
  175.   calc.Sd(portfolio)
  176.  
  177. }
  178.  
  179. ## example:
  180. # PD        <- c(0.05,0.10,0.50,1,2,5,25)
  181. # companies <- c(5,10,20,25,20,15,5)
  182. # SomersD(companies,PD)
  183. # # verification:
  184. # PD <- PD/100
  185. # companies <- companies/100
  186. # defaulted    <- companies*PD
  187. # nondefaulted <- companies*(1-PD)
  188. # n <- sum(companies)
  189. # portfolio <- rbind(defaulted,nondefaulted)/n
  190. # calc.Sd(portfolio)
  191.  
  192. ### CALIBRATION
  193.  
  194. # PD - estimated/predicted PD for a particular risk class
  195. # defaults  - vector of actual defaults over time
  196. # companies - vector of companies over time
  197. NormalTest <- function(PD, defaulted, companies, alpha) {
  198.  
  199.   nT <- length(defaulted)
  200.  
  201.   default.rate <- defaulted/companies # annual defaults rate
  202.  
  203.   # unbiased estimator
  204.   dr.sum  <- sum(default.rate)
  205.   dr.sd2  <- 1/(nT-1) * sum( ( default.rate-1/nT*dr.sum )^2 )
  206.  
  207.   # reject? H0: realized Default Rate  <= q
  208.   #         H1: realized Default Rate  > q
  209.   reject <- round( 1/nT*dr.sum, digits=p) > round( PD+sqrt(dr.sd2/nT)*pnorm(1-alpha)^1, digits=p)
  210.  
  211.   return( list( reject  = reject,
  212.                 mean.dr = mean(default.rate)))
  213.  
  214. }
  215.  
  216. # companies - number of companies in portfolio
  217. HosmerLemeshow <- function (PD,defaulted,companies) {
  218.  
  219.   H <- sum( ((companies*PD-defaulted)^2) / (companies*PD*(1-PD)) )
  220.  
  221.   return( list(H=H, pvalue=1-pchisq(H,df=2)) )
  222.  
  223. }
  224.  
  225. # y  - vector 1/0 - defaulter/non-defaulter
  226. # PD -  PDs for counterparties
  227. # compositions -> Gauss.test
  228. Spiegelhalter <- function(y,PD) {
  229.  
  230.   n <- length(y)
  231.  
  232.   if (n!=length(PD)) error("y and PD sizes don't match.")
  233.  
  234.   # sd(MSE)
  235.   den <- sqrt( 1/n^2 * sum( (1-2*PD)^2*PD*(1-PD) ) )
  236.  
  237.   # [ MSE-E(MSE) ] / sd(MSE)
  238.   Z   <- (mean((y-PD)^2) - mean(PD*(1-PD))) / den
  239.  
  240.   return( list(Z=Z, Gauss=Gauss.test(Z)) )
  241.  
  242. }
  243.  
  244. ## Example:
  245. # PD <- c(0.01,0.02,0.01,0.05,0.20,0.05,0.33,0.02,0.05,0.20)
  246. # y  <- c(   0,   0,   0,   0,   0,   0,   1,   0,   0,   1)
  247. # Spiegelhalter(y,PD)
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×