Scorecard validation tests

# visit: http://www.reakkt.com/2014/01/basics-of-scorecard-model-validation.html

### MODEL VALIDATION TESTS

library(compositions) # Gauss.test -> Spiegelhalter
source("measures2.R") # calc.Sd - alternative Somer's D

## DISCRIMINATORY POWER

## GAUROC
# s - test score
# r - reference score
GAUROC <- function(s,r) {

  n1 <- 0
  n2 <- 0
  n3 <- 0

  n <- length(s)

  for (i in 1:n) {

    Rless <- which(r[i]<r)

    n1    <- n1+length(intersect(which(s[i]<s),Rless))

    n2    <- n2+length(intersect(which(s[i]==s),Rless))

    n3    <- n3+length(Rless)

  }

  return((n1+1/2*n2)/n3)

}

# for reference
GAUROCslow <- function(scoreS,scoreR) {

  n1 <- 0
  n2 <- 0
  n3 <- 0

  n <- length(scoreS)

  for (i in 1:n) {

    for (j in 1:n) {

      if (scoreS[i]<scoreS[j]  && scoreR[i]<scoreR[j]) n1 <- n1+1

      if (scoreS[i]==scoreS[j] && scoreR[i]<scoreR[j]) n2 <- n2+1

      if (scoreR[i]<scoreR[j]) n3 <- n3+1

    }

  }

  # n1;n2;n3

  return((n1+1/2*n2)/n3)

}

## Somers'D + ASE (Asymetric Standard Error)

# companies - number of companies in portfolio
SomersD <- function(companies,PD) {
  # 2DO: needs cleaning

  PD    <- PD/100
  total <- companies/sum(companies)

  defaulted    <- total*PD
  nondefaulted <- total*(1-PD)

  n <- sum(total)

  portfolio <- rbind(defaulted,nondefaulted)/n

  x <- portfolio

  wr <- n^2-sum(sapply(1:nrow(x), function(i) sum(x[i,])^2))

  A <- function(x,i,j) {

    xr <- nrow(x)
    xc <- ncol(x)

    sum(x[1:xr>i,1:xc>j])+sum(x[1:xr<i,1:xc<j])

  }

  D <- function(x,i,j) {

    xr <- nrow(x)
    xc <- ncol(x)

    sum(x[1:xr>i,1:xc<j])+sum(x[1:xr<i,1:xc>j])

  }

  Pf <- function(x) {

    xr <- nrow(x)
    xc <- ncol(x)

    tmp <- NULL

    for (i in 1:xr)
      for (j in 1:xc)

        tmp <- c(tmp,x[i,j]*A(x,i,j))

    sum(tmp)

  }

  Qf <- function(x) {

    xr <- nrow(x)
    xc <- ncol(x)

    tmp <- NULL

    for (i in 1:xr)
      for (j in 1:xc)

        tmp <- c(tmp,x[i,j]*D(x,i,j))

    sum(tmp)

  }

  P <- Pf(x)
  Q <- Qf(x)

  # ASE1
  tmp2 <- NULL
  for (i in 1:nrow(x))
    for (j in 1:ncol(x))
      tmp2 <- c( tmp2, x[i,j] * ( wr*(A(x,i,j)-D(x,i,j)) - (P-Q)*(sum(x)-sum(x[i,])) )^2 )

  # ASE0
  tmp <- NULL
  for (i in 1:nrow(x))
    for (j in 1:ncol(x))
      tmp <- c( tmp, x[i,j]*(A(x,i,j)-D(x,i,j))^2 )

  P <- Pf(x)
  Q <- Qf(x)

  return(
          list( SomersD=(P-Q)/wr,
                ASE1=2/wr^2*sqrt(sum(tmp2)),                # ASE1
                ASE0=2/wr*sqrt( sum(tmp)-(P-Q)^2/sum(x) ) ) # ASE0 - hypothesis of independence
         )

}

SomersD.alt <- function(companies,PD) {

  PD        <- PD/100
  companies <- companies/100

  defaulted    <- companies*PD

  nondefaulted <- companies*(1-PD)

  n <- sum(companies)

  portfolio <- rbind(defaulted,nondefaulted)/n

  calc.Sd(portfolio)

}

## example:
# PD        <- c(0.05,0.10,0.50,1,2,5,25)
# companies <- c(5,10,20,25,20,15,5)
# SomersD(companies,PD)
# # verification:
# PD <- PD/100
# companies <- companies/100
# defaulted    <- companies*PD
# nondefaulted <- companies*(1-PD)
# n <- sum(companies)
# portfolio <- rbind(defaulted,nondefaulted)/n
# calc.Sd(portfolio)

### CALIBRATION

# PD - estimated/predicted PD for a particular risk class
# defaults  - vector of actual defaults over time
# companies - vector of companies over time
NormalTest <- function(PD, defaulted, companies, alpha) {

  nT <- length(defaulted)

  default.rate <- defaulted/companies # annual defaults rate

  # unbiased estimator
  dr.sum  <- sum(default.rate)
  dr.sd2  <- 1/(nT-1) * sum( ( default.rate-1/nT*dr.sum )^2 )

  # reject? H0: realized Default Rate  <= q
  #         H1: realized Default Rate  > q
  reject <- round( 1/nT*dr.sum, digits=p) > round( PD+sqrt(dr.sd2/nT)*pnorm(1-alpha)^1, digits=p)

  return( list( reject  = reject,
                mean.dr = mean(default.rate)))

}

# companies - number of companies in portfolio
HosmerLemeshow <- function (PD,defaulted,companies) {

  H <- sum( ((companies*PD-defaulted)^2) / (companies*PD*(1-PD)) )

  return( list(H=H, pvalue=1-pchisq(H,df=2)) )

}

# y  - vector 1/0 - defaulter/non-defaulter
# PD -  PDs for counterparties
# compositions -> Gauss.test
Spiegelhalter <- function(y,PD) {

  n <- length(y)

  if (n!=length(PD)) error("y and PD sizes don't match.")

  # sd(MSE)
  den <- sqrt( 1/n^2 * sum( (1-2*PD)^2*PD*(1-PD) ) )

  # [ MSE-E(MSE) ] / sd(MSE)
  Z   <- (mean((y-PD)^2) - mean(PD*(1-PD))) / den

  return( list(Z=Z, Gauss=Gauss.test(Z)) )

}

## Example:
# PD <- c(0.01,0.02,0.01,0.05,0.20,0.05,0.33,0.02,0.05,0.20)
# y  <- c(   0,   0,   0,   0,   0,   0,   1,   0,   0,   1)
# Spiegelhalter(y,PD)