Untitled

# Function support for the math final

ll <- c('dplyr','magrittr','tidyr','ggplot2','cowplot','ggrepel','GGally','broom','forcats',
        'stringr','reshape2','gridExtra','grid','latex2exp','MASS')
sapply(ll,function(l) require(l,character.only = T))

# Clean up
rm(list=ls())
dir <- 'C:/Users/erikinwest/Documents/Courses/MATH_896/final/'
setwd(dir)
source('C:/Users/erikinwest/Documents/R/funzies.R')

options(max.print = 75)

################################################
library(gtools)


# Set the theta/n we care about
theta <- 1
n <- 10

# Make a function that will sample from our weird distribution
rtheta <- function(theta,n) {
  # Sample from 0 to 3 theta
  samp <- runif(n,0,4*theta)
  # Find the "good" index
  good.idx <- (samp<=theta & samp>=0) | (samp>=2*theta & samp <=3*theta)
  good.samp <- samp[good.idx]
  # With probability 10% take one of the bad guys
  if( 0.9 < runif(1) ) {
    good.samp <- c(good.samp,samp[!good.idx][1])
    # print ('bad!')
  } else { }
  # Return
  return(good.samp)
}

# Now we get our three statistics
tX <- function(x) {
  x <- sort(x)
  # Get maximum
  m <- max(x)
  # Convert data to distance from max/2
  x2m <- x - m/2
  # If only positive/negative then return the first as such
  if (all(x2m>0)) {
    fl <- x[length(x)]
    fr <- x[1]
  } else if (all(x2m<0)) {
    fl <- x[length(x)]
    fr <- x[1]
  } else {
    # Find the first point to the left
    idx.fl <- which(x2m<0)
    idx.fl <- idx.fl[length(idx.fl)]
    fl <- x[idx.fl]
    fr <- x[idx.fl+1]
  }
  # Return
  return(c(m,fl,fr))
}

# Now do the check
check.stat <- function(t,theta) {
  # case 1: the max is less than theta
  c1 <- (findInterval(t[1],c(0,theta),rightmost.closed = T,left.open = T)==1)*1
  # Case 2: fl and fr are between 2theta and 3theta (max < 3*theta)
  c2 <- (findInterval(t[2],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1) *
    (findInterval(t[3],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1) *
    (t[1]<=3*theta)
  # Case 3: fl between 0,theta, fr between 2theta,3theta
  c3 <- (findInterval(t[2],c(0,theta),rightmost.closed = T,left.open = T)==1) *
    (findInterval(t[3],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1)*
    (t[1]<=3*theta)
  # Check if any conditions are met
  check <- any(c(c1,c2,c3)==1)
  return(check)
}

# Now we do some simulations
nsim <- 1000
sim.df <- data.frame(matrix(NA,ncol=2,nrow=nsim)) %>% set_colnames(c('lv','check')) %>% tbl_df
dstore <- list()
set.seed(nsim)
for (k in 1:nsim) {
  l <- 1
  while(l < 2) {
    # Generate some data (at least length 2)
    data <- rtheta(theta=1,n=10)
    l <- length(data)
  }
  # Get the statistics
  t <- data %>% tX
  # Check the data
  accept <- t %>% check.stat(theta=1)
  # Store
  sim.df[k,] <- c(data[length(data)],accept)
  names(data) <- paste('v',1:length(data),sep='')
  dstore[[k]] <- data
}

# -------- CHECK NOTHING SLIPPED THROUGH! ---------- #

d.df <- do.call('smartbind',dstore) %>% tbl_df %>% mutate(rid=1:nrow(.))
# Are all obs below 0 and above 3*theta spotted as bad?
sim.df %>% filter(lv<0 | lv>3*theta) %>% use_series(check) %>% table
# Are all obs between theta,2*theta spotted as base
sim.df %>% mutate(rid=1:nrow(.)) %>% filter(lv>theta & lv<2*theta) %>% use_series(check) %>% table