Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Function support for the math final
- ll <- c('dplyr','magrittr','tidyr','ggplot2','cowplot','ggrepel','GGally','broom','forcats',
- 'stringr','reshape2','gridExtra','grid','latex2exp','MASS')
- sapply(ll,function(l) require(l,character.only = T))
- # Clean up
- rm(list=ls())
- dir <- 'C:/Users/erikinwest/Documents/Courses/MATH_896/final/'
- setwd(dir)
- source('C:/Users/erikinwest/Documents/R/funzies.R')
- options(max.print = 75)
- ################################################
- library(gtools)
- # Set the theta/n we care about
- theta <- 1
- n <- 10
- # Make a function that will sample from our weird distribution
- rtheta <- function(theta,n) {
- # Sample from 0 to 3 theta
- samp <- runif(n,0,4*theta)
- # Find the "good" index
- good.idx <- (samp<=theta & samp>=0) | (samp>=2*theta & samp <=3*theta)
- good.samp <- samp[good.idx]
- # With probability 10% take one of the bad guys
- if( 0.9 < runif(1) ) {
- good.samp <- c(good.samp,samp[!good.idx][1])
- # print ('bad!')
- } else { }
- # Return
- return(good.samp)
- }
- # Now we get our three statistics
- tX <- function(x) {
- x <- sort(x)
- # Get maximum
- m <- max(x)
- # Convert data to distance from max/2
- x2m <- x - m/2
- # If only positive/negative then return the first as such
- if (all(x2m>0)) {
- fl <- x[length(x)]
- fr <- x[1]
- } else if (all(x2m<0)) {
- fl <- x[length(x)]
- fr <- x[1]
- } else {
- # Find the first point to the left
- idx.fl <- which(x2m<0)
- idx.fl <- idx.fl[length(idx.fl)]
- fl <- x[idx.fl]
- fr <- x[idx.fl+1]
- }
- # Return
- return(c(m,fl,fr))
- }
- # Now do the check
- check.stat <- function(t,theta) {
- # case 1: the max is less than theta
- c1 <- (findInterval(t[1],c(0,theta),rightmost.closed = T,left.open = T)==1)*1
- # Case 2: fl and fr are between 2theta and 3theta (max < 3*theta)
- c2 <- (findInterval(t[2],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1) *
- (findInterval(t[3],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1) *
- (t[1]<=3*theta)
- # Case 3: fl between 0,theta, fr between 2theta,3theta
- c3 <- (findInterval(t[2],c(0,theta),rightmost.closed = T,left.open = T)==1) *
- (findInterval(t[3],c(2*theta,3*theta),rightmost.closed = T,left.open = T)==1)*
- (t[1]<=3*theta)
- # Check if any conditions are met
- check <- any(c(c1,c2,c3)==1)
- return(check)
- }
- # Now we do some simulations
- nsim <- 1000
- sim.df <- data.frame(matrix(NA,ncol=2,nrow=nsim)) %>% set_colnames(c('lv','check')) %>% tbl_df
- dstore <- list()
- set.seed(nsim)
- for (k in 1:nsim) {
- l <- 1
- while(l < 2) {
- # Generate some data (at least length 2)
- data <- rtheta(theta=1,n=10)
- l <- length(data)
- }
- # Get the statistics
- t <- data %>% tX
- # Check the data
- accept <- t %>% check.stat(theta=1)
- # Store
- sim.df[k,] <- c(data[length(data)],accept)
- names(data) <- paste('v',1:length(data),sep='')
- dstore[[k]] <- data
- }
- # -------- CHECK NOTHING SLIPPED THROUGH! ---------- #
- d.df <- do.call('smartbind',dstore) %>% tbl_df %>% mutate(rid=1:nrow(.))
- # Are all obs below 0 and above 3*theta spotted as bad?
- sim.df %>% filter(lv<0 | lv>3*theta) %>% use_series(check) %>% table
- # Are all obs between theta,2*theta spotted as base
- sim.df %>% mutate(rid=1:nrow(.)) %>% filter(lv>theta & lv<2*theta) %>% use_series(check) %>% table
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement