Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- n_obs = 100
- spike_perc = .1
- ###################
- set.seed(123)
- n_spikes = floor(spike_perc * n_obs)
- inter_arrival = rexp(n_obs)
- xv = cumsum(inter_arrival)
- mu_spike = 5
- sd_spike = sqrt(mu_spike)
- sd_noise = mu_spike/75
- spike_events = sample(n_obs, n_spikes)
- flat_noise = rnorm(n_obs, 0, sd_noise)
- spike_ampl = abs(rnorm(n_spikes, mu_spike, sd_spike))
- yv = rep(0, n_obs)
- yv[spike_events] = yv[spike_events] + spike_ampl
- yv = cumsum(yv)
- yv = yv + flat_noise
- dat = c(0,diff(yv))
- library(igraph)
- # Helper function. Equivalently, use mefa::stack
- row_col_from_condensed_index = function(n, ix){
- nr=ceiling(n-(1+sqrt(1+4*(n^2-n-2*ix)))/2)
- nc=n-(2*n-nr+1)*nr/2+ix+nr
- cbind(nr,nc)
- }
- # Algorithm parameters, may require tuning
- rq = .2 # distance quantile for thresholding (higher->fewer outliers)
- p = .1 # percent of population necessary for a component to be flagged as an inlier
- method='euclidean'
- # calculate inter-observation distances and threshold to define adjacency matrix
- n=NROW(dat)
- d = dist(dat, method=method)
- r = quantile(d, rq)
- edges = t(sapply(which(d<=r), function(x) row_col_from_condensed_index(n,x)))
- # build adjacency graph
- g = graph.empty(n, directed=FALSE)
- g[from=edges[,1], to=edges[,2]]=1
- #g=nng(data.frame(dat), k=5, use.fnn=TRUE)
- V(g)$name = 1:n
- # Flag outliers based on component size
- components = clusters(g, mode="strong")
- csize = components$csize[components$membership]
- outliers = V(g)[csize<=p*n]$name
Add Comment
Please, Sign In to add comment