Guest User

Untitled

a guest
Jan 18th, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  1. n_obs = 100
  2. spike_perc = .1
  3.  
  4. ###################
  5.  
  6. set.seed(123)
  7.  
  8. n_spikes = floor(spike_perc * n_obs)
  9. inter_arrival = rexp(n_obs)
  10. xv = cumsum(inter_arrival)
  11.  
  12. mu_spike = 5
  13. sd_spike = sqrt(mu_spike)
  14. sd_noise = mu_spike/75
  15. spike_events = sample(n_obs, n_spikes)
  16.  
  17. flat_noise = rnorm(n_obs, 0, sd_noise)
  18. spike_ampl = abs(rnorm(n_spikes, mu_spike, sd_spike))
  19.  
  20. yv = rep(0, n_obs)
  21. yv[spike_events] = yv[spike_events] + spike_ampl
  22. yv = cumsum(yv)
  23. yv = yv + flat_noise
  24.  
  25. dat = c(0,diff(yv))
  26.  
  27. library(igraph)
  28.  
  29. # Helper function. Equivalently, use mefa::stack
  30. row_col_from_condensed_index = function(n, ix){
  31. nr=ceiling(n-(1+sqrt(1+4*(n^2-n-2*ix)))/2)
  32. nc=n-(2*n-nr+1)*nr/2+ix+nr
  33. cbind(nr,nc)
  34. }
  35.  
  36. # Algorithm parameters, may require tuning
  37. rq = .2 # distance quantile for thresholding (higher->fewer outliers)
  38. p = .1 # percent of population necessary for a component to be flagged as an inlier
  39. method='euclidean'
  40.  
  41. # calculate inter-observation distances and threshold to define adjacency matrix
  42. n=NROW(dat)
  43. d = dist(dat, method=method)
  44. r = quantile(d, rq)
  45. edges = t(sapply(which(d<=r), function(x) row_col_from_condensed_index(n,x)))
  46.  
  47. # build adjacency graph
  48. g = graph.empty(n, directed=FALSE)
  49. g[from=edges[,1], to=edges[,2]]=1
  50. #g=nng(data.frame(dat), k=5, use.fnn=TRUE)
  51. V(g)$name = 1:n
  52.  
  53. # Flag outliers based on component size
  54. components = clusters(g, mode="strong")
  55. csize = components$csize[components$membership]
  56. outliers = V(g)[csize<=p*n]$name
Add Comment
Please, Sign In to add comment