Guest User

Untitled

a guest
Dec 5th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.29 KB | None | 0 0
  1. import seaborn as sns
  2. import pymc3 as pm
  3. import numpy as np
  4. from scipy.stats import norm
  5. import pandas as pd
  6.  
  7. import theano.tensor as T
  8. from theano.compile.ops import as_op
  9.  
  10. # Generate True data
  11. n = 500
  12. theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
  13. mu_true = 1
  14. sigma_true = 2.5
  15. true_underlying = np.random.normal(mu_true, sigma_true, size=n)
  16. true_bucketed = np.digitize(true_underlying, theta_true)
  17.  
  18. # we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
  19. n_y_levels = len(theta_true) + 1
  20. thresh_mus = [k + .5 for k in range(1, n_y_levels)]
  21. thresh_mus_observed = np.array(thresh_mus)
  22. thresh_mus_observed[1:-1] = -999
  23. thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)
  24.  
  25. @as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
  26. def bucket_probabilities(theta, mu, sigma):
  27. """Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome."""
  28. out = np.empty(n_y_levels)
  29. n = norm(loc=mu, scale=sigma)
  30. out[0] = n.cdf(theta[0])
  31. out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
  32. out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
  33. out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
  34. out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
  35. out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
  36. out[6] = 1 - n.cdf(theta[5])
  37. return out
  38.  
  39. with pm.Model() as ordinal_model:
  40. theta = pm.Normal('theta',
  41. mu=thresh_mus,
  42. tau=np.repeat(.5**2, len(thresh_mus)),
  43. shape=len(thresh_mus),
  44. observed=thresh_mus_observed,
  45. testval=thresh_mus[1:-1])
  46. mu = pm.Normal('mu',
  47. mu=n_y_levels/2.0,
  48. tau=1.0/(n_y_levels**2),
  49. testval=1.0)
  50. sigma = pm.Uniform('sigma',
  51. n_y_levels/1000.0,
  52. n_y_levels*10.0,
  53. testval=2.5)
  54.  
  55. pr = bucket_probabilities(theta, mu, sigma)
  56.  
  57. obs = pm.Categorical('obs', pr, observed=true_bucketed)
  58.  
  59. # must specify Metropolis because Theano can't compute gradient of bucket_probabilities
  60. step = pm.Metropolis([theta, mu, sigma, pr, obs])
  61. trace = pm.sample(8000, step=step)
Add Comment
Please, Sign In to add comment