Advertisement
Guest User

Untitled

a guest
Apr 25th, 2017
579
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 56.90 KB | None | 0 0
  1. from __future__ import print_function
  2.  
  3. """
  4. Markov based methods for spatial dynamics.
  5. """
  6. __author__ = "Sergio J. Rey <srey@asu.edu>"
  7.  
  8. __all__ = ["Markov", "LISA_Markov", "Spatial_Markov", "kullback",
  9. "prais", "shorrock", "homogeneity"]
  10.  
  11. import numpy as np
  12. from pysal.spatial_dynamics.ergodic import fmpt
  13. from pysal.spatial_dynamics.ergodic import steady_state as STEADY_STATE
  14. from scipy import stats
  15. from operator import gt
  16. import pysal
  17.  
  18. # TT predefine LISA transitions
  19. # TT[i,j] is the transition type from i to j
  20. # i = quadrant in period 0
  21. # j = quadrant in period 1
  22. # uses one offset so first row and col of TT are ignored
  23. TT = np.zeros((5, 5), int)
  24. c = 1
  25. for i in range(1, 5):
  26. for j in range(1, 5):
  27. TT[i, j] = c
  28. c += 1
  29.  
  30. # MOVE_TYPES is a dictionary that returns the move type of a LISA transition
  31. # filtered on the significance of the LISA end points
  32. # True indicates significant LISA in a particular period
  33. # e.g. a key of (1, 3, True, False) indicates a significant LISA located in
  34. # quadrant 1 in period 0 moved to quadrant 3 in period 1 but was not
  35. # significant in quadrant 3.
  36.  
  37. MOVE_TYPES = {}
  38. c = 1
  39. cases = (True, False)
  40. sig_keys = [(i, j) for i in cases for j in cases]
  41.  
  42. for i, sig_key in enumerate(sig_keys):
  43. c = 1 + i * 16
  44. for i in range(1, 5):
  45. for j in range(1, 5):
  46. key = (i, j, sig_key[0], sig_key[1])
  47. MOVE_TYPES[key] = c
  48. c += 1
  49.  
  50.  
  51. class Markov(object):
  52. """
  53. Classic Markov transition matrices.
  54.  
  55. Parameters
  56. ----------
  57. class_ids : array
  58. (n, t), one row per observation, one column recording the
  59. state of each observation, with as many columns as time
  60. periods.
  61. classes : array
  62. (k, 1), all different classes (bins) of the matrix.
  63.  
  64. Attributes
  65. ----------
  66. p : matrix
  67. (k, k), transition probability matrix.
  68. steady_state : matrix
  69. (k, 1), ergodic distribution.
  70. transitions : matrix
  71. (k, k), count of transitions between each state i and j.
  72.  
  73. Examples
  74. --------
  75. >>> c = [['b','a','c'],['c','c','a'],['c','b','c']]
  76. >>> c.extend([['a','a','b'], ['a','b','c']])
  77. >>> c = np.array(c)
  78. >>> m = Markov(c)
  79. >>> m.classes.tolist()
  80. ['a', 'b', 'c']
  81. >>> m.p
  82. matrix([[ 0.25 , 0.5 , 0.25 ],
  83. [ 0.33333333, 0. , 0.66666667],
  84. [ 0.33333333, 0.33333333, 0.33333333]])
  85. >>> m.steady_state
  86. matrix([[ 0.30769231],
  87. [ 0.28846154],
  88. [ 0.40384615]])
  89.  
  90. US nominal per capita income 48 states 81 years 1929-2009
  91.  
  92. >>> import pysal
  93. >>> f = pysal.open(pysal.examples.get_path("usjoin.csv"))
  94. >>> pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
  95.  
  96. set classes to quintiles for each year
  97.  
  98. >>> q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
  99. >>> m = Markov(q5)
  100. >>> m.transitions
  101. array([[ 729., 71., 1., 0., 0.],
  102. [ 72., 567., 80., 3., 0.],
  103. [ 0., 81., 631., 86., 2.],
  104. [ 0., 3., 86., 573., 56.],
  105. [ 0., 0., 1., 57., 741.]])
  106. >>> m.p
  107. matrix([[ 0.91011236, 0.0886392 , 0.00124844, 0. , 0. ],
  108. [ 0.09972299, 0.78531856, 0.11080332, 0.00415512, 0. ],
  109. [ 0. , 0.10125 , 0.78875 , 0.1075 , 0.0025 ],
  110. [ 0. , 0.00417827, 0.11977716, 0.79805014, 0.07799443],
  111. [ 0. , 0. , 0.00125156, 0.07133917, 0.92740926]])
  112. >>> m.steady_state
  113. matrix([[ 0.20774716],
  114. [ 0.18725774],
  115. [ 0.20740537],
  116. [ 0.18821787],
  117. [ 0.20937187]])
  118.  
  119. Relative incomes
  120.  
  121. >>> pci = pci.transpose()
  122. >>> rpci = pci/(pci.mean(axis=0))
  123. >>> rq = pysal.Quantiles(rpci.flatten()).yb
  124. >>> rq.shape = (48,81)
  125. >>> mq = Markov(rq)
  126. >>> mq.transitions
  127. array([[ 707., 58., 7., 1., 0.],
  128. [ 50., 629., 80., 1., 1.],
  129. [ 4., 79., 610., 73., 2.],
  130. [ 0., 7., 72., 650., 37.],
  131. [ 0., 0., 0., 48., 724.]])
  132. >>> mq.steady_state
  133. matrix([[ 0.17957376],
  134. [ 0.21631443],
  135. [ 0.21499942],
  136. [ 0.21134662],
  137. [ 0.17776576]])
  138.  
  139. """
  140. def __init__(self, class_ids, classes=None):
  141. if classes is not None:
  142. self.classes = classes
  143. else:
  144. self.classes = np.unique(class_ids)
  145.  
  146. n, t = class_ids.shape
  147. k = len(self.classes)
  148. js = range(t - 1)
  149.  
  150. classIds = self.classes.tolist()
  151. transitions = np.zeros((k, k))
  152. for state_0 in js:
  153. state_1 = state_0 + 1
  154. state_0 = class_ids[:, state_0]
  155. state_1 = class_ids[:, state_1]
  156. initial = np.unique(state_0)
  157. for i in initial:
  158. ending = state_1[state_0 == i]
  159. uending = np.unique(ending)
  160. row = classIds.index(i)
  161. for j in uending:
  162. col = classIds.index(j)
  163. transitions[row, col] += sum(ending == j)
  164. self.transitions = transitions
  165. row_sum = transitions.sum(axis=1)
  166. p = np.dot(np.diag(1 / (row_sum + (row_sum == 0))), transitions)
  167. self.p = np.matrix(p)
  168.  
  169. @property
  170. def steady_state(self):
  171. if not hasattr(self, '_steady_state'):
  172. self._steady_state = STEADY_STATE(self.p)
  173. return self._steady_state
  174.  
  175.  
class Spatial_Markov(object):
    """
    Markov transitions conditioned on the value of the spatial lag.

    Parameters
    ----------
    y : array
        (n, t), one row per observation, one column per state of
        each observation, with as many columns as time periods.
    w : W
        spatial weights object.
    k : integer
        number of classes (quantiles).
    permutations : int, optional
        number of permutations for use in randomization based
        inference (the default is 0).
    fixed : bool
        If true, quantiles are taken over the entire n*t
        pooled series. If false, quantiles are taken each
        time period over n.
    variable_name : string
        name of variable.

    Attributes
    ----------
    p : matrix
        (k, k), transition probability matrix for a-spatial Markov.
    s : matrix
        (k, 1), ergodic distribution for a-spatial Markov.
    transitions : matrix
        (k, k), counts of transitions between each state i and j
        for a-spatial Markov.
    T : matrix
        (k, k, k), counts of transitions for each conditional
        Markov.  T[0] is the matrix of transitions for observations
        with lags in the 0th quantile; T[k-1] is the transitions for
        the observations with lags in the k-1th.
    P : matrix
        (k, k, k), transition probability matrix for spatial Markov;
        first dimension is the conditioned on the lag.
    S : matrix
        (k, k), steady state distributions for spatial Markov.
        Each row is a conditional steady_state.
    F : matrix
        (k, k, k), first mean passage times.
        First dimension is conditioned on the lag.
    shtest : list
        (k elements), each element of the list is a tuple for a
        multinomial difference test between the steady state
        distribution from a conditional distribution versus the
        overall steady state distribution: first element of the
        tuple is the chi2 value, second its p-value and the third
        the degrees of freedom.
    chi2 : list
        (k elements), each element of the list is a tuple for a
        chi-squared test of the difference between the conditional
        transition matrix against the overall transition matrix:
        first element of the tuple is the chi2 value, second its
        p-value and the third the degrees of freedom.
    x2 : float
        sum of the chi2 values for each of the conditional tests.
        Has an asymptotic chi2 distribution with k(k-1)(k-1)
        degrees of freedom under the null that transition
        probabilities are spatially homogeneous.
    x2_dof : int
        degrees of freedom for homogeneity test.
    x2_pvalue : float
        pvalue for homogeneity test based on analytic distribution.
    x2_rpvalue : float
        (if permutations>0) pseudo p-value for x2 based on random
        spatial permutations of the rows of the original transitions.
    x2_realizations : array
        (permutations, 1), the values of x2 for the random
        permutations.
    Q : float
        Chi-square test of homogeneity across lag classes based
        on Bickenbach and Bode (2003) [Bickenbach2003]_.
    Q_p_value : float
        p-value for Q.
    LR : float
        Likelihood ratio statistic for homogeneity across lag
        classes based on Bickenbach and Bode (2003)
        [Bickenbach2003]_.
    LR_p_value : float
        p-value for LR.
    dof_hom : int
        degrees of freedom for LR and Q, corrected for 0 cells.

    Notes
    -----
    Based on Rey (2001) [Rey2001]_.

    The shtest and chi2 tests should be used with caution as they are
    based on classic theory assuming random transitions. The x2 based
    test is preferable since it simulates the randomness under the
    null. It is an experimental test requiring further analysis.
    """
    def __init__(self, y, w, k=4, permutations=0, fixed=False,
                 variable_name=None):

        self.y = y
        rows, cols = y.shape
        self.k = k
        self.cols = cols
        npa = np.array
        self.fixed = fixed
        self.variable_name = variable_name
        # NOTE(review): k and fixed are hard-coded to None/False here
        # instead of forwarding the constructor arguments, so the
        # documented `fixed` behavior appears never to be exercised --
        # confirm against the upstream implementation.
        classes = self._maybe_classify(y, k=None, fixed=False)
        # NOTE(review): the raw series self.y (not `classes`) is passed
        # to the a-spatial Markov as the class ids -- verify intended.
        class_ids = self.y
        classic = Markov(class_ids, classes=classes)
        self.classes = classes
        self.p = classic.p
        self.transitions = classic.transitions
        T, P = self._calc(y, w, classes, k=k)
        self.T = T
        self.P = P

        if permutations:
            nrp = np.random.permutation
            counter = 0
            x2_realizations = np.zeros((permutations, 1))
            # Permute the rows of y to simulate the null of spatial
            # homogeneity and build a reference distribution for x2.
            for perm in range(permutations):
                T, P = self._calc(nrp(y), w, classes, k=k)
                x2 = [chi2(T[i], self.transitions)[0] for i in range(k)]
                x2s = sum(x2)
                x2_realizations[perm] = x2s
                if x2s >= self.x2:
                    counter += 1
            self.x2_rpvalue = (counter + 1.0) / (permutations + 1.)
            self.x2_realizations = x2_realizations

    @property
    def s(self):
        # Ergodic distribution of the a-spatial transition matrix
        # (computed lazily and cached).
        if not hasattr(self, '_s'):
            self._s = STEADY_STATE(self.p)
        return self._s

    @property
    def S(self):
        # Conditional steady-state distributions, one row per lag class.
        if not hasattr(self, '_S'):
            S = np.zeros_like(self.p)
            for i, p in enumerate(self.P):
                S[i] = STEADY_STATE(p)
            self._S = np.asarray(S)
        return self._S

    @property
    def F(self):
        # First mean passage times for each conditional matrix.
        if not hasattr(self, '_F'):
            F = np.zeros_like(self.P)
            for i, p in enumerate(self.P):
                F[i] = fmpt(np.asmatrix(p))
            self._F = np.asarray(F)
        return self._F

    # bickenbach and bode tests
    @property
    def ht(self):
        # Cached homogeneity test result shared by Q/LR/dof_hom below.
        if not hasattr(self, '_ht'):
            self._ht = homogeneity(self.T)
        return self._ht

    @property
    def Q(self):
        if not hasattr(self, '_Q'):
            self._Q = self.ht.Q
        return self._Q

    @property
    def Q_p_value(self):
        # NOTE(review): unlike Q, this has no hasattr guard and is
        # recomputed on every access; harmless but inconsistent.
        self._Q_p_value = self.ht.Q_p_value
        return self._Q_p_value

    @property
    def LR(self):
        self._LR = self.ht.LR
        return self._LR

    @property
    def LR_p_value(self):
        self._LR_p_value = self.ht.LR_p_value
        return self._LR_p_value

    @property
    def dof_hom(self):
        self._dof_hom = self.ht.dof
        return self._dof_hom

    # shtests
    @property
    def shtest(self):
        if not hasattr(self, '_shtest'):
            self._shtest = self._mn_test()
        return self._shtest

    @property
    def chi2(self):
        if not hasattr(self, '_chi2'):
            self._chi2 = self._chi2_test()
        return self._chi2

    @property
    def x2(self):
        # Sum of the per-lag-class chi2 statistics.
        if not hasattr(self, '_x2'):
            self._x2 = sum([c[0] for c in self.chi2])
        return self._x2

    @property
    def x2_pvalue(self):
        if not hasattr(self, '_x2_pvalue'):
            self._x2_pvalue = 1 - stats.chi2.cdf(self.x2, self.x2_dof)
        return self._x2_pvalue

    @property
    def x2_dof(self):
        # k(k-1)(k-1) degrees of freedom (see class docstring).
        if not hasattr(self, '_x2_dof'):
            k = self.k
            self._x2_dof = k * (k - 1) * (k - 1)
        return self._x2_dof

    def _maybe_classify(self, y, k=None, fixed=False):
        # Return class labels for y: if y already holds discrete values
        # (fewer unique values than rows) use them directly, otherwise
        # bin into quantiles.
        if len(np.unique(y)) < len(y):
            classes = np.unique(y)
        else:
            if fixed:
                yf = y.flatten()
                yb = pysal.Quantiles(yf, k=k).yb
                # NOTE(review): `rows` and `cols` are not defined in
                # this scope (they are locals of __init__), so this
                # branch raises NameError if ever reached -- needs
                # fixing upstream.
                yb.shape = (rows, cols)
                classes = yb
            else:
                npa = np.array
                # NOTE(review): `cols` is likewise undefined here.
                classes = npa([pysal.Quantiles(y[:, i], k=k)
                               .yb for i in np.arange(cols)]).transpose()
        return classes

    def _calc(self, y, w, classes, k):
        """
        Count transitions conditioned on the class of the spatial lag.

        Returns (T, P) where T[l, i, j] is the number of i -> j moves
        observed while the unit's lag was in class l, and P holds the
        corresponding row-normalized transition probabilities.
        """
        ly = pysal.weights.spatial_lag.lag_categorical(w, y)
        l_classes = self._maybe_classify(ly, k=None, fixed=False)
        T = np.zeros((k, k, k))
        n, t = y.shape
        for t1 in range(t - 1):
            t2 = t1 + 1
            for i in range(n):
                T[l_classes[i, t1], classes[i, t1], classes[i, t2]] += 1

        P = np.zeros_like(T)
        for i, mat in enumerate(T):
            row_sum = mat.sum(axis=1)
            # Avoid division by zero for rows with no transitions.
            row_sum = row_sum + (row_sum == 0)
            p_i = np.matrix(np.diag(1. / row_sum) * np.matrix(mat))
            P[i] = p_i
        return T, P

    def _mn_test(self):
        """
        helper to calculate tests of differences between steady state
        distributions from the conditional and overall distributions.
        """
        n, t = self.y.shape
        n0, n1, n2 = self.T.shape
        rn = range(n0)
        mat = [self._ssmnp_test(
            self.s, self.S[i], self.T[i].sum()) for i in rn]
        return mat

    def _ssmnp_test(self, p1, p2, nt):
        """
        Steady state multinomial probability difference test.

        Arguments
        ---------
        p1 : array
            (k, 1), first steady state probability distribution.
        p2 : array
            (k, 1), second steady state probability distribution.
        nt : int
            number of transitions to base the test on.

        Returns
        -------
        tuple
            (3 elements)
            (chi2 value, pvalue, degrees of freedom)

        """
        p1 = np.array(p1)
        k, c = p1.shape
        p1.shape = (k, )
        # Observed and expected counts implied by nt transitions.
        o = nt * p2
        e = nt * p1
        d = np.multiply((o - e), (o - e))
        d = d / e
        chi2 = d.sum()
        pvalue = 1 - stats.chi2.cdf(chi2, k - 1)
        return (chi2, pvalue, k - 1)

    def _chi2_test(self):
        """
        helper to calculate tests of differences between the conditional
        transition matrices and the overall transitions matrix.
        """
        n, t = self.y.shape
        n0, n1, n2 = self.T.shape
        rn = range(n0)
        mat = [chi2(self.T[i], self.transitions) for i in rn]
        return mat

    def summary(self, file_name=None):
        """
        Print (or write to file_name) a homogeneity-test summary table.
        """
        class_names = ["C%d" % i for i in range(self.k)]
        regime_names = ["LAG%d" % i for i in range(self.k)]
        ht = homogeneity(self.T, class_names=class_names,
                         regime_names=regime_names)
        title = "Spatial Markov Test"
        if self.variable_name:
            title = title + ": " + self.variable_name
        if file_name:
            ht.summary(file_name=file_name, title=title)
        else:
            ht.summary(title=title)
  613.  
  614.  
  615. def chi2(T1, T2):
  616. """
  617. chi-squared test of difference between two transition matrices.
  618.  
  619. Parameters
  620. ----------
  621. T1 : matrix
  622. (k, k), matrix of transitions (counts).
  623. T2 : matrix
  624. (k, k), matrix of transitions (counts) to use to form the
  625. probabilities under the null.
  626.  
  627. Returns
  628. -------
  629. : tuple
  630. (3 elements).
  631. (chi2 value, pvalue, degrees of freedom).
  632.  
  633. Examples
  634. --------
  635. >>> import pysal
  636. >>> f = pysal.open(pysal.examples.get_path("usjoin.csv"))
  637. >>> years = range(1929, 2010)
  638. >>> pci = np.array([f.by_col[str(y)] for y in years]).transpose()
  639. >>> rpci = pci/(pci.mean(axis=0))
  640. >>> w = pysal.open(pysal.examples.get_path("states48.gal")).read()
  641. >>> w.transform='r'
  642. >>> sm = Spatial_Markov(rpci, w, fixed=True)
  643. >>> T1 = sm.T[0]
  644. >>> T1
  645. array([[ 562., 22., 1., 0.],
  646. [ 12., 201., 22., 0.],
  647. [ 0., 17., 97., 4.],
  648. [ 0., 0., 3., 19.]])
  649. >>> T2 = sm.transitions
  650. >>> T2
  651. array([[ 884., 77., 4., 0.],
  652. [ 68., 794., 87., 3.],
  653. [ 1., 92., 815., 51.],
  654. [ 1., 0., 60., 903.]])
  655. >>> chi2(T1,T2)
  656. (23.397284414732951, 0.0053631167048613371, 9)
  657.  
  658. Notes
  659. -----
  660. Second matrix is used to form the probabilities under the null.
  661. Marginal sums from first matrix are distributed across these probabilities
  662. under the null. In other words the observed transitions are taken from T1
  663. while the expected transitions are formed as follows
  664.  
  665. .. math::
  666.  
  667. E_{i,j} = \sum_j T1_{i,j} * T2_{i,j}/\sum_j T2_{i,j}
  668.  
  669. Degrees of freedom corrected for any rows in either T1 or T2 that have
  670. zero total transitions.
  671. """
  672. rs2 = T2.sum(axis=1)
  673. rs1 = T1.sum(axis=1)
  674. rs2nz = rs2 > 0
  675. rs1nz = rs1 > 0
  676. dof1 = sum(rs1nz)
  677. dof2 = sum(rs2nz)
  678. rs2 = rs2 + (rs2 == 0)
  679. dof = (dof1 - 1) * (dof2 - 1)
  680. p = np.diag(1 / rs2) * np.matrix(T2)
  681. E = np.diag(rs1) * np.matrix(p)
  682. num = T1 - E
  683. num = np.multiply(num, num)
  684. E = E + (E == 0)
  685. chi2 = num / E
  686. chi2 = chi2.sum()
  687. pvalue = 1 - stats.chi2.cdf(chi2, dof)
  688. return chi2, pvalue, dof
  689.  
  690.  
  691. class LISA_Markov(Markov):
  692. """
  693. Markov for Local Indicators of Spatial Association
  694.  
  695. Parameters
  696. ----------
  697. y : array
  698. (n, t), n cross-sectional units observed over t time
  699. periods.
  700. w : W
  701. spatial weights object.
  702. permutations : int, optional
  703. number of permutations used to determine LISA
  704. significance (the default is 0).
  705. significance_level : float, optional
  706. significance level (two-sided) for filtering
  707. significant LISA endpoints in a transition (the
  708. default is 0.05).
  709. geoda_quads : bool
  710. If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4.
  711. If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4.
  712. (the default is False).
  713.  
  714. Attributes
  715. ----------
  716. chi_2 : tuple
  717. (3 elements)
  718. (chi square test statistic, p-value, degrees of freedom) for
  719. test that dynamics of y are independent of dynamics of wy.
  720. classes : array
  721. (4, 1)
  722. 1=HH, 2=LH, 3=LL, 4=HL (own, lag)
  723. 1=HH, 2=LL, 3=LH, 4=HL (own, lag) (if geoda_quads=True)
  724. expected_t : array
  725. (4, 4), expected number of transitions under the null that
  726. dynamics of y are independent of dynamics of wy.
  727. move_types : matrix
  728. (n, t-1), integer values indicating which type of LISA
  729. transition occurred (q1 is quadrant in period 1, q2 is
  730. quadrant in period 2).
  731.  
  732. .. Table:: Move Types
  733.  
  734. == == ========
  735. q1 q2 move_type
  736. == == ========
  737. 1 1 1
  738. 1 2 2
  739. 1 3 3
  740. 1 4 4
  741. 2 1 5
  742. 2 2 6
  743. 2 3 7
  744. 2 4 8
  745. 3 1 9
  746. 3 2 10
  747. 3 3 11
  748. 3 4 12
  749. 4 1 13
  750. 4 2 14
  751. 4 3 15
  752. 4 4 16
  753. == == ========
  754.  
  755. p : matrix
  756. (k, k), transition probability matrix.
  757. p_values : matrix
  758. (n, t), LISA p-values for each end point (if permutations >
  759. 0).
  760. significant_moves : matrix
  761. (n, t-1), integer values indicating the type and
  762. significance of a LISA transition. st = 1 if
  763. significant in period t, else st=0 (if permutations >
  764. 0).
  765.  
  766. .. Table:: Significant Moves
  767.  
  768. =============== ===================
  769. (s1,s2) move_type
  770. =============== ===================
  771. (1,1) [1, 16]
  772. (1,0) [17, 32]
  773. (0,1) [33, 48]
  774. (0,0) [49, 64]
  775. =============== ===================
  776.  
  777.  
  778. == == == == =========
  779. q1 q2 s1 s2 move_type
  780. == == == == =========
  781. 1 1 1 1 1
  782. 1 2 1 1 2
  783. 1 3 1 1 3
  784. 1 4 1 1 4
  785. 2 1 1 1 5
  786. 2 2 1 1 6
  787. 2 3 1 1 7
  788. 2 4 1 1 8
  789. 3 1 1 1 9
  790. 3 2 1 1 10
  791. 3 3 1 1 11
  792. 3 4 1 1 12
  793. 4 1 1 1 13
  794. 4 2 1 1 14
  795. 4 3 1 1 15
  796. 4 4 1 1 16
  797. 1 1 1 0 17
  798. 1 2 1 0 18
  799. . . . . .
  800. . . . . .
  801. 4 3 1 0 31
  802. 4 4 1 0 32
  803. 1 1 0 1 33
  804. 1 2 0 1 34
  805. . . . . .
  806. . . . . .
  807. 4 3 0 1 47
  808. 4 4 0 1 48
  809. 1 1 0 0 49
  810. 1 2 0 0 50
  811. . . . . .
  812. . . . . .
  813. 4 3 0 0 63
  814. 4 4 0 0 64
  815. == == == == =========
  816.  
  817. steady_state : matrix
  818. (k, 1), ergodic distribution.
  819. transitions : matrix
  820. (4, 4), count of transitions between each state i and j.
  821. spillover : array
  822. (n, 1) binary array, locations that were not part of a
  823. cluster in period 1 but joined a prexisting cluster in
  824. period 2.
  825.  
  826. Examples
  827. --------
  828. >>> import pysal as ps
  829. >>> import numpy as np
  830. >>> f = ps.open(ps.examples.get_path("usjoin.csv"))
  831. >>> years = range(1929, 2010)
  832. >>> pci = np.array([f.by_col[str(y)] for y in years]).transpose()
  833. >>> w = ps.open(ps.examples.get_path("states48.gal")).read()
  834. >>> lm = ps.LISA_Markov(pci,w)
  835. >>> lm.classes
  836. array([1, 2, 3, 4])
  837. >>> lm.steady_state
  838. matrix([[ 0.28561505],
  839. [ 0.14190226],
  840. [ 0.40493672],
  841. [ 0.16754598]])
  842. >>> lm.transitions
  843. array([[ 1.08700000e+03, 4.40000000e+01, 4.00000000e+00,
  844. 3.40000000e+01],
  845. [ 4.10000000e+01, 4.70000000e+02, 3.60000000e+01,
  846. 1.00000000e+00],
  847. [ 5.00000000e+00, 3.40000000e+01, 1.42200000e+03,
  848. 3.90000000e+01],
  849. [ 3.00000000e+01, 1.00000000e+00, 4.00000000e+01,
  850. 5.52000000e+02]])
  851. >>> lm.p
  852. matrix([[ 0.92985458, 0.03763901, 0.00342173, 0.02908469],
  853. [ 0.07481752, 0.85766423, 0.06569343, 0.00182482],
  854. [ 0.00333333, 0.02266667, 0.948 , 0.026 ],
  855. [ 0.04815409, 0.00160514, 0.06420546, 0.88603531]])
  856. >>> lm.move_types[0,:3]
  857. array([11, 11, 11])
  858. >>> lm.move_types[0,-3:]
  859. array([11, 11, 11])
  860.  
  861. Now consider only moves with one, or both, of the LISA end points being
  862. significant
  863.  
  864. >>> np.random.seed(10)
  865. >>> lm_random = pysal.LISA_Markov(pci, w, permutations=99)
  866. >>> lm_random.significant_moves[0, :3]
  867. array([11, 11, 11])
  868. >>> lm_random.significant_moves[0,-3:]
  869. array([59, 43, 27])
  870.  
  871.  
  872. Any value less than 49 indicates at least one of the LISA end points was
  873. significant. So for example, the first spatial unit experienced a
  874. transition of type 11 (LL, LL) during the first three and last tree
  875. intervals (according to lm.move_types), however, the last three of these
  876. transitions involved insignificant LISAS in both the start and ending year
  877. of each transition.
  878.  
  879. Test whether the moves of y are independent of the moves of wy
  880.  
  881. >>> "Chi2: %8.3f, p: %5.2f, dof: %d" % lm.chi_2
  882. 'Chi2: 1058.208, p: 0.00, dof: 9'
  883.  
  884. Actual transitions of LISAs
  885.  
  886. >>> lm.transitions
  887. array([[ 1.08700000e+03, 4.40000000e+01, 4.00000000e+00,
  888. 3.40000000e+01],
  889. [ 4.10000000e+01, 4.70000000e+02, 3.60000000e+01,
  890. 1.00000000e+00],
  891. [ 5.00000000e+00, 3.40000000e+01, 1.42200000e+03,
  892. 3.90000000e+01],
  893. [ 3.00000000e+01, 1.00000000e+00, 4.00000000e+01,
  894. 5.52000000e+02]])
  895.  
  896. Expected transitions of LISAs under the null y and wy are moving
  897. independently of one another
  898.  
  899. >>> lm.expected_t
  900. array([[ 1.12328098e+03, 1.15377356e+01, 3.47522158e-01,
  901. 3.38337644e+01],
  902. [ 3.50272664e+00, 5.28473882e+02, 1.59178880e+01,
  903. 1.05503814e-01],
  904. [ 1.53878082e-01, 2.32163556e+01, 1.46690710e+03,
  905. 9.72266513e+00],
  906. [ 9.60775143e+00, 9.86856346e-02, 6.23537392e+00,
  907. 6.07058189e+02]])
  908.  
  909. If the LISA classes are to be defined according to GeoDa, the `geoda_quad`
  910. option has to be set to true
  911.  
  912. >>> lm.q[0:5,0]
  913. array([3, 2, 3, 1, 4])
  914. >>> lm = ps.LISA_Markov(pci,w, geoda_quads=True)
  915. >>> lm.q[0:5,0]
  916. array([2, 3, 2, 1, 4])
  917.  
  918. """
    def __init__(self, y, w, permutations=0,
                 significance_level=0.05, geoda_quads=False):
        # Work with time-major orientation: one row per time period.
        y = y.transpose()
        pml = pysal.Moran_Local
        gq = geoda_quads
        # One Moran_Local per time period.
        ml = ([pml(yi, w, permutations=permutations, geoda_quads=gq)
               for yi in y])
        # q: quadrant membership, transposed back to (n, t).
        q = np.array([mli.q for mli in ml]).transpose()
        classes = np.arange(1, 5)  # no guarantee all 4 quadrants are visited
        Markov.__init__(self, q, classes)
        self.q = q
        self.w = w
        n, k = q.shape
        k -= 1  # k is now the number of transitions (t - 1)
        self.significance_level = significance_level
        move_types = np.zeros((n, k), int)
        sm = np.zeros((n, k), int)
        # NOTE(review): duplicate of the assignment a few lines above;
        # redundant but harmless.
        self.significance_level = significance_level
        if permutations > 0:
            # Two-sided pseudo p-values from the conditional permutations.
            p = np.array([mli.p_z_sim for mli in ml]).transpose()
            self.p_values = p
            pb = p <= significance_level
        else:
            # Without permutations, treat every end point as not
            # significant (zeros compare equal to False in the
            # MOVE_TYPES keys below).
            pb = np.zeros_like(y.T)
        for t in range(k):
            origin = q[:, t]
            dest = q[:, t + 1]
            p_origin = pb[:, t]
            p_dest = pb[:, t + 1]
            for r in range(n):
                # Unfiltered move type, then the significance-filtered one.
                move_types[r, t] = TT[origin[r], dest[r]]
                key = (origin[r], dest[r], p_origin[r], p_dest[r])
                sm[r, t] = MOVE_TYPES[key]
        if permutations > 0:
            self.significant_moves = sm
        self.move_types = move_types

        # null of own and lag moves being independent

        ybar = y.mean(axis=0)
        r = y / ybar
        ylag = np.array([pysal.lag_spatial(w, yt) for yt in y])
        rlag = ylag / ybar
        rc = r < 1.
        rlagc = rlag < 1.
        markov_y = pysal.Markov(rc)
        markov_ylag = pysal.Markov(rlagc)
        # A reorders the Kronecker product rows/cols to match the
        # quadrant coding used here.
        A = np.matrix([[1, 0, 0, 0],
                       [0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 1, 0, 0]])

        kp = A * np.kron(markov_y.p, markov_ylag.p) * A.T
        trans = self.transitions.sum(axis=1)
        # Expected transitions under independence of y and wy moves.
        t1 = np.diag(trans) * kp
        t2 = self.transitions
        t1 = t1.getA()
        self.chi_2 = pysal.spatial_dynamics.markov.chi2(t2, t1)
        self.expected_t = t1
        self.permutations = permutations
  979.  
    def spillover(self, quadrant=1, neighbors_on=False):
        """
        Detect spillover locations for diffusion in LISA Markov.

        Parameters
        ----------
        quadrant : int
            which quadrant in the scatterplot should form the core
            of a cluster.
        neighbors_on : binary
            If false, then only the 1st order neighbors of a core
            location are included in the cluster.
            If true, neighbors of cluster core 1st order neighbors
            are included in the cluster.

        Returns
        -------
        results : dictionary
            two keys - values pairs:
            'components' - array (n, t)
            values are integer ids (starting at 1) indicating which
            component/cluster observation i in period t belonged to.
            'spill_over' - array (n, t-1)
            binary values indicating if the location was a
            spill-over location that became a new member of a
            previously existing cluster.

        Examples
        --------
        >>> f = pysal.open(pysal.examples.get_path("usjoin.csv"))
        >>> years = range(1929, 2010)
        >>> pci = np.array([f.by_col[str(y)] for y in years]).transpose()
        >>> w = pysal.open(pysal.examples.get_path("states48.gal")).read()
        >>> np.random.seed(10)
        >>> lm_random = pysal.LISA_Markov(pci, w, permutations=99)
        >>> r = lm_random.spillover()
        >>> r['components'][:,12]
        array([ 0.,  0.,  0.,  2.,  0.,  1.,  1.,  0.,  0.,  2.,  0.,  0.,  0.,
                0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,
                1.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  2.,  1.,  1.,  0.,  0.,
                0.,  0.,  0.,  1.,  0.,  2.,  1.,  0.,  0.])
        >>> r['components'][:,14]
        array([ 0.,  2.,  0.,  2.,  0.,  1.,  1.,  0.,  0.,  2.,  0.,  0.,  0.,
                0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,
                0.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  2.,  1.,  1.,  0.,  0.,
                0.,  0.,  0.,  1.,  0.,  2.,  1.,  0.,  0.])
        >>> r['spill_over'][:,12]
        array([ 0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
                0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
                0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
                0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.])

        Including neighbors of core neighbors

        >>> rn = lm_random.spillover(neighbors_on=True)
        >>> rn['components'][:,12]
        array([ 0.,  2.,  0.,  2.,  0.,  1.,  1.,  0.,  0.,  2.,  0.,  1.,  0.,
                0.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  2.,  0.,  2.,
                1.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  2.,  1.,  1.,  0.,  0.,
                0.,  0.,  2.,  1.,  1.,  2.,  1.,  0.,  2.])
        >>> rn["components"][:,13]
        array([ 0.,  2.,  0.,  2.,  2.,  1.,  1.,  0.,  0.,  2.,  0.,  1.,  0.,
                2.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  2.,  2.,  2.,
                1.,  1.,  2.,  1.,  0.,  2.,  1.,  2.,  2.,  1.,  1.,  0.,  2.,
                0.,  2.,  2.,  1.,  1.,  2.,  1.,  0.,  2.])
        >>> rn["spill_over"][:,12]
        array([ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
                1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
                0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,
                0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

        """
        n, k = self.q.shape
        # requires p-values from the permutation inference in __init__;
        # without permutations there is nothing significant to cluster on
        if self.permutations:
            spill_over = np.zeros((n, k - 1))
            components = np.zeros((n, k))
            i2id = {}  # handle string keys
            for key in self.w.neighbors.keys():
                idx = self.w.id2i[key]
                i2id[idx] = key
            # boolean (n, k): significant LISA in the target quadrant
            sig_lisas = (self.q == quadrant) \
                * (self.p_values <= self.significance_level)
            # integer indices of significant cores, one list per period
            sig_ids = [np.nonzero(
                sig_lisas[:, i])[0].tolist() for i in range(k)]

            neighbors = self.w.neighbors
            for t in range(k - 1):
                s1 = sig_ids[t]
                s2 = sig_ids[t + 1]
                # graph of cores + their neighbors for period t;
                # connected components define the clusters
                g1 = pysal.region.components.Graph(undirected=True)
                for i in s1:
                    for neighbor in neighbors[i2id[i]]:
                        g1.add_edge(i2id[i], neighbor, 1.0)
                        if neighbors_on:
                            # also pull in 2nd-order neighbors
                            for nn in neighbors[neighbor]:
                                g1.add_edge(neighbor, nn, 1.0)
                components1 = g1.connected_components(op=gt)
                components1 = [list(c.nodes) for c in components1]
                # same construction for period t + 1
                g2 = pysal.region.components.Graph(undirected=True)
                for i in s2:
                    for neighbor in neighbors[i2id[i]]:
                        g2.add_edge(i2id[i], neighbor, 1.0)
                        if neighbors_on:
                            for nn in neighbors[neighbor]:
                                g2.add_edge(neighbor, nn, 1.0)
                components2 = g2.connected_components(op=gt)
                components2 = [list(c.nodes) for c in components2]
                # flatten component memberships for the two periods
                c2 = []
                c1 = []
                for c in components2:
                    c2.extend(c)
                for c in components1:
                    c1.extend(c)

                # ids that joined a cluster only in period t + 1
                new_ids = [j for j in c2 if j not in c1]
                spill_ids = []
                for j in new_ids:
                    # find j's component in period 2
                    cj = [c for c in components2 if j in c][0]
                    # for members of j's component in period 2, check if they
                    # belonged to any components in period 1
                    for i in cj:
                        if i in c1:
                            spill_ids.append(j)
                            break
                for spill_id in spill_ids:
                    # NOTE(review): `id` shadows the builtin; harmless here
                    id = self.w.id2i[spill_id]
                    spill_over[id, t] = 1
                # record period-t component labels (1-based; 0 = no cluster)
                for c, component in enumerate(components1):
                    for i in component:
                        ii = self.w.id2i[i]
                        components[ii, t] = c + 1
            results = {}
            results['components'] = components
            results['spill_over'] = spill_over
            return results

        else:
            return None
  1119.  
  1120.  
  1121. def kullback(F):
  1122. """
  1123. Kullback information based test of Markov Homogeneity.
  1124.  
  1125. Parameters
  1126. ----------
  1127. F : array
  1128. (s, r, r), values are transitions (not probabilities) for
  1129. s strata, r initial states, r terminal states.
  1130.  
  1131. Returns
  1132. -------
  1133. Results : dictionary
  1134. (key - value)
  1135.  
  1136. Conditional homogeneity - (float) test statistic for homogeneity
  1137. of transition probabilities across strata.
  1138.  
  1139. Conditional homogeneity pvalue - (float) p-value for test
  1140. statistic.
  1141.  
  1142. Conditional homogeneity dof - (int) degrees of freedom =
  1143. r(s-1)(r-1).
  1144.  
  1145. Notes
  1146. -----
  1147. Based on Kullback, Kupperman and Ku (1962) [Kullback1962]_.
  1148. Example below is taken from Table 9.2 .
  1149.  
  1150. Examples
  1151. --------
  1152. >>> s1 = np.array([
  1153. ... [ 22, 11, 24, 2, 2, 7],
  1154. ... [ 5, 23, 15, 3, 42, 6],
  1155. ... [ 4, 21, 190, 25, 20, 34],
  1156. ... [0, 2, 14, 56, 14, 28],
  1157. ... [32, 15, 20, 10, 56, 14],
  1158. ... [5, 22, 31, 18, 13, 134]
  1159. ... ])
  1160. >>> s2 = np.array([
  1161. ... [3, 6, 9, 3, 0, 8],
  1162. ... [1, 9, 3, 12, 27, 5],
  1163. ... [2, 9, 208, 32, 5, 18],
  1164. ... [0, 14, 32, 108, 40, 40],
  1165. ... [22, 14, 9, 26, 224, 14],
  1166. ... [1, 5, 13, 53, 13, 116]
  1167. ... ])
  1168. >>>
  1169. >>> F = np.array([s1, s2])
  1170. >>> res = kullback(F)
  1171. >>> "%8.3f"%res['Conditional homogeneity']
  1172. ' 160.961'
  1173. >>> "%d"%res['Conditional homogeneity dof']
  1174. '30'
  1175. >>> "%3.1f"%res['Conditional homogeneity pvalue']
  1176. '0.0'
  1177.  
  1178. """
  1179.  
  1180. F1 = F == 0
  1181. F1 = F + F1
  1182. FLF = F * np.log(F1)
  1183. T1 = 2 * FLF.sum()
  1184.  
  1185. FdJK = F.sum(axis=0)
  1186. FdJK1 = FdJK + (FdJK == 0)
  1187. FdJKLFdJK = FdJK * np.log(FdJK1)
  1188. T2 = 2 * FdJKLFdJK.sum()
  1189.  
  1190. FdJd = F.sum(axis=0).sum(axis=1)
  1191. FdJd1 = FdJd + (FdJd == 0)
  1192. T3 = 2 * (FdJd * np.log(FdJd1)).sum()
  1193.  
  1194. FIJd = F[:, :].sum(axis=1)
  1195. FIJd1 = FIJd + (FIJd == 0)
  1196. T4 = 2 * (FIJd * np.log(FIJd1)).sum()
  1197.  
  1198. T6 = F.sum()
  1199. T6 = 2 * T6 * np.log(T6)
  1200.  
  1201. s, r, r1 = F.shape
  1202. chom = T1 - T4 - T2 + T3
  1203. cdof = r * (s - 1) * (r - 1)
  1204. results = {}
  1205. results['Conditional homogeneity'] = chom
  1206. results['Conditional homogeneity dof'] = cdof
  1207. results['Conditional homogeneity pvalue'] = 1 - stats.chi2.cdf(chom, cdof)
  1208. return results
  1209.  
  1210.  
  1211. def prais(pmat):
  1212. """
  1213. Prais conditional mobility measure.
  1214.  
  1215. Parameters
  1216. ----------
  1217. pmat : matrix
  1218. (k, k), Markov probability transition matrix.
  1219.  
  1220. Returns
  1221. -------
  1222. pr : matrix
  1223. (1, k), conditional mobility measures for each of the k classes.
  1224.  
  1225. Notes
  1226. -----
  1227. Prais' conditional mobility measure for a class is defined as:
  1228.  
  1229. .. math::
  1230.  
  1231. pr_i = 1 - p_{i,i}
  1232.  
  1233. Examples
  1234. --------
  1235. >>> import numpy as np
  1236. >>> import pysal
  1237. >>> f = pysal.open(pysal.examples.get_path("usjoin.csv"))
  1238. >>> pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
  1239. >>> q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
  1240. >>> m = pysal.Markov(q5)
  1241. >>> m.transitions
  1242. array([[ 729., 71., 1., 0., 0.],
  1243. [ 72., 567., 80., 3., 0.],
  1244. [ 0., 81., 631., 86., 2.],
  1245. [ 0., 3., 86., 573., 56.],
  1246. [ 0., 0., 1., 57., 741.]])
  1247. >>> m.p
  1248. matrix([[ 0.91011236, 0.0886392 , 0.00124844, 0. , 0. ],
  1249. [ 0.09972299, 0.78531856, 0.11080332, 0.00415512, 0. ],
  1250. [ 0. , 0.10125 , 0.78875 , 0.1075 , 0.0025 ],
  1251. [ 0. , 0.00417827, 0.11977716, 0.79805014, 0.07799443],
  1252. [ 0. , 0. , 0.00125156, 0.07133917, 0.92740926]])
  1253. >>> pysal.spatial_dynamics.markov.prais(m.p)
  1254. matrix([[ 0.08988764, 0.21468144, 0.21125 , 0.20194986, 0.07259074]])
  1255.  
  1256. """
  1257. pr = (pmat.sum(axis=1) - np.diag(pmat))[0]
  1258. return pr
  1259.  
  1260.  
  1261. def shorrock(pmat):
  1262. """
  1263. Shorrock's mobility measure.
  1264.  
  1265. Parameters
  1266. ----------
  1267. pmat : matrix
  1268. (k, k), Markov probability transition matrix.
  1269.  
  1270. Returns
  1271. -------
  1272. sh : float
  1273. Shorrock mobility measure.
  1274.  
  1275. Notes
  1276. -----
  1277. Shorock's mobility measure is defined as
  1278.  
  1279. .. math::
  1280.  
  1281. sh = (k - \sum_{j=1}^{k} p_{j,j})/(k - 1)
  1282.  
  1283. Examples
  1284. --------
  1285. >>> import numpy as np
  1286. >>> import pysal
  1287. >>> f = pysal.open(pysal.examples.get_path("usjoin.csv"))
  1288. >>> pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
  1289. >>> q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
  1290. >>> m = pysal.Markov(q5)
  1291. >>> m.transitions
  1292. array([[ 729., 71., 1., 0., 0.],
  1293. [ 72., 567., 80., 3., 0.],
  1294. [ 0., 81., 631., 86., 2.],
  1295. [ 0., 3., 86., 573., 56.],
  1296. [ 0., 0., 1., 57., 741.]])
  1297. >>> m.p
  1298. matrix([[ 0.91011236, 0.0886392 , 0.00124844, 0. , 0. ],
  1299. [ 0.09972299, 0.78531856, 0.11080332, 0.00415512, 0. ],
  1300. [ 0. , 0.10125 , 0.78875 , 0.1075 , 0.0025 ],
  1301. [ 0. , 0.00417827, 0.11977716, 0.79805014, 0.07799443],
  1302. [ 0. , 0. , 0.00125156, 0.07133917, 0.92740926]])
  1303. >>> pysal.spatial_dynamics.markov.shorrock(m.p)
  1304. 0.19758992000997844
  1305.  
  1306. """
  1307. t = np.trace(pmat)
  1308. k = pmat.shape[1]
  1309. sh = (k - t) / (k - 1)
  1310. return sh
  1311.  
  1312.  
  1313. def homogeneity(transition_matrices, regime_names=[], class_names=[],
  1314. title="Markov Homogeneity Test"):
  1315. """
  1316. Test for homogeneity of Markov transition probabilities across regimes.
  1317.  
  1318. Parameters
  1319. ----------
  1320. transition_matrices : list
  1321. of transition matrices for regimes, all matrices must
  1322. have same size (r, c). r is the number of rows in the
  1323. transition matrix and c is the number of columns in
  1324. the transition matrix.
  1325. regime_names : sequence
  1326. Labels for the regimes.
  1327. class_names : sequence
  1328. Labels for the classes/states of the Markov chain.
  1329. title : string
  1330. name of test.
  1331.  
  1332. Returns
  1333. -------
  1334. : implicit
  1335. an instance of Homogeneity_Results.
  1336. """
  1337.  
  1338. return Homogeneity_Results(transition_matrices, regime_names=regime_names,
  1339. class_names=class_names, title=title)
  1340.  
  1341.  
class Homogeneity_Results:
    """
    Wrapper class to present homogeneity results.

    Parameters
    ----------
    transition_matrices : list
                          of transition matrices for regimes, all matrices must
                          have same size (r, c). r is the number of rows in
                          the transition matrix and c is the number of columns
                          in the transition matrix.
    regime_names        : sequence
                          Labels for the regimes.
    class_names         : sequence
                          Labels for the classes/states of the Markov chain.
    title               : string
                          Title of the table.

    Attributes
    -----------

    Notes
    -----
    Degrees of freedom adjustment follow the approach in Bickenbach and Bode
    (2003) [Bickenbach2003]_.

    Examples
    --------
    See Spatial_Markov above.

    """

    def __init__(self, transition_matrices, regime_names=[], class_names=[],
                 title="Markov Homogeneity Test"):
        # NOTE(review): mutable default arguments; harmless here since the
        # lists are only read, but a None sentinel would be safer.
        self._homogeneity(transition_matrices)
        self.regime_names = regime_names
        self.class_names = class_names
        self.title = title

    def _homogeneity(self, transition_matrices):
        # Compute the Q (chi-square) and LR statistics comparing each
        # regime's transition matrix against the pooled matrix.
        # form null transition probability matrix
        M = np.array(transition_matrices)
        m, r, k = M.shape
        self.k = k
        B = np.zeros((r, m))
        # pooled (summed over regimes) transition counts
        T = M.sum(axis=0)
        self.t_total = T.sum()
        n_i = T.sum(axis=1)
        # A_i: number of non-zero cells in row i of the pooled matrix
        A_i = (T > 0).sum(axis=1)
        A_im = np.zeros((r, m))
        # row-normalize T; the (n_i == 0) guard avoids division by zero
        # for rows with no observations
        p_ij = np.dot(np.diag(1./(n_i + (n_i == 0)*1.)), T)
        # denominator with zero cells replaced by 1 so the ratio is safe
        den = p_ij + 1. * (p_ij == 0)
        b_i = np.zeros_like(A_i)
        # NOTE(review): p_ijm / lr_table / q_table inherit M's dtype via
        # zeros_like; with integer transition counts the stored per-cell
        # values would be truncated — confirm counts arrive as floats.
        p_ijm = np.zeros_like(M)
        # get dimensions
        m, n_rows, n_cols = M.shape
        m = 0  # reused below as the regime index
        Q = 0.0
        LR = 0.0
        lr_table = np.zeros_like(M)
        q_table = np.zeros_like(M)

        for nijm in M:
            nim = nijm.sum(axis=1)
            B[:, m] = 1.*(nim > 0)
            b_i = b_i + 1. * (nim > 0)
            # regime-specific row-normalized transition probabilities
            p_ijm[m] = np.dot(np.diag(1./(nim + (nim == 0)*1.)), nijm)
            num = (p_ijm[m]-p_ij)**2
            ratio = num / den
            # per-cell Pearson-type contributions, weighted by row counts
            qijm = np.dot(np.diag(nim), ratio)
            q_table[m] = qijm
            Q = Q + qijm.sum()
            # only use nonzero pijm in lr test
            mask = (nijm > 0) * (p_ij > 0)
            A_im[:, m] = (nijm > 0).sum(axis=1)
            # masked-out cells become 1/1 so log(ratio) = 0 for them
            unmask = 1.0 * (mask == 0)
            ratio = (mask * p_ijm[m] + unmask) / (mask * p_ij + unmask)
            lr = nijm * np.log(ratio)
            LR = LR + lr.sum()
            lr_table[m] = 2 * lr
            m += 1
        # b_i is the number of regimes that have non-zero observations in row i
        # A_i is the number of non-zero elements in row i of the aggregated
        # transition matrix
        self.dof = int(((b_i-1) * (A_i-1)).sum())
        self.Q = Q
        self.Q_p_value = 1 - stats.chi2.cdf(self.Q, self.dof)
        self.LR = LR * 2.
        self.LR_p_value = 1 - stats.chi2.cdf(self.LR, self.dof)
        self.A = A_i
        self.A_im = A_im
        self.B = B
        self.b_i = b_i
        self.LR_table = lr_table
        self.Q_table = q_table
        self.m = m
        self.p_h0 = p_ij
        self.p_h1 = p_ijm

    def summary(self, file_name=None, title="Markov Homogeneity Test"):
        # Print a formatted report of the test statistics and the null /
        # alternative transition matrices; optionally also write a LaTeX
        # table to file_name.
        regime_names = ["%d" % i for i in range(self.m)]
        if self.regime_names:
            regime_names = self.regime_names
        cols = ["P(%s)" % str(regime) for regime in regime_names]
        if not self.class_names:
            self.class_names = range(self.k)

        max_col = max([len(col) for col in cols])
        col_width = max([5, max_col])  # probabilities have 5 chars
        n_tabs = self.k
        width = n_tabs * 4 + (self.k+1)*col_width
        lead = "-" * width
        head = title.center(width)
        contents = [lead, head, lead]
        # NOTE(review): `l` and `k` shadow outer names; kept as-is
        l = "Number of regimes: %d" % int(self.m)
        k = "Number of classes: %d" % int(self.k)
        r = "Regime names: "
        r += ", ".join(regime_names)
        t = "Number of transitions: %d" % int(self.t_total)
        contents.append(k)
        contents.append(t)
        contents.append(l)
        contents.append(r)
        contents.append(lead)
        h = "%7s %20s %20s" % ('Test', 'LR', 'Chi-2')
        contents.append(h)
        stat = "%7s %20.3f %20.3f" % ('Stat.', self.LR, self.Q)
        contents.append(stat)
        stat = "%7s %20d %20d" % ('DOF', self.dof, self.dof)
        contents.append(stat)
        stat = "%7s %20.3f %20.3f" % ('p-value', self.LR_p_value,
                                      self.Q_p_value)
        contents.append(stat)
        print("\n".join(contents))
        print(lead)

        # NOTE(review): uses self.regime_names here (not the local
        # regime_names fallback) — empty labels possible; confirm intended
        cols = ["P(%s)" % str(regime) for regime in self.regime_names]
        if not self.class_names:
            self.class_names = range(self.k)
        cols.extend(["%s" % str(cname) for cname in self.class_names])

        max_col = max([len(col) for col in cols])
        col_width = max([5, max_col])  # probabilities have 5 chars
        # p0 accumulates "&"-joined rows for the optional LaTeX output
        p0 = []
        line0 = ['{s: <{w}}'.format(s="P(H0)", w=col_width)]
        line0.extend((['{s: >{w}}'.format(s=cname, w=col_width) for cname in
                       self.class_names]))
        print(" ".join(line0))
        p0.append("&".join(line0))
        for i, row in enumerate(self.p_h0):
            line = ["%*s" % (col_width, str(self.class_names[i]))]
            line.extend(["%*.3f" % (col_width, v) for v in row])
            print(" ".join(line))
            p0.append("&".join(line))
        pmats = [p0]

        print(lead)
        # one block per regime for the alternative-hypothesis matrices
        for r, p1 in enumerate(self.p_h1):
            p0 = []
            line0 = ['{s: <{w}}'.format(s="P(%s)" %
                                        regime_names[r], w=col_width)]
            line0.extend((['{s: >{w}}'.format(s=cname, w=col_width) for cname
                           in self.class_names]))
            print(" ".join(line0))
            p0.append("&".join(line0))
            for i, row in enumerate(p1):
                line = ["%*s" % (col_width, str(self.class_names[i]))]
                line.extend(["%*.3f" % (col_width, v) for v in row])
                print(" ".join(line))
                p0.append("&".join(line))
            pmats.append(p0)
        print(lead)

        if file_name:
            # assemble the LaTeX tabular version of the same report
            k = self.k
            ks = str(k+1)
            with open(file_name, 'w') as f:
                c = []
                fmt = "r"*(k+1)
                s = "\\begin{tabular}{|%s|}\\hline\n" % fmt
                s += "\\multicolumn{%s}{|c|}{%s}" % (ks, title)
                c.append(s)
                s = "Number of classes: %d" % int(self.k)
                c.append("\\hline\\multicolumn{%s}{|l|}{%s}" % (ks, s))
                s = "Number of transitions: %d" % int(self.t_total)
                c.append("\\multicolumn{%s}{|l|}{%s}" % (ks, s))
                s = "Number of regimes: %d" % int(self.m)
                c.append("\\multicolumn{%s}{|l|}{%s}" % (ks, s))
                s = "Regime names: "
                s += ", ".join(regime_names)
                c.append("\\multicolumn{%s}{|l|}{%s}" % (ks, s))
                s = "\\hline\\multicolumn{2}{|l}{%s}" % ("Test")
                s += "&\\multicolumn{2}{r}{LR}&\\multicolumn{2}{r|}{Q}"
                c.append(s)
                s = "Stat."
                s = "\\multicolumn{2}{|l}{%s}" % (s)
                s += "&\\multicolumn{2}{r}{%.3f}" % self.LR
                s += "&\\multicolumn{2}{r|}{%.3f}" % self.Q
                c.append(s)
                s = "\\multicolumn{2}{|l}{%s}" % ("DOF")
                s += "&\\multicolumn{2}{r}{%d}" % int(self.dof)
                s += "&\\multicolumn{2}{r|}{%d}" % int(self.dof)
                c.append(s)
                s = "\\multicolumn{2}{|l}{%s}" % ("p-value")
                s += "&\\multicolumn{2}{r}{%.3f}" % self.LR_p_value
                s += "&\\multicolumn{2}{r|}{%.3f}" % self.Q_p_value
                c.append(s)
                s1 = "\\\\\n".join(c)
                s1 += "\\\\\n"
                c = []
                for mat in pmats:
                    c.append("\\hline\n")
                    for row in mat:
                        c.append(row+"\\\\\n")
                c.append("\\hline\n")
                c.append("\\end{tabular}")
                s2 = "".join(c)
                f.write(s1+s2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement