Advertisement
Guest User

Untitled

a guest
May 27th, 2016
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.34 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "collapsed": true
  8. },
  9. "outputs": [],
  10. "source": [
  11. "from gensim.corpora import Dictionary, MmCorpus\n",
  12. "from gensim.models.ldamodel import LdaModel\n",
  13. "from gensim.segmentation import S_One_Pre\n",
  14. "from gensim.matutils import argsort"
  15. ]
  16. },
  17. {
  18. "cell_type": "code",
  19. "execution_count": 2,
  20. "metadata": {
  21. "collapsed": true
  22. },
  23. "outputs": [],
  24. "source": [
  25. "texts = [['human', 'interface', 'computer'],\n",
  26. " ['survey', 'user', 'computer', 'system', 'response', 'time'],\n",
  27. " ['eps', 'user', 'interface', 'system'],\n",
  28. " ['system', 'human', 'system', 'eps'],\n",
  29. " ['user', 'response', 'time'],\n",
  30. " ['trees'],\n",
  31. " ['graph', 'trees'],\n",
  32. " ['graph', 'minors', 'trees'],\n",
  33. " ['graph', 'minors', 'survey']]"
  34. ]
  35. },
  36. {
  37. "cell_type": "code",
  38. "execution_count": 3,
  39. "metadata": {
  40. "collapsed": true
  41. },
  42. "outputs": [],
  43. "source": [
  44. "dictionary = Dictionary(texts)\n",
  45. "corpus = [dictionary.doc2bow(text) for text in texts]\n",
  46. "MmCorpus.serialize('/tmp/deerwester.mm', corpus)"
  47. ]
  48. },
  49. {
  50. "cell_type": "code",
  51. "execution_count": 4,
  52. "metadata": {
  53. "collapsed": false
  54. },
  55. "outputs": [],
  56. "source": [
  57. "topics = []\n",
  58. "str_topics = []\n",
  59. "lm = LdaModel(corpus=corpus)\n",
  60. "for topic in lm.state.get_lambda():\n",
  61. "    topic = topic / topic.sum()\n",
  62. "    bestn = argsort(topic, topn=3, reverse=True)\n",
  63. "    topics.append(bestn)\n",
  64. "    beststr = [(topic[id], lm.id2word[id]) for id in bestn]\n",
  65. "    str_topics.append(beststr)"
  66. ]
  67. },
  68. {
  69. "cell_type": "code",
  70. "execution_count": 5,
  71. "metadata": {
  72. "collapsed": false
  73. },
  74. "outputs": [
  75. {
  76. "name": "stdout",
  77. "output_type": "stream",
  78. "text": [
  79. "[ 9 10  7]\n"
  80. ]
  81. }
  82. ],
  83. "source": [
  84. "print topics[0]"
  85. ]
  86. },
  87. {
  88. "cell_type": "code",
  89. "execution_count": 6,
  90. "metadata": {
  91. "collapsed": false
  92. },
  93. "outputs": [
  94. {
  95. "name": "stdout",
  96. "output_type": "stream",
  97. "text": [
  98. "{0: [(10, 9), (7, 9), (7, 10)]}\n"
  99. ]
  100. }
  101. ],
  102. "source": [
  103. "print S_One_Pre([topics[0]])"
  104. ]
  105. },
  106. {
  107. "cell_type": "code",
  108. "execution_count": 7,
  109. "metadata": {
  110. "collapsed": false
  111. },
  112. "outputs": [
  113. {
  114. "data": {
  115. "text/plain": [
  116. "{0: [(10, 9), (7, 9), (7, 10)],\n",
  117. " 1: [(10, 9), (7, 9), (7, 10)],\n",
  118. " 2: [(10, 9), (7, 9), (7, 10)],\n",
  119. " 3: [(10, 9), (7, 9), (7, 10)],\n",
  120. " 4: [(10, 9), (7, 9), (7, 10)],\n",
  121. " 5: [(10, 9), (7, 9), (7, 10)],\n",
  122. " 6: [(10, 9), (7, 9), (7, 10)],\n",
  123. " 7: [(10, 9), (7, 9), (7, 10)],\n",
  124. " 8: [(10, 9), (7, 9), (7, 10)],\n",
  125. " 9: [(10, 9), (7, 9), (7, 10)],\n",
  126. " 10: [(10, 9), (7, 9), (7, 10)],\n",
  127. " 11: [(10, 9), (7, 9), (7, 10)],\n",
  128. " 12: [(10, 9), (7, 9), (7, 10)],\n",
  129. " 13: [(10, 9), (7, 9), (7, 10)],\n",
  130. " 14: [(10, 9), (7, 9), (7, 10)],\n",
  131. " 15: [(10, 9), (7, 9), (7, 10)],\n",
  132. " 16: [(10, 9), (7, 9), (7, 10)],\n",
  133. " 17: [(10, 9), (7, 9), (7, 10)],\n",
  134. " 18: [(10, 9), (7, 9), (7, 10)],\n",
  135. " 19: [(10, 9), (7, 9), (7, 10)],\n",
  136. " 20: [(10, 9), (7, 9), (7, 10)],\n",
  137. " 21: [(7, 4), (6, 4), (6, 7)],\n",
  138. " 22: [(10, 9), (7, 9), (7, 10)],\n",
  139. " 23: [(10, 9), (7, 9), (7, 10)],\n",
  140. " 24: [(10, 9), (7, 9), (7, 10)],\n",
  141. " 25: [(10, 9), (7, 9), (7, 10)],\n",
  142. " 26: [(1, 2), (0, 2), (0, 1)],\n",
  143. " 27: [(10, 9), (7, 9), (7, 10)],\n",
  144. " 28: [(10, 9), (7, 9), (7, 10)],\n",
  145. " 29: [(10, 9), (7, 9), (7, 10)],\n",
  146. " 30: [(10, 9), (7, 9), (7, 10)],\n",
  147. " 31: [(10, 9), (7, 9), (7, 10)],\n",
  148. " 32: [(10, 9), (7, 9), (7, 10)],\n",
  149. " 33: [(10, 9), (7, 9), (7, 10)],\n",
  150. " 34: [(10, 9), (7, 9), (7, 10)],\n",
  151. " 35: [(10, 9), (7, 9), (7, 10)],\n",
  152. " 36: [(10, 9), (7, 9), (7, 10)],\n",
  153. " 37: [(10, 9), (7, 9), (7, 10)],\n",
  154. " 38: [(10, 9), (7, 9), (7, 10)],\n",
  155. " 39: [(10, 9), (7, 9), (7, 10)],\n",
  156. " 40: [(10, 9), (7, 9), (7, 10)],\n",
  157. " 41: [(10, 9), (7, 9), (7, 10)],\n",
  158. " 42: [(10, 9), (7, 9), (7, 10)],\n",
  159. " 43: [(10, 9), (7, 9), (7, 10)],\n",
  160. " 44: [(10, 9), (7, 9), (7, 10)],\n",
  161. " 45: [(10, 9), (7, 9), (7, 10)],\n",
  162. " 46: [(10, 9), (7, 9), (7, 10)],\n",
  163. " 47: [(11, 5), (10, 5), (10, 11)],\n",
  164. " 48: [(10, 9), (7, 9), (7, 10)],\n",
  165. " 49: [(10, 9), (7, 9), (7, 10)],\n",
  166. " 50: [(10, 9), (7, 9), (7, 10)],\n",
  167. " 51: [(10, 9), (7, 9), (7, 10)],\n",
  168. " 52: [(10, 9), (7, 9), (7, 10)],\n",
  169. " 53: [(10, 9), (7, 9), (7, 10)],\n",
  170. " 54: [(10, 9), (7, 9), (7, 10)],\n",
  171. " 55: [(10, 9), (7, 9), (7, 10)],\n",
  172. " 56: [(10, 9), (7, 9), (7, 10)],\n",
  173. " 57: [(10, 9), (7, 9), (7, 10)],\n",
  174. " 58: [(10, 9), (7, 9), (7, 10)],\n",
  175. " 59: [(10, 9), (7, 9), (7, 10)],\n",
  176. " 60: [(10, 9), (7, 9), (7, 10)],\n",
  177. " 61: [(10, 9), (7, 9), (7, 10)],\n",
  178. " 62: [(10, 9), (7, 9), (7, 10)],\n",
  179. " 63: [(10, 9), (7, 9), (7, 10)],\n",
  180. " 64: [(10, 9), (7, 9), (7, 10)],\n",
  181. " 65: [(10, 9), (11, 9), (11, 10)],\n",
  182. " 66: [(10, 9), (7, 9), (7, 10)],\n",
  183. " 67: [(10, 9), (7, 9), (7, 10)],\n",
  184. " 68: [(10, 9), (7, 9), (7, 10)],\n",
  185. " 69: [(10, 9), (7, 9), (7, 10)],\n",
  186. " 70: [(10, 9), (7, 9), (7, 10)],\n",
  187. " 71: [(3, 4), (7, 4), (7, 3)],\n",
  188. " 72: [(10, 9), (7, 9), (7, 10)],\n",
  189. " 73: [(10, 9), (7, 9), (7, 10)],\n",
  190. " 74: [(10, 9), (7, 9), (7, 10)],\n",
  191. " 75: [(2, 6), (8, 6), (8, 2)],\n",
  192. " 76: [(6, 8), (0, 8), (0, 6)],\n",
  193. " 77: [(10, 9), (7, 9), (7, 10)],\n",
  194. " 78: [(10, 9), (7, 9), (7, 10)],\n",
  195. " 79: [(10, 9), (7, 9), (7, 10)],\n",
  196. " 80: [(10, 9), (7, 9), (7, 10)],\n",
  197. " 81: [(10, 9), (7, 9), (7, 10)],\n",
  198. " 82: [(10, 9), (7, 9), (7, 10)],\n",
  199. " 83: [(10, 9), (7, 9), (7, 10)],\n",
  200. " 84: [(10, 9), (7, 9), (7, 10)],\n",
  201. " 85: [(10, 9), (7, 9), (7, 10)],\n",
  202. " 86: [(10, 9), (7, 9), (7, 10)],\n",
  203. " 87: [(10, 9), (7, 9), (7, 10)],\n",
  204. " 88: [(10, 9), (7, 9), (7, 10)],\n",
  205. " 89: [(10, 9), (7, 9), (7, 10)],\n",
  206. " 90: [(10, 9), (7, 9), (7, 10)],\n",
  207. " 91: [(10, 9), (7, 9), (7, 10)],\n",
  208. " 92: [(10, 9), (7, 9), (7, 10)],\n",
  209. " 93: [(10, 9), (7, 9), (7, 10)],\n",
  210. " 94: [(10, 9), (7, 9), (7, 10)],\n",
  211. " 95: [(10, 9), (7, 9), (7, 10)],\n",
  212. " 96: [(10, 9), (7, 9), (7, 10)],\n",
  213. " 97: [(10, 9), (7, 9), (7, 10)],\n",
  214. " 98: [(10, 9), (7, 9), (7, 10)],\n",
  215. " 99: [(10, 9), (7, 9), (7, 10)]}"
  216. ]
  217. },
  218. "execution_count": 7,
  219. "metadata": {},
  220. "output_type": "execute_result"
  221. }
  222. ],
  223. "source": [
  224. "S_One_Pre(topics)"
  225. ]
  226. }
  227. ],
  228. "metadata": {
  229. "kernelspec": {
  230. "display_name": "Python 2",
  231. "language": "python",
  232. "name": "python2"
  233. },
  234. "language_info": {
  235. "codemirror_mode": {
  236. "name": "ipython",
  237. "version": 2
  238. },
  239. "file_extension": ".py",
  240. "mimetype": "text/x-python",
  241. "name": "python",
  242. "nbconvert_exporter": "python",
  243. "pygments_lexer": "ipython2",
  244. "version": "2.7.11"
  245. }
  246. },
  247. "nbformat": 4,
  248. "nbformat_minor": 0
  249. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement