Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from gensim.corpora import Dictionary, MmCorpus\n",
- "from gensim.models.ldamodel import LdaModel\n",
- "from gensim.segmentation import S_One_Pre\n",
- "from gensim.matutils import argsort"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Toy corpus: nine already-tokenised documents, each a list of lowercase word strings.\n",
- "texts = [['human', 'interface', 'computer'],\n",
- " ['survey', 'user', 'computer', 'system', 'response', 'time'],\n",
- " ['eps', 'user', 'interface', 'system'],\n",
- " ['system', 'human', 'system', 'eps'],\n",
- " ['user', 'response', 'time'],\n",
- " ['trees'],\n",
- " ['graph', 'trees'],\n",
- " ['graph', 'minors', 'trees'],\n",
- " ['graph', 'minors', 'survey']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Build the gensim Dictionary (token <-> integer id vocabulary) from the tokenised documents.\n",
- "dictionary = Dictionary(texts)\n",
- "# Convert every document to its bag-of-words form with doc2bow.\n",
- "corpus = [dictionary.doc2bow(text) for text in texts]\n",
- "# Persist the corpus to disk via MmCorpus. The path is hard-coded and assumes a /tmp\n",
- "# directory exists; none of the later cells shown here read the file back.\n",
- "MmCorpus.serialize('/tmp/deerwester.mm', corpus)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Train LDA on the bag-of-words corpus. id2word is passed explicitly so that\n",
- "# lm.id2word yields the actual words; without it gensim substitutes a\n",
- "# placeholder mapping of stringified ids and str_topics would hold no words.\n",
- "lm = LdaModel(corpus=corpus, id2word=dictionary)\n",
- "\n",
- "topics = []      # per topic: ids of its 3 highest-weighted tokens\n",
- "str_topics = []  # per topic: (probability, word) pairs for those same tokens\n",
- "for topic_lambda in lm.state.get_lambda():\n",
- "    # Normalise the topic's lambda row so that it sums to 1.\n",
- "    topic_probs = topic_lambda / topic_lambda.sum()\n",
- "    bestn = argsort(topic_probs, topn=3, reverse=True)\n",
- "    topics.append(bestn)\n",
- "    # `token_id` rather than `id`: on Python 2 the comprehension variable leaks\n",
- "    # into the notebook namespace and would shadow the builtin id().\n",
- "    str_topics.append([(topic_probs[token_id], lm.id2word[token_id]) for token_id in bestn])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[ 9 10 7]\n"
- ]
- }
- ],
- "source": [
- "# Token ids of the three highest-weighted words of the first topic.\n",
- "# print(...) with a single argument behaves the same on Python 2 and Python 3.\n",
- "print(topics[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{0: [(10, 9), (7, 9), (7, 10)]}\n"
- ]
- }
- ],
- "source": [
- "# Segment a single topic; the printed result maps the topic index to a list of token-id pairs.\n",
- "# print(...) with a single argument behaves the same on Python 2 and Python 3.\n",
- "print(S_One_Pre([topics[0]]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{0: [(10, 9), (7, 9), (7, 10)],\n",
- " 1: [(10, 9), (7, 9), (7, 10)],\n",
- " 2: [(10, 9), (7, 9), (7, 10)],\n",
- " 3: [(10, 9), (7, 9), (7, 10)],\n",
- " 4: [(10, 9), (7, 9), (7, 10)],\n",
- " 5: [(10, 9), (7, 9), (7, 10)],\n",
- " 6: [(10, 9), (7, 9), (7, 10)],\n",
- " 7: [(10, 9), (7, 9), (7, 10)],\n",
- " 8: [(10, 9), (7, 9), (7, 10)],\n",
- " 9: [(10, 9), (7, 9), (7, 10)],\n",
- " 10: [(10, 9), (7, 9), (7, 10)],\n",
- " 11: [(10, 9), (7, 9), (7, 10)],\n",
- " 12: [(10, 9), (7, 9), (7, 10)],\n",
- " 13: [(10, 9), (7, 9), (7, 10)],\n",
- " 14: [(10, 9), (7, 9), (7, 10)],\n",
- " 15: [(10, 9), (7, 9), (7, 10)],\n",
- " 16: [(10, 9), (7, 9), (7, 10)],\n",
- " 17: [(10, 9), (7, 9), (7, 10)],\n",
- " 18: [(10, 9), (7, 9), (7, 10)],\n",
- " 19: [(10, 9), (7, 9), (7, 10)],\n",
- " 20: [(10, 9), (7, 9), (7, 10)],\n",
- " 21: [(7, 4), (6, 4), (6, 7)],\n",
- " 22: [(10, 9), (7, 9), (7, 10)],\n",
- " 23: [(10, 9), (7, 9), (7, 10)],\n",
- " 24: [(10, 9), (7, 9), (7, 10)],\n",
- " 25: [(10, 9), (7, 9), (7, 10)],\n",
- " 26: [(1, 2), (0, 2), (0, 1)],\n",
- " 27: [(10, 9), (7, 9), (7, 10)],\n",
- " 28: [(10, 9), (7, 9), (7, 10)],\n",
- " 29: [(10, 9), (7, 9), (7, 10)],\n",
- " 30: [(10, 9), (7, 9), (7, 10)],\n",
- " 31: [(10, 9), (7, 9), (7, 10)],\n",
- " 32: [(10, 9), (7, 9), (7, 10)],\n",
- " 33: [(10, 9), (7, 9), (7, 10)],\n",
- " 34: [(10, 9), (7, 9), (7, 10)],\n",
- " 35: [(10, 9), (7, 9), (7, 10)],\n",
- " 36: [(10, 9), (7, 9), (7, 10)],\n",
- " 37: [(10, 9), (7, 9), (7, 10)],\n",
- " 38: [(10, 9), (7, 9), (7, 10)],\n",
- " 39: [(10, 9), (7, 9), (7, 10)],\n",
- " 40: [(10, 9), (7, 9), (7, 10)],\n",
- " 41: [(10, 9), (7, 9), (7, 10)],\n",
- " 42: [(10, 9), (7, 9), (7, 10)],\n",
- " 43: [(10, 9), (7, 9), (7, 10)],\n",
- " 44: [(10, 9), (7, 9), (7, 10)],\n",
- " 45: [(10, 9), (7, 9), (7, 10)],\n",
- " 46: [(10, 9), (7, 9), (7, 10)],\n",
- " 47: [(11, 5), (10, 5), (10, 11)],\n",
- " 48: [(10, 9), (7, 9), (7, 10)],\n",
- " 49: [(10, 9), (7, 9), (7, 10)],\n",
- " 50: [(10, 9), (7, 9), (7, 10)],\n",
- " 51: [(10, 9), (7, 9), (7, 10)],\n",
- " 52: [(10, 9), (7, 9), (7, 10)],\n",
- " 53: [(10, 9), (7, 9), (7, 10)],\n",
- " 54: [(10, 9), (7, 9), (7, 10)],\n",
- " 55: [(10, 9), (7, 9), (7, 10)],\n",
- " 56: [(10, 9), (7, 9), (7, 10)],\n",
- " 57: [(10, 9), (7, 9), (7, 10)],\n",
- " 58: [(10, 9), (7, 9), (7, 10)],\n",
- " 59: [(10, 9), (7, 9), (7, 10)],\n",
- " 60: [(10, 9), (7, 9), (7, 10)],\n",
- " 61: [(10, 9), (7, 9), (7, 10)],\n",
- " 62: [(10, 9), (7, 9), (7, 10)],\n",
- " 63: [(10, 9), (7, 9), (7, 10)],\n",
- " 64: [(10, 9), (7, 9), (7, 10)],\n",
- " 65: [(10, 9), (11, 9), (11, 10)],\n",
- " 66: [(10, 9), (7, 9), (7, 10)],\n",
- " 67: [(10, 9), (7, 9), (7, 10)],\n",
- " 68: [(10, 9), (7, 9), (7, 10)],\n",
- " 69: [(10, 9), (7, 9), (7, 10)],\n",
- " 70: [(10, 9), (7, 9), (7, 10)],\n",
- " 71: [(3, 4), (7, 4), (7, 3)],\n",
- " 72: [(10, 9), (7, 9), (7, 10)],\n",
- " 73: [(10, 9), (7, 9), (7, 10)],\n",
- " 74: [(10, 9), (7, 9), (7, 10)],\n",
- " 75: [(2, 6), (8, 6), (8, 2)],\n",
- " 76: [(6, 8), (0, 8), (0, 6)],\n",
- " 77: [(10, 9), (7, 9), (7, 10)],\n",
- " 78: [(10, 9), (7, 9), (7, 10)],\n",
- " 79: [(10, 9), (7, 9), (7, 10)],\n",
- " 80: [(10, 9), (7, 9), (7, 10)],\n",
- " 81: [(10, 9), (7, 9), (7, 10)],\n",
- " 82: [(10, 9), (7, 9), (7, 10)],\n",
- " 83: [(10, 9), (7, 9), (7, 10)],\n",
- " 84: [(10, 9), (7, 9), (7, 10)],\n",
- " 85: [(10, 9), (7, 9), (7, 10)],\n",
- " 86: [(10, 9), (7, 9), (7, 10)],\n",
- " 87: [(10, 9), (7, 9), (7, 10)],\n",
- " 88: [(10, 9), (7, 9), (7, 10)],\n",
- " 89: [(10, 9), (7, 9), (7, 10)],\n",
- " 90: [(10, 9), (7, 9), (7, 10)],\n",
- " 91: [(10, 9), (7, 9), (7, 10)],\n",
- " 92: [(10, 9), (7, 9), (7, 10)],\n",
- " 93: [(10, 9), (7, 9), (7, 10)],\n",
- " 94: [(10, 9), (7, 9), (7, 10)],\n",
- " 95: [(10, 9), (7, 9), (7, 10)],\n",
- " 96: [(10, 9), (7, 9), (7, 10)],\n",
- " 97: [(10, 9), (7, 9), (7, 10)],\n",
- " 98: [(10, 9), (7, 9), (7, 10)],\n",
- " 99: [(10, 9), (7, 9), (7, 10)]}"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Segment every topic at once. The displayed result is a dict keyed by topic index\n",
- "# (0-99 here) whose values are lists of token-id pairs.\n",
- "S_One_Pre(topics)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement