Advertisement
Guest User

Untitled

a guest
Mar 5th, 2017
326
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.35 KB | None | 0 0
  1. from bokeh import mpl
  2. from math import pi
  3. import seaborn as sns
  4. import pandas as pd
  5. import numpy as np
  6. np.random.seed(0)
  7.  
  8. from bokeh.io import curdoc
  9. from bokeh.charts import Bar
  10. from bokeh.models.widgets import Panel, Tabs
  11. from bokeh.layouts import layout, widgetbox, row, column
  12. from bokeh.models import ColumnDataSource, Select, Slider
  13. from bokeh.charts.attributes import ColorAttr, CatAttr
  14. from bokeh.plotting import figure
  15. from bokeh.palettes import Spectral6
  16.  
  17. from sklearn import cluster, datasets
  18. from sklearn.neighbors import kneighbors_graph
  19. from sklearn.preprocessing import StandardScaler
  20.  
  # Load the five pre-sorted aspect tables once at start-up. Each path is read
  # with pandas in the order listed and bound to its own module-level name.
  data_ngram1, data_ngram2, data_ngram3, data_chunk, data_rake = [
      pd.read_csv(csv_path)
      for csv_path in (
          '/Users/degravek/Downloads/sorted_ngram1.csv',
          '/Users/degravek/Downloads/sorted_ngram2.csv',
          '/Users/degravek/Downloads/sorted_ngram3.csv',
          '/Users/degravek/Downloads/sorted_chunk.csv',
          '/Users/degravek/Downloads/sorted_rake.csv',
      )
  ]
  26.  
  27. # define some helper functions
  def get_dataset(dataset, n_samples):
      """Return the first and last ``n_samples`` rows of the selected table.

      Args:
          dataset: Name of the table to read. One of 'n-gram 1', 'n-gram 2',
              'n-gram 3', 'chunk' or 'rake'.
          n_samples: Number of rows to take from each end of the table.

      Returns:
          A new DataFrame holding ``head(n_samples)`` followed by
          ``tail(n_samples)`` of the selected table. Original index labels
          are kept, and the module-level table itself is not modified. When
          the table has fewer than ``2 * n_samples`` rows the two slices
          overlap, so some rows appear twice.

      Raises:
          ValueError: If ``dataset`` is not one of the known names.
      """
      # Name -> module-level table loaded from CSV at import time.
      tables = {
          'n-gram 1': data_ngram1,
          'n-gram 2': data_ngram2,
          'n-gram 3': data_ngram3,
          'chunk': data_chunk,
          'rake': data_rake,
      }
      if dataset not in tables:
          # Previously an unknown name fell through every branch and failed
          # with an UnboundLocalError at the return statement.
          raise ValueError(
              'unknown dataset {!r}; expected one of {}'.format(
                  dataset, sorted(tables)))

      table = tables[dataset]
      # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
      # stacked result and always returns a fresh frame.
      return pd.concat([table.head(n_samples), table.tail(n_samples)])
  45.  
  # Starting selection: which table to show and how many rows to take from
  # each end of it.
  dataset = 'n-gram 1'
  n_samples = 15

  asdata = get_dataset(dataset, n_samples)

  # Column data for the plot (the original note says styling lives in
  # theme.yaml): aspect labels go in 'x', importance values in 'y'.
  source = ColumnDataSource(
      data={
          'x': asdata['aspects'].tolist(),
          'y': asdata['importance'].values,
      }
  )
  54.  
  def mplot(source):
      """Build a bar chart of importance per aspect.

      Args:
          source: Object whose ``data`` mapping provides the aspect labels
              under 'x' and the importance values under 'y'.

      Returns:
          The chart produced by ``Bar`` with 'aspects' as the label column,
          'importance' as the value column, and the legend turned off.
      """
      # Bar is given a DataFrame here, so repack the two columns into one.
      frame = pd.DataFrame({
          'aspects': source.data['x'],
          'importance': source.data['y'],
      })
      return Bar(frame, values='importance', label='aspects', legend=False)
  61.  
  # Chart built from the starting data.
  plot = mplot(source)

  # Choices offered by the dataset dropdown.
  datasets_names = ['n-gram 1', 'n-gram 2', 'n-gram 3', 'chunk', 'rake']

  # Dropdown for picking which table is shown.
  dataset_select = Select(
      title='Select dataset:',
      options=datasets_names,
      value='n-gram 1',
      width=200,
  )

  # Slider for how many rows are taken from each end of the table.
  samples_slider = Slider(
      title="Number of aspects",
      start=1,
      end=30,
      step=1,
      value=15,
      width=200,
  )
  83.  
  def update_samples_or_dataset(attrname, old, new):
      """Widget callback: re-read the selection and refresh plot and source.

      The ``attrname``, ``old`` and ``new`` arguments are accepted but not
      used; the current values are read directly from ``dataset_select`` and
      ``samples_slider``.
      """
      selection = get_dataset(dataset_select.value, int(samples_slider.value))

      # Replace the x-axis categories with the new aspect labels (the original
      # author noted that this assignment had been missing).
      plot.x_range.factors = selection['aspects'].tolist()

      # NOTE(review): `source` does not appear to be attached to the Bar chart
      # anywhere in this file - confirm that this assignment redraws the bars.
      source.data = {
          'x': selection['aspects'].tolist(),
          'y': selection['importance'].values,
      }
  91.  
  # Both widgets trigger the same refresh callback when their value changes.
  for widget in (dataset_select, samples_slider):
      widget.on_change('value', update_samples_or_dataset)

  # Layout: the dropdown row sits above the slider; the controls are placed
  # to the left of the plot.
  selects = row(dataset_select, width=420)
  slider_box = widgetbox(samples_slider)
  inputs = column(selects, slider_box)

  # Register the page with the current document.
  curdoc().add_root(row(inputs, plot))
  curdoc().title = "Clustering"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement