Advertisement
Guest User

Untitled

a guest
Mar 3rd, 2017
616
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. from math import pi
  2. import pandas as pd
  3. import numpy as np
  4. import seaborn as sns
  5. from bokeh import mpl
  6. np.random.seed(0)
  7.  
  8. from bokeh.io import curdoc
  9. from bokeh.charts import Bar
  10. from bokeh.layouts import widgetbox, row, column
  11. from bokeh.models import ColumnDataSource, Select, Slider
  12. from bokeh.charts.attributes import ColorAttr, CatAttr
  13. from bokeh.plotting import figure
  14. from bokeh.palettes import Spectral6
  15.  
  16. from sklearn import cluster, datasets
  17. from sklearn.neighbors import kneighbors_graph
  18. from sklearn.preprocessing import StandardScaler
  19.  
  20. data_ngram1 = pd.read_csv('/Users/degravek/Downloads/sorted_ngram1.csv')
  21. data_ngram2 = pd.read_csv('/Users/degravek/Downloads/sorted_ngram2.csv')
  22.  
  23. # define some helper functions
  24. def get_dataset(dataset, n_samples):
  25. if dataset == 'n-gram 1':
  26. asdata = data_ngram1.copy()
  27. asdata = asdata.head(n_samples).append(asdata.tail(n_samples))
  28. elif dataset == 'n-gram 2':
  29. asdata = data_ngram2.copy()
  30. asdata = asdata.head(n_samples).append(asdata.tail(n_samples))
  31. return asdata
  32.  
  33. # set up initial data
  34. n_samples = 10
  35. dataset = 'n-gram 1'
  36.  
  37. asdata = get_dataset(dataset, n_samples)
  38.  
  39. # set up plot (styling in theme.yaml)
  40. source = ColumnDataSource(data=dict(x=asdata['aspects'].tolist(), y=asdata['importance'].values))
  41.  
  42. plot = figure(toolbar_location=None, x_range=source.data['x'])
  43. plot.vbar(x=source.data['x'], width=0.5, bottom=0, top='y', source=source)
  44. plot.xaxis.major_label_orientation = pi/2
  45.  
  46. datasets_names = [
  47. 'n-gram 1',
  48. 'n-gram 2'
  49. ]
  50.  
  51. dataset_select = Select(value='n-gram 1',
  52. title='Select dataset:',
  53. width=200,
  54. options=datasets_names)
  55.  
  56. samples_slider = Slider(title="Number of aspects",
  57. value=15,
  58. start=1,
  59. end=30,
  60. step=1,
  61. width=400)
  62.  
  63. def update_samples_or_dataset(attrname, old, new):
  64. global asdata
  65. dataset = dataset_select.value
  66. n_samples = int(samples_slider.value)
  67.  
  68. asdata = get_dataset(dataset, n_samples)
  69. source.data = dict(x=asdata['aspects'].tolist(), y=asdata['importance'].values)
  70.  
  71. dataset_select.on_change('value', update_samples_or_dataset)
  72. samples_slider.on_change('value', update_samples_or_dataset)
  73.  
  74. # set up layout
  75. selects = row(dataset_select, width=420)
  76. inputs = column(selects, widgetbox(samples_slider))
  77.  
  78. # add to document
  79. curdoc().add_root(row(inputs, plot))
  80. curdoc().title = "Clustering"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement