Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bokeh import mpl
- from math import pi
- import seaborn as sns
- import pandas as pd
- import numpy as np
- np.random.seed(0)
- from bokeh.io import curdoc
- from bokeh.charts import Bar
- from bokeh.models.widgets import Panel, Tabs
- from bokeh.layouts import layout, widgetbox, row, column
- from bokeh.models import ColumnDataSource, Select, Slider
- from bokeh.charts.attributes import ColorAttr, CatAttr
- from bokeh.plotting import figure
- from bokeh.palettes import Spectral6
- from sklearn import cluster, datasets
- from sklearn.neighbors import kneighbors_graph
- from sklearn.preprocessing import StandardScaler
# Load the five pre-sorted aspect tables once, at import time.
# The names are bound in the same order as the paths are listed.
data_ngram1, data_ngram2, data_ngram3, data_chunk, data_rake = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/degravek/Downloads/sorted_ngram1.csv',
        '/Users/degravek/Downloads/sorted_ngram2.csv',
        '/Users/degravek/Downloads/sorted_ngram3.csv',
        '/Users/degravek/Downloads/sorted_chunk.csv',
        '/Users/degravek/Downloads/sorted_rake.csv',
    )
)
- # define some helper functions
def get_dataset(dataset, n_samples):
    """Return the first and last ``n_samples`` rows of the named dataset.

    Args:
        dataset: Name of the table to read from. One of ``'n-gram 1'``,
            ``'n-gram 2'``, ``'n-gram 3'``, ``'chunk'`` or ``'rake'``.
        n_samples: Number of rows to take from each end of the table.

    Returns:
        A new DataFrame holding the first ``n_samples`` rows followed by the
        last ``n_samples`` rows, with their original index labels. If the
        table has no more than ``2 * n_samples`` rows, the whole table is
        returned once, so no row is repeated.

    Raises:
        ValueError: If ``dataset`` is not one of the known names.
    """
    # Built per call so the lookup always sees the current module-level frames.
    frames = {
        'n-gram 1': data_ngram1,
        'n-gram 2': data_ngram2,
        'n-gram 3': data_ngram3,
        'chunk': data_chunk,
        'rake': data_rake,
    }
    if dataset not in frames:
        raise ValueError(
            'unknown dataset %r; expected one of: %s'
            % (dataset, ', '.join(sorted(frames)))
        )
    frame = frames[dataset]

    # If the head and tail windows would meet or overlap, stitching them
    # together would repeat rows; hand back every row exactly once instead.
    if len(frame) <= 2 * n_samples:
        return frame.copy()

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    # and always returns a new object, so the loaded frame is never mutated.
    return pd.concat([frame.head(n_samples), frame.tail(n_samples)])
# Initial selection used before any widget has been touched.
dataset = 'n-gram 1'
n_samples = 15
asdata = get_dataset(dataset, n_samples)

# Column data for the plot (plot styling lives in theme.yaml).
source = ColumnDataSource(
    data={
        'x': asdata['aspects'].tolist(),
        'y': asdata['importance'].values,
    }
)
def mplot(source):
    """Build a bar chart of importance per aspect from ``source``.

    Args:
        source: Object whose ``data`` mapping has an ``'x'`` entry (used as
            the aspect labels) and a ``'y'`` entry (used as the importance
            values).

    Returns:
        The chart object produced by ``Bar``, with the legend disabled.
    """
    # NOTE(review): the chart is built from a DataFrame assembled here rather
    # than from `source` itself, so later changes to `source.data` presumably
    # do not reach the chart on their own -- confirm.
    frame = pd.DataFrame({
        'aspects': source.data['x'],
        'importance': source.data['y'],
    })
    return Bar(frame, values='importance', label='aspects', legend=False)
plot = mplot(source)

datasets_names = ['n-gram 1', 'n-gram 2', 'n-gram 3', 'chunk', 'rake']

# Drop-down for choosing which dataset is displayed.
dataset_select = Select(
    title='Select dataset:',
    value='n-gram 1',
    options=datasets_names,
    width=200,
)

# Slider whose value is passed to get_dataset as n_samples.
samples_slider = Slider(
    title="Number of aspects",
    start=1,
    end=30,
    step=1,
    value=15,
    width=200,
)
def update_samples_or_dataset(attrname, old, new):
    """Refresh the plot range and data source from the current widget values.

    The three parameters are accepted but not used; the current state is read
    directly from ``dataset_select`` and ``samples_slider``.
    """
    chosen_name = dataset_select.value
    row_count = int(samples_slider.value)
    selection = get_dataset(chosen_name, row_count)

    # Point the x-range factors at the new aspect labels before replacing the
    # data (the original author noted this step had been missing).
    plot.x_range.factors = selection['aspects'].tolist()
    source.data = {
        'x': selection['aspects'].tolist(),
        'y': selection['importance'].values,
    }
# Both widgets run the same refresh callback when their value changes.
for widget in (dataset_select, samples_slider):
    widget.on_change('value', update_samples_or_dataset)

# set up layout: the widgets sit in a column next to the plot
selects = row(dataset_select, width=420)
inputs = column(selects, widgetbox(samples_slider))

# add to document
document = curdoc()
document.add_root(row(inputs, plot))
document.title = "Clustering"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement