Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from math import pi
- import pandas as pd
- import numpy as np
- import seaborn as sns
- from bokeh import mpl
- np.random.seed(0)
- from bokeh.io import curdoc
- from bokeh.charts import Bar
- from bokeh.layouts import widgetbox, row, column
- from bokeh.models import ColumnDataSource, Select, Slider
- from bokeh.charts.attributes import ColorAttr, CatAttr
- from bokeh.plotting import figure
- from bokeh.palettes import Spectral6
- from sklearn import cluster, datasets
- from sklearn.neighbors import kneighbors_graph
- from sklearn.preprocessing import StandardScaler
# Load the two n-gram tables once at start-up; get_dataset() reads from
# these module-level frames on every request.
_NGRAM_CSV_PATHS = (
    '/Users/degravek/Downloads/sorted_ngram1.csv',
    '/Users/degravek/Downloads/sorted_ngram2.csv',
)
data_ngram1, data_ngram2 = (pd.read_csv(csv_path) for csv_path in _NGRAM_CSV_PATHS)
# define some helper functions
def get_dataset(dataset, n_samples):
    """Return the first and last ``n_samples`` rows of the selected table.

    Parameters
    ----------
    dataset : str
        ``'n-gram 1'`` selects the module-level ``data_ngram1`` frame,
        ``'n-gram 2'`` selects ``data_ngram2``.
    n_samples : int
        Number of rows to take from each end of the table.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the first ``n_samples`` rows followed by the
        last ``n_samples`` rows, with the original index labels kept.
        When the two slices would overlap (the table has at most
        ``2 * n_samples`` rows) the whole table is returned once, so no
        row appears twice.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the known dataset names.
    """
    tables = {
        'n-gram 1': data_ngram1,
        'n-gram 2': data_ngram2,
    }
    if dataset not in tables:
        raise ValueError(
            'unknown dataset {!r}; expected one of {}'.format(dataset, sorted(tables))
        )
    frame = tables[dataset]

    # head() and tail() would share rows here; duplicated rows would put
    # the same aspect on the categorical axis twice.
    if 2 * n_samples >= len(frame):
        return frame.copy()

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([frame.head(n_samples), frame.tail(n_samples)])
# set up initial data
n_samples = 10
dataset = 'n-gram 1'
asdata = get_dataset(dataset, n_samples)

# set up plot (styling in theme.yaml)
# The data source holds the aspect labels ('x') and their importance ('y').
source = ColumnDataSource(data=dict(x=asdata['aspects'].tolist(), y=asdata['importance'].values))
# Passing the list of labels as x_range makes the x axis categorical.
plot = figure(toolbar_location=None, x_range=source.data['x'])
# Refer to both columns by name: mixing a literal sequence with a
# user-supplied `source` is rejected by Bokeh, and a by-name reference is
# what lets the bars follow later updates to `source.data`.
plot.vbar(x='x', width=0.5, bottom=0, top='y', source=source)
# Rotate the category labels by 90 degrees (the orientation is in radians).
plot.xaxis.major_label_orientation = pi/2
# Names offered in the dropdown; each must be a name get_dataset() accepts.
datasets_names = [
    'n-gram 1',
    'n-gram 2',
]

# Both widgets start from the same `dataset` / `n_samples` values that were
# used to build the initial plot, so the controls match what is displayed.
dataset_select = Select(value=dataset,
                        title='Select dataset:',
                        width=200,
                        options=datasets_names)

samples_slider = Slider(title="Number of aspects",
                        value=n_samples,
                        start=1,
                        end=30,
                        step=1,
                        width=400)
def update_samples_or_dataset(attrname, old, new):
    """Rebuild the plotted data after either widget changes.

    Bokeh ``on_change`` callback. The three arguments are required by the
    callback signature but are not used; the current widget values are
    read directly from ``dataset_select`` and ``samples_slider``.

    Side effects: rebinds the module-level ``asdata``, replaces
    ``source.data`` and resets the factors of ``plot.x_range``.
    """
    global asdata

    dataset = dataset_select.value
    n_samples = int(samples_slider.value)
    asdata = get_dataset(dataset, n_samples)

    aspects = asdata['aspects'].tolist()
    # The categorical axis was created from the initial labels only; without
    # refreshing its factors, bars for new aspects have no slot on the axis.
    plot.x_range.factors = list(aspects)
    source.data = dict(x=aspects, y=asdata['importance'].values)
# Both controls trigger the same refresh when their value changes.
for control in (dataset_select, samples_slider):
    control.on_change('value', update_samples_or_dataset)

# set up layout: dropdown above the slider, controls to the left of the plot
selects = row(dataset_select, width=420)
slider_box = widgetbox(samples_slider)
inputs = column(selects, slider_box)

# add to document
document = curdoc()
document.add_root(row(inputs, plot))
document.title = "Clustering"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement