Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def bokeh_tukey_summary_boxplot(self, dataset_name=None, *,
                                category_col="segments", value_col="height"):
    """Render a Tukey box plot of one table column, grouped by category.

    The table is read in full from the default PostgreSQL database, the
    quartiles and 1.5*IQR fences of ``value_col`` are computed per
    ``category_col`` value, and the plot is returned as embeddable Bokeh
    components.

    Relies on ``pd`` (pandas) and ``settings`` being available at module
    scope, as the original code did.

    Args:
        dataset_name: Name of the table to plot. When omitted, the method
            falls back to ``self.dataset_name``.
            NOTE(review): the original left this lookup as a placeholder;
            the ``self.dataset_name`` fallback is an assumption - confirm.
        category_col: Column whose distinct values become the x-axis
            categories.
        value_col: Numeric column whose distribution is summarised.

    Returns:
        A 4-tuple ``(script, div, cdn_js, cdn_css)``: the ``<script>`` and
        ``<div>`` markup from ``bokeh.embed.components`` plus the first
        BokehJS CDN JavaScript and CSS URLs (CSS is ``""`` when the
        installed Bokeh exposes no CSS files).

    Raises:
        ValueError: If no dataset name could be determined.
    """
    from urllib.parse import quote

    from bokeh.embed import components
    from bokeh.plotting import figure
    from bokeh.resources import CDN
    from sqlalchemy import create_engine

    if dataset_name is None:
        dataset_name = getattr(self, "dataset_name", None)
    if not dataset_name:
        raise ValueError("dataset_name is required to build the box plot")

    db_settings = settings.DATABASES['default']
    # Credentials are percent-encoded so characters such as '@', '/' or ':'
    # cannot corrupt the URL structure.
    database_url = 'postgresql://{user}:{password}@localhost:5432/{database_name}'.format(
        user=quote(db_settings['USER'], safe=''),
        password=quote(db_settings['PASSWORD'], safe=''),
        database_name=db_settings['NAME'],
    )
    engine = create_engine(database_url, echo=False)
    # SECURITY: a table name cannot be sent as a bound parameter, so it is
    # interpolated into the statement. dataset_name must come from a trusted
    # source, never directly from user input.
    sql_command = "SELECT * FROM {} ;".format(str(dataset_name))
    try:
        df_for_bokeh = pd.read_sql(sql_command, engine)
    finally:
        # The engine is created per call; release its connection pool.
        engine.dispose()

    # Per-category quartiles and Tukey fences. Selecting the value column
    # first keeps the statistics away from any non-numeric columns.
    grouped = df_for_bokeh.groupby(category_col)[value_col]
    q1 = grouped.quantile(q=0.25)
    q2 = grouped.quantile(q=0.5)
    q3 = grouped.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5 * iqr
    lower = q1 - 1.5 * iqr

    # Outliers: each row is compared against the fences of its OWN category,
    # on both the high and the low side.
    row_category = df_for_bokeh[category_col]
    row_value = df_for_bokeh[value_col]
    is_outlier = (
        (row_value > row_category.map(upper))
        | (row_value < row_category.map(lower))
    )
    outx = [str(category) for category in row_category[is_outlier]]
    outy = list(row_value[is_outlier])

    # Categories are taken from the statistics' index (sorted by groupby) so
    # every glyph list below lines up with its category.
    cats = [str(category) for category in q1.index]

    # Shrink the whiskers so they never extend past the observed min / max.
    upper_whisker = [min(pair) for pair in zip(grouped.max(), upper)]
    lower_whisker = [max(pair) for pair in zip(grouped.min(), lower)]
    q1_values = list(q1)
    q2_values = list(q2)
    q3_values = list(q3)

    TOOLTIPS = """
    <div style="background-color:orange;">
    <div>
    <span style="font-size: 15px; color: #966;">@name</span>
    </div>
    <div>
    <span style="font-size: 10px; color: black;">($y{int})</span>
    </div>
    </div>
    """

    p = figure(tools="", background_fill_color="#efefef", x_range=cats,
               plot_width=195, plot_height=550, tooltips=TOOLTIPS)

    # stems
    p.segment(x0=cats, y0=upper_whisker, x1=cats, y1=q3_values, line_color="black")
    p.segment(x0=cats, y0=lower_whisker, x1=cats, y1=q1_values, line_color="black")

    # boxes
    p.vbar(x=cats, width=0.7, top=q2_values, bottom=q3_values,
           fill_color="#E08E79", line_color="black")
    p.vbar(x=cats, width=0.7, top=q1_values, bottom=q2_values,
           fill_color="#3B8686", line_color="black")

    # whiskers (almost-0 height rects simpler than segments)
    p.rect(x=cats, y=lower_whisker, width=0.2, height=0.01, line_color="black")
    p.rect(x=cats, y=upper_whisker, width=0.2, height=0.01, line_color="black")

    # outliers
    if outx:
        p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = "white"
    p.grid.grid_line_width = 2
    p.xaxis.major_label_text_font_size = "12pt"
    p.toolbar.logo = None
    p.toolbar_location = None

    js_boxplot, div_boxplot = components(p)
    cdn_js_boxplot = CDN.js_files[0]
    # NOTE(review): some Bokeh releases appear to ship no separate CSS file;
    # return an empty string rather than raising IndexError - confirm.
    cdn_css_boxplot = CDN.css_files[0] if CDN.css_files else ""
    return js_boxplot, div_boxplot, cdn_js_boxplot, cdn_css_boxplot
Add Comment
Please, Sign In to add comment