Guest User

Python BoxPlot with Bokeh - Multiple data points displayed

a guest
May 7th, 2019
170
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.34 KB | None | 0 0
  def bokeh_tukey_summary_boxplot(self):
      """Build a Tukey box plot of ``height`` per ``segments`` category.

      Reads the whole table named by ``dataset_name`` from the default
      Django database, computes per-category quartiles and the 1.5 * IQR
      fences, and draws boxes, whiskers and outlier markers with Bokeh.

      Returns:
          A 4-tuple ``(js_boxplot, div_boxplot, cdn_js_boxplot,
          cdn_css_boxplot)``: the two values produced by
          ``bokeh.embed.components`` for the figure, followed by the first
          Bokeh CDN JavaScript URL and the first Bokeh CDN CSS URL (an empty
          string when the installed Bokeh lists no CSS files).

      NOTE(review): relies on ``pd`` and ``settings`` being available at
      module level; the table is assumed to have a ``segments`` column and a
      numeric ``height`` column -- confirm against the real schema.
      """
      from urllib.parse import quote_plus

      from bokeh.embed import components
      from bokeh.plotting import figure
      from bokeh.resources import CDN
      from sqlalchemy import create_engine

      category_col = "segments"
      value_col = "height"

      # NOTE(review): placeholder left by the original author for "some code
      # to get the current dataset_name". As written this line raises
      # UnboundLocalError; replace it with the real lookup.
      dataset_name = dataset_name

      db_settings = settings.DATABASES['default']
      # Credentials are URL-escaped so characters such as "@" or "/" in the
      # password cannot corrupt the connection URL.
      database_url = 'postgresql://{user}:{password}@localhost:5432/{database_name}'.format(
          user=quote_plus(str(db_settings['USER'])),
          password=quote_plus(str(db_settings['PASSWORD'])),
          database_name=db_settings['NAME'],
      )
      engine = create_engine(database_url, echo=False)
      try:
          # NOTE(review): the table name is interpolated into the SQL text (an
          # identifier cannot be a bound parameter). It must come from a
          # trusted source, never directly from user input.
          sql_command = "SELECT * FROM {} ;".format(str(dataset_name))
          df_for_bokeh = pd.read_sql(sql_command, engine)
      finally:
          # The engine is created per call, so release its connection pool.
          engine.dispose()

      # Per-category quartiles and Tukey fences, indexed by category in
      # groupby (sorted) order.
      heights = df_for_bokeh.groupby(category_col)[value_col]
      q1 = heights.quantile(q=0.25)
      q2 = heights.quantile(q=0.5)
      q3 = heights.quantile(q=0.75)
      iqr = q3 - q1
      upper = q3 + 1.5 * iqr
      lower = q1 - 1.5 * iqr

      # Outliers: compare every row against the fences of ITS OWN category
      # (the previous code compared all rows against the last category's
      # upper fence only). Rows with a missing category map to NaN and are
      # never flagged.
      row_upper = df_for_bokeh[category_col].map(upper)
      row_lower = df_for_bokeh[category_col].map(lower)
      is_outlier = (
          (df_for_bokeh[value_col] > row_upper)
          | (df_for_bokeh[value_col] < row_lower)
      )
      out = df_for_bokeh.loc[is_outlier, [category_col, value_col]]
      outx = [str(cat) for cat in out[category_col]]
      outy = out[value_col].tolist()

      TOOLTIPS = """
          <div style="background-color:orange;">
              <div>
                  <span style="font-size: 15px; color: #966;">@name</span>
              </div>

              <div>
                  <span style="font-size: 10px; color: black;">($y{int})</span>
              </div>
          </div>
      """

      # Take the x-axis categories from the quantile index so that each box
      # lines up with its own statistics; categorical factors must be strings.
      cats = [str(cat) for cat in q2.index]
      p = figure(tools="", background_fill_color="#efefef", x_range=cats,
                 plot_width=195, plot_height=550, tooltips=TOOLTIPS)

      # Shrink the stems so they never extend past the observed min / max.
      qmin = heights.quantile(q=0.00)
      qmax = heights.quantile(q=1.00)
      upper_whisker = [min(hi, fence) for hi, fence in zip(qmax, upper)]
      lower_whisker = [max(lo, fence) for lo, fence in zip(qmin, lower)]

      q1_vals = q1.tolist()
      q2_vals = q2.tolist()
      q3_vals = q3.tolist()

      # stems
      p.segment(cats, upper_whisker, cats, q3_vals, line_color="black")
      p.segment(cats, lower_whisker, cats, q1_vals, line_color="black")
      # boxes
      p.vbar(cats, 0.7, q2_vals, q3_vals, fill_color="#E08E79", line_color="black")
      p.vbar(cats, 0.7, q1_vals, q2_vals, fill_color="#3B8686", line_color="black")
      # whiskers (almost-0 height rects simpler than segments)
      p.rect(cats, lower_whisker, 0.2, 0.01, line_color="black")
      p.rect(cats, upper_whisker, 0.2, 0.01, line_color="black")
      # outliers
      if outx:
          p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

      p.xgrid.grid_line_color = None
      p.ygrid.grid_line_color = "white"
      p.grid.grid_line_width = 2
      p.xaxis.major_label_text_font_size = "12pt"
      p.toolbar.logo = None
      p.toolbar_location = None

      js_boxplot, div_boxplot = components(p)
      cdn_js_boxplot = CDN.js_files[0]
      # Guard against an empty css_files list instead of raising IndexError.
      cdn_css_boxplot = CDN.css_files[0] if CDN.css_files else ""

      return js_boxplot, div_boxplot, cdn_js_boxplot, cdn_css_boxplot
Add Comment
Please, Sign In to add comment