- bokeh
- numpy
- pandas
- scikit-learn
- panel
- altair
import asyncio
from io import StringIO
from js import fetch
import altair as alt
import panel as pn
import pandas as pd
from panel.io.pyodide import show
from sklearn.cluster import KMeans
pn.config.sizing_mode = 'stretch_width'
data = await fetch('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv')
penguins = pd.read_csv(StringIO(await data.text())).dropna()
cols = list(penguins.columns)[2:6]
x = pn.widgets.Select(name='x', options=cols, value='bill_depth_mm')
y = pn.widgets.Select(name='y', options=cols, value='bill_length_mm')
n_clusters = pn.widgets.IntSlider(name='n_clusters', start=1, end=5, value=3)
brush = alt.selection_interval(name='brush') # selection of type "interval"
def get_clusters(n_clusters):
kmeans = KMeans(n_clusters=n_clusters)
est = kmeans.fit(penguins[cols].values)
df = penguins.copy()
df['labels'] = est.labels_.astype('str')
return df
def get_chart(x, y, df):
centers = df.groupby('labels').mean()
return (alt.Chart(df)
.mark_point(size=100)
.encode(
x=alt.X(x, scale=alt.Scale(zero=False)),
y=alt.Y(y, scale=alt.Scale(zero=False)),
shape='labels',
color='species'
).add_selection(brush).properties(width=800) +
alt.Chart(centers)
.mark_point(size=250, shape='cross', color='black')
.encode(x=x+':Q', y=y+':Q')
)
chart = pn.pane.Vega()
table = pn.widgets.Tabulator(pagination='remote', page_size=10)
def update_table(event=None):
table.value = get_clusters(n_clusters.value)
n_clusters.param.watch(update_table, 'value')
@pn.depends(x, y, n_clusters, watch=True)
def update_chart(*events):
chart.object = get_chart(x.value, y.value, table.value)
chart.selection.param.watch(update_filters, 'brush')
def update_filters(event=None):
filters = []
for k, v in (getattr(event, 'new') or {}).items():
filters.append(dict(field=k, type='>=', value=v[0]))
filters.append(dict(field=k, type='<=', value=v[1]))
table.filters = filters
update_table()
update_chart()
intro = """
This app provides an example of **building a simple dashboard using
Panel**.\n\nIt demonstrates how to take the output of **k-means
clustering on the Penguins dataset** using scikit-learn,
parameterizing the number of clusters and the variables to
plot.\n\nThe plot and the table are linked, i.e. selecting on the plot
will filter the data in the table.\n\n The **`x` marks the center** of
the cluster.
"""
await show(x, 'x-widget')
await show(y, 'y-widget')
await show(n_clusters, 'n-widget')
await show(intro, 'intro')
await show(chart, 'cluster-plot')
await show(table, 'table')