Vega-lite to Altair - Sorting a chart based on the dynamically updated axis

233 Views Asked by At

I'm trying to recreate the pyLDAvis chart in Altair. I have a VL spec with a lot of transforms in it and I'm having trouble converting that to Altair. All credit goes here and here for helping me to get this far.

I think I'm getting close but I get the following error:

Altair.vegalite.v4.schema.channels.ColorValue, validating 'additionalProperties'
Additional properties are not allowed ('selection' was unexpected)

In the end, I'm most concerned about whether or not I translated all the transforms correctly from VL to Altair.

Any help is much appreciated as I think this would be a nice contribution to the NLP/Topic Modeling community.

import altair as alt
import pandas as pd
import numpy as np

data={
 'Term': ['algorithm','learning','learning','algorithm','algorithm','learning'],
 'Freq_x': [1330,1353,304.42,296.69,157.59,140.35],
 'Total': [1330, 1353,1353.7,1330.47,1330.47,1353.7],
 'Category': ['Default', 'Default', 'Topic1', 'Topic1', 'Topic2', 'Topic2'],
 'logprob': [30.0, 27.0, -5.116, -5.1418, -5.4112, -5.5271],
 'loglift': [30.0, 27.0, 0.0975, 0.0891, -0.1803, -0.3135],
 'saliency_ind': [0, 3, 76, 77, 181, 186],
 'x': [np.nan,np.nan,-0.0080,-0.0080,-0.0053,-0.0053],
 'y': [np.nan,np.nan,-0.0056,-0.0056, 0.0003,0.0003],
 'topics': [np.nan, np.nan, 1.0, 1.0, 2.0, 2.0],
 'cluster': [np.nan, np.nan, 1.0, 1.0, 1.0, 1.0],
 'Freq_y': [np.nan,np.nan,20.39,20.39,14.18,14.18]}

df=pd.DataFrame(data)

pts = alt.selection(type="single", fields=['Category'], empty='none')

points=alt.Chart().mark_circle(tooltip=True).encode(
    x='mean(x)',
    y='mean(y)',
    size='Freq_y',
    tooltip=['topics', 'cluster'],
    detail='Category',
    color=alt.condition(pts, alt.value('#F28E2B'), alt.value('#4E79A7'))
).add_selection(pts)

trans=alt.Chart(
    ).transform_joinaggregate(
        max_fx='max(Freq_x)'
    ).transform_calculate(
        filterCategory="selector046['Category'] ? selector046['Category'] : []"
    ).transform_calculate(
      filtered_Freq_x="indexof(datum.filterCategory,datum['Category']) > -1 ? datum['Freq_x'] : null"
    ).transform_window(
        Sorted='rank()',
        sort=[{'field': "filtered_Freq_x:Q", "order": "descending"}]
    )

b1=alt.Chart().mark_bar().encode(
    x='Freq_x',
    y=alt.Y('Term', sort=alt.SortField("Sorted")),
    tooltip=['Total'],
)

b2=alt.Chart().mark_bar(color='#F28E2B').encode(
    x='filtered_Freq_x:Q',
    y=alt.Y('Term', sort=alt.SortField("Sorted")),
    tooltip=['Total'],
)

bars_1=trans+b1
bars_2=trans+b2

alt.hconcat(points,bars_1+bars_2, data=df).resolve_legend(
    color="independent",
    size="independent"
)

0

There are 0 best solutions below