I'm trying to recreate the pyLDAvis chart in Altair. I have a Vega-Lite (VL) spec with a lot of transforms in it, and I'm having trouble converting it to Altair. All credit goes here and here for helping me get this far.
I think I'm getting close, but I get the following error:
altair.vegalite.v4.schema.channels.ColorValue, validating 'additionalProperties'
Additional properties are not allowed ('selection' was unexpected)
In the end, I'm most concerned about whether or not I translated all the transforms
correctly from VL to Altair.
Any help is much appreciated as I think this would be a nice contribution to the NLP/Topic Modeling community.
import altair as alt
import pandas as pd
import numpy as np
# Small sample of the topic-model output, stored column by column.
# The two 'Default' rows carry corpus-wide term frequencies and have no
# topic coordinates, so their x / y / topics / cluster / Freq_y are NaN.
data = dict(
    Term=['algorithm', 'learning', 'learning', 'algorithm', 'algorithm', 'learning'],
    Freq_x=[1330, 1353, 304.42, 296.69, 157.59, 140.35],
    Total=[1330, 1353, 1353.7, 1330.47, 1330.47, 1353.7],
    Category=['Default'] * 2 + ['Topic1'] * 2 + ['Topic2'] * 2,
    logprob=[30.0, 27.0, -5.116, -5.1418, -5.4112, -5.5271],
    loglift=[30.0, 27.0, 0.0975, 0.0891, -0.1803, -0.3135],
    saliency_ind=[0, 3, 76, 77, 181, 186],
    x=[np.nan] * 2 + [-0.0080, -0.0080, -0.0053, -0.0053],
    y=[np.nan] * 2 + [-0.0056, -0.0056, 0.0003, 0.0003],
    topics=[np.nan] * 2 + [1.0, 1.0, 2.0, 2.0],
    cluster=[np.nan] * 2 + [1.0] * 4,
    Freq_y=[np.nan] * 2 + [20.39, 20.39, 14.18, 14.18],
)
df = pd.DataFrame(data)
# Single-value selection keyed on the Category field; empty='none' means
# nothing is treated as selected until a circle has been clicked.
pts = alt.selection(type="single", fields=["Category"], empty="none")

# One circle per Category (via the detail channel), positioned at the mean
# x / y of its rows and sized by Freq_y. The selected circle is drawn in
# orange, all others in blue.
points = (
    alt.Chart()
    .mark_circle(tooltip=True)
    .encode(
        x=alt.X("mean(x)"),
        y=alt.Y("mean(y)"),
        size=alt.Size("Freq_y"),
        tooltip=[alt.Tooltip("topics"), alt.Tooltip("cluster")],
        detail=alt.Detail("Category"),
        color=alt.condition(pts, alt.value("#F28E2B"), alt.value("#4E79A7")),
    )
    .add_selection(pts)
)
# Transform pipeline shared by both bar layers. It intentionally has no mark:
# b1 and b2 below are derived from it with .mark_bar(), so every layer that
# ends up in the final chart is a complete unit chart. Layering the mark-less
# chart itself (`trans + b1`) would put a spec without a mark into the layer
# list, which is not a valid Vega-Lite layer.
trans = (
    alt.Chart()
    # Overall maximum of Freq_x, attached to every row as `max_fx`.
    .transform_joinaggregate(max_fx='max(Freq_x)')
    # Categories currently held by the `pts` selection, or an empty list when
    # nothing is selected. The selection's name is read from `pts.name`
    # because Altair auto-generates it ("selectorNNN"), so a hard-coded name
    # only matches by coincidence.
    .transform_calculate(
        filterCategory="{0}['Category'] ? {0}['Category'] : []".format(pts.name)
    )
    # Freq_x for rows whose Category is selected, null for all other rows.
    .transform_calculate(
        filtered_Freq_x=(
            "indexof(datum.filterCategory, datum['Category']) > -1"
            " ? datum['Freq_x'] : null"
        )
    )
    # Rank rows by the filtered frequency, largest first. Transform sort
    # fields take the plain field name; the ':Q' encoding shorthand is only
    # understood by encoding channels.
    .transform_window(
        Sorted='rank()',
        sort=[alt.SortField('filtered_Freq_x', order='descending')],
    )
)

# Background bars: Freq_x for every row.
b1 = trans.mark_bar().encode(
    x='Freq_x',
    y=alt.Y('Term', sort=alt.SortField("Sorted")),
    tooltip=['Total'],
)

# Foreground bars (orange): Freq_x of the selected Category only.
# filtered_Freq_x is a calculated field, so its type must be given explicitly.
b2 = trans.mark_bar(color='#F28E2B').encode(
    x='filtered_Freq_x:Q',
    y=alt.Y('Term', sort=alt.SortField("Sorted")),
    tooltip=['Total'],
)

# Names kept for the layout code that layers them as `bars_1 + bars_2`; each
# already carries the shared transforms through `trans`.
bars_1 = b1
bars_2 = b2
# Final layout: scatter of topics on the left, the two bar charts layered on
# the right. The DataFrame is attached once at the top level (data=df) since
# the sub-charts are created without data of their own. Color and size
# legends are resolved independently.
(
    alt.hconcat(points, bars_1 + bars_2, data=df)
    .resolve_legend(color="independent", size="independent")
)