Best way to generate a high quality word-cloud image for a Dash-app

4.6k Views Asked by At

I have an issue with my wordclouds appearing very pixelated in my Plotly Dash app. I am using the WordCloud class to generate the wordcloud, then using go.Figure() with a go.Image() trace to generate the plot that is returned to my Dash app as a dcc.Graph(). I have copied the code to the end of this post for reference.

I cannot seem to get the wordcloud image to be clear (wordcloud image shown below). I found a great SO post from mfitzp here which describes very clearly how to do this when using matplotlib; via setting the figsize and using the tight_layout() method. However, I cannot find a way to replicate this method using plotly.

How can I generate a high quality wordcloud using plotly that looks as good as the one posted by mfitzp? If plotly.go is not the best approach, please advise on how I can correctly perform this operation. I need to return the wordcloud to the app so that it can be displayed on screen, so plt.imshow() doesn't seem to be a valid approach for this use case (unless there is something I don't know). It's also necessary for the returned element to have an 'id', as this gets updated later in the code.

Here is my current code (I have replaced some variables with values for simplicity):

import plotly.graph_objects as go
from dash import dcc
from wordcloud import STOPWORDS
from wordcloud import WordCloud

def generate_wordcloud_div(wordcloud_exclusions, input_df, archetype_or_group):
    """Build a Dash graph component showing a word cloud of ``input_df`` text.

    Args:
        wordcloud_exclusions: Iterable of extra words to leave out of the
            cloud, on top of the ``wordcloud`` package's ``STOPWORDS``.
        input_df: Object whose ``"wordcloud_text"`` entry iterates over the
            strings to analyse; they are joined with single spaces.
        archetype_or_group: Value converted to ``str`` and used as the suffix
            of the returned component's id.

    Returns:
        A ``dcc.Graph`` with id ``wordcloud_<archetype_or_group>`` that holds
        the word cloud as a ``go.Image`` trace.
    """
    # save classname
    archetype_or_group = str(archetype_or_group)

    # add search query to the exclusions; WordCloud documents ``stopwords``
    # as a set, and a set union accepts any iterable of extra words
    excluded_words = set(STOPWORDS).union(wordcloud_exclusions)

    # instantiate wordcloud
    wordcloud = WordCloud(
        stopwords=excluded_words,
        width=900,
        height=400,
        background_color="#F9F9FA",
        colormap="viridis",
        collocations=True,
        # ``A-Z`` rather than ``A-z``: the latter also matches the ASCII
        # punctuation between ``Z`` and ``a`` ([ \ ] ^ _ and backtick).
        regexp=r"[a-zA-Z#&]+",
        max_words=30,
        min_word_length=4,
        font_path="assets/Arial Unicode.ttf"
    )

    # generate image
    wordcloud_text = " ".join(input_df["wordcloud_text"])
    wordcloud_image = wordcloud.generate(wordcloud_text)

    fig = go.Figure()
    fig.add_trace(go.Image(z=wordcloud_image))
    fig.update_layout(
        height=400,
        xaxis={"visible": False},
        yaxis={"visible": False},
        margin={"t": 0, "b": 0, "l": 0, "r": 0},
        hovermode=False,
        paper_bgcolor="#F9F9FA",
        plot_bgcolor="#F9F9FA",
    )

    return dcc.Graph(
        id=f"wordcloud_{archetype_or_group}",
        figure=fig,
        config={"displayModeBar": False},
    )

I have tried to double the size of the wordcloud (width=1800, height=800 in the wordcloud definition), hoping that when it got shrunk down to the figure dimensions it would be more clear, but the image still comes out pixelated.

I have also tried to set the dx and dy values to 900 and 400, respectively, in the go.Image() definition, but again, the image comes out pixelated.

Any advice?

Many thanks for any assistance here!

wordcloud image

2

There are 2 best solutions below

0
Waleed Alfaris On BEST ANSWER

I realized that the loss of quality stemmed from converting the wordcloud object directly into a Plotly graph objects go.Image() trace. To avoid this loss of quality, I converted the wordcloud to a pixel array (with to_array()) and used that array to generate the image figure (via px.imshow() in the code below).

def generate_dash_component(archetype_or_group, fig):
    """Wrap a figure in a ``dcc.Graph`` with a fixed display height.

    Args:
        archetype_or_group: Value interpolated into the component id.
        fig: Figure passed through as the graph's ``figure``.

    Returns:
        A ``dcc.Graph`` with id ``wordcloud_<archetype_or_group>`` and the
        mode bar hidden.
    """
    return dcc.Graph(
        id=f"wordcloud_{archetype_or_group}",
        figure=fig,
        # CSS lengths must be strings; a bare ``250px`` is a syntax error.
        style={"height": "250px"},
        config={"displayModeBar": False, "autosizable": True, "responsive": True},
    )


def generate_wordcloud_fig(wordcloud_image):
    """Return a ``px.imshow`` figure of ``wordcloud_image`` with bare chrome.

    Both axes are hidden, all four margins are set to zero, hover is turned
    off, and the paper and plot backgrounds are set to ``#F9F9FA``.
    """
    layout_options = {
        "xaxis": {'visible': False},
        "yaxis": {'visible': False},
        "margin": {'t': 0, 'b': 0, 'l': 0, 'r': 0},
        "hovermode": False,
        "paper_bgcolor": "#F9F9FA",
        "plot_bgcolor": "#F9F9FA",
    }
    image_figure = px.imshow(wordcloud_image)
    image_figure.update_layout(**layout_options)
    return image_figure


def generate_wordcloud_div(wordcloud_exclusions, input_df, archetype_or_group):
    """Generate a word cloud and return it as a Dash graph component.

    The cloud is rendered to a pixel array, turned into a figure by
    ``generate_wordcloud_fig`` and wrapped by ``generate_dash_component``.
    Nothing is written to disk.

    Args:
        wordcloud_exclusions: Iterable of extra words (e.g. the search query)
            to exclude, on top of the ``wordcloud`` package's ``STOPWORDS``.
        input_df: Object whose ``"wordcloud_text"`` entry iterates over the
            strings to analyse; they are joined with single spaces.
        archetype_or_group: Class name; converted to ``str`` and passed on to
            ``generate_dash_component``.

    Returns:
        Whatever ``generate_dash_component`` returns for the rendered figure.
    """
    # save classname
    archetype_or_group = str(archetype_or_group)

    # add search query to the exclusions; WordCloud documents ``stopwords``
    # as a set, and a set union accepts any iterable of extra words
    excluded_words = set(STOPWORDS).union(wordcloud_exclusions)

    # instantiate wordcloud
    wordcloud = WordCloud(
        stopwords=excluded_words,
        min_font_size=8,
        scale=2.5,
        background_color='#F9F9FA',
        collocations=True,
        # ``A-Z`` rather than ``A-z``: the latter also matches the ASCII
        # punctuation between ``Z`` and ``a`` ([ \ ] ^ _ and backtick).
        regexp=r"[a-zA-Z#&]+",
        max_words=30,
        min_word_length=4,
        font_path='storage/fonts/Arial-Unicode.ttf',
        collocation_threshold=3,
        colormap=truncate_cmap(plt.get_cmap('ocean'), 0, 0.7),
    )

    # generate image as a pixel array rather than handing the WordCloud
    # object itself to plotly
    wordcloud_text = " ".join(input_df["wordcloud_text"])
    wordcloud_image = wordcloud.generate(wordcloud_text).to_array()
    fig = generate_wordcloud_fig(wordcloud_image)
    return generate_dash_component(archetype_or_group, fig)
5
hoa tran On

I'm using the code below to make a wordcloud in Dash, and it returns a good-quality image:

import pandas as pd
import numpy as np
import plotly.express as px
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
from io import BytesIO
from wordcloud import WordCloud
import base64
import dash.dependencies as dd

# Sample data: one row per word ('Title') with its weight ('Count').
df1 = pd.DataFrame({
    'Title': ['Aaaaaaaaaaaaaaaaaaaaaa','Bbbbbbbbbbbbbbbbbbb','Cccccccccccccccc','Ddddddddddddddddddd','Eeeeeeeeeeeeeeeeeeeeeeee'],
    'Count': [5,15,20,50,10]})

# Dash application styled with the Bootstrap LUX theme.
app = dash.Dash(__name__,external_stylesheets=[dbc.themes.LUX])
# Layout: a single full-width column holding a card with a heading and an
# <img> element; the 'image_wc' id is what the callback below targets.
app.layout = html.Div([
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H5('Word Cloud',className='text-center'),
                    html.Img(id="image_wc"),
                ])
            ])
        ],width={'size':12,"offset":0,'order':1},style={'padding-left' : 25,'padding-right' : 25},className='text-center'),
    ])
])
    
def plot_wordcloud(data):
    """Render a word cloud image from two-column (word, weight) rows.

    Each row of ``data.values`` is unpacked into a word and its weight; the
    resulting mapping is fitted to a white-background 1080x360 ``WordCloud``
    and the result of its ``to_image()`` is returned.
    """
    frequencies = {}
    for word, weight in data.values:
        frequencies[word] = weight

    cloud = WordCloud(background_color='white', width=1080, height=360)
    cloud.fit_words(frequencies)
    return cloud.to_image()

@app.callback(dd.Output('image_wc', 'src'), [dd.Input('image_wc', 'id')])
def make_image(b):
    """Return the word cloud for ``df1`` as a base64 PNG data URI.

    The callback argument ``b`` is not used; the image is always built from
    the module-level ``df1``.
    """
    png_buffer = BytesIO()
    cloud_image = plot_wordcloud(data=df1)
    cloud_image.save(png_buffer, format='PNG')
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode()
    return 'data:image/png;base64,' + encoded_png
   
# Start the Dash server only when this file is run as a script, with debug
# mode turned off.
if __name__ == "__main__":
    app.run_server(debug=False)

Image: enter image description here