Stacked bar plot with gradient colors for each bar

57 Views Asked by At

I have made a stacked bar plot. I have tried to change three things to improve readability without luck - I am looking for suggestions for code that actually does this. I have only found javascript code and examples online.

  1. I would like to sort each column per count (see columns in current plot)
  2. I would like columns to be different colors in which variables are shades 3. I would like the legend to be sorted by column and not alphabetically
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

def predicted_intents_plot(excel_file):
    intents = pd.read_excel(excel_file, usecols=['root_intent_title','predicted_intent_title'])

    # Counts of largest values of root_intent_title column
    root_intents_counts = intents['root_intent_title'].value_counts()

    # Top 5 root counts
    top_5_root_intents = root_intents_counts.nlargest(5).index.tolist()

    # Filter the DataFrame to only include rows where 'root_intent_title' is in top_5_root_intents
    filtered_intents = intents[intents['root_intent_title'].isin(top_5_root_intents)]

    # Group by 'root_intent_title' and 'predicted_intent_title', and get the size of each group
    grouped_intents = filtered_intents.groupby(['root_intent_title','predicted_intent_title']).size()
        
    # For each 'root_intent_title', get the top 3 'predicted_intent_title'
    top_3_predicted_intents_per_root = grouped_intents.groupby(level=0).apply(lambda x: x.nlargest(3 if len(x) > 3 else len(x))).reset_index(level=1, drop=True)

    # Convert the Series to a DataFrame and reset the index
    top_3_predicted_intents_per_root = top_3_predicted_intents_per_root.reset_index()
    top_3_predicted_intents_per_root.columns = ['Root Intent Title', 'Intent', 'Counts']  # Rename the columns

    # Calculate the total 'Counts' for each 'Root Intent Title'
    total_counts = top_3_predicted_intents_per_root.groupby('Root Intent Title')['Counts'].sum().reset_index()
    total_counts.columns = ['Root Intent Title', 'Total Counts']

    # Merge the total counts with the original DataFrame
    top_3_predicted_intents_per_root = pd.merge(top_3_predicted_intents_per_root, total_counts, on='Root Intent Title')

    # Sort the DataFrame by 'Total Counts' in descending order
    top_3_predicted_intents_per_root.sort_values('Total Counts', ascending=False, inplace=True)

    # Sort the DataFrame by 'Root Intent Title' based on the order in 'Total Counts'
    top_3_predicted_intents_per_root['Root Intent Title'] = pd.Categorical(top_3_predicted_intents_per_root['Root Intent Title'], categories=total_counts.sort_values('Total Counts', ascending=False)['Root Intent Title'], ordered=True)

    # Sort the DataFrame by 'Counts' in descending order
    top_3_predicted_intents_per_root.sort_values('Counts', ascending=False, inplace=True)

    # Pivot the DataFrame
    pivot_df = top_3_predicted_intents_per_root.pivot(index='Root Intent Title', columns='Intent', values='Counts')
    
    # Define a list of colors for each 'Intent'
    colors = ['navy', 'blue', 'lightblue','darkgreen', 'green', 'lightgreen','maroon', 'red', 'salmon','purple', 'violet', 'plum','darkorange', 'orange', 'navajowhite']
    
    # Create a dictionary mapping each 'Intent' to a color
    color_dict = {col: colors[i % len(colors)] for i, col in enumerate(pivot_df.columns)}

    # Create a stacked bar plot
    ax = pivot_df.plot(kind='bar', stacked=True, color=[color_dict[col] for col in pivot_df.columns], figsize=(10,5))

    # Adjust labels, ticks, and layout
    plt.xlabel("", size = 10)  # Set x-axis label
    plt.ylabel("", size = 10)  # Set y-axis label
    plt.xticks(rotation=0, ha='center')  
    plt.tick_params(axis='both', which='both', length=0)  # Remove small lines (ticks)

    # Remove some spines from plot as well as set color to RGB (64, 64, 64) 
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_color((64/255, 64/255, 64/255))  

    # Change color of ticks and labels to RGB (64, 64, 64)
    ax.xaxis.label.set_color((64/255, 64/255, 64/255))
    ax.yaxis.label.set_color((64/255, 64/255, 64/255))
    for tick in ax.get_xticklabels():
        tick.set_color((64/255, 64/255, 64/255))
    for tick in ax.get_yticklabels():
        tick.set_color((64/255, 64/255, 64/255))

    # Move the legend outside the plot
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.show()

The current plot

The ideal plot

0

There are 0 best solutions below