How to plot 4 figures per page with pdfpages in matplotlib?

147 Views Asked by At

I have the code below which produces the output I want.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
plt.style.use('ggplot')
%matplotlib inline

# Sample data: one row per variable, tagged with the group it belongs to.
data = {
    'Variable_Grouping': ['Type_A', 'Type_A', 'Type_A', 'Type_C', 'Type_C', 'Type_C', 'Type_C',
                          'Type_D', 'Type_D', 'Type_E', 'Type_E', 'Type_E', 'Type_H', 'Type_H'],
    'Variable': ['a1', 'a2', 'a3', 'c1', 'c2', 'c3', 'c4',
                 'd1', 'd2', 'e1', 'e2', 'e3', 'h1', 'h2'],
    'Count': [5, 3, 8, 4, 3, 9, 5, 3, 8, 5, 3, 8, 5, 3],
    'Percent': [0.0625, 0.125, 0.4375, 0.0, 0.125, 0.5, 0.02,
                0.125, 0.03, 0.0625, 0.05, 0.44, 0.07, 0.023],
}
to_plot = pd.DataFrame(data)

# One bar-chart facet per group, wrapped at two facets per row; every facet
# keeps its own x and y axis.
g = sns.FacetGrid(
    to_plot,
    col='Variable_Grouping',
    col_wrap=2,
    sharex=False,
    sharey=False,
    height=5,
    aspect=1,
    margin_titles=True,
)
g.map(plt.bar, "Variable", "Count")
g = g.add_legend()

# Overlay each facet with a secondary y-axis carrying the group's Percent line.
for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping')):
    ax2 = ax.twinx()
    subdata.plot(x='Variable', y='Percent', ax=ax2,
                 legend=True, color='g', label='Percent')
    ax2.set_ylabel('Percent')
    ax2.grid(False)

# Show (rotated) x tick labels on every facet, not only the bottom row.
for ax in g.axes.flat:
    ax.tick_params(labelbottom=True, labelrotation=90)

g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)
plt.show()

Now I am using matplotlib.backends.backend_pdf to save the figures to a PDF file, and I want 4 figures per page.

# Render the whole facet grid and write it to the PDF as a single page.
with PdfPages('Analysis.pdf') as pdf:

    # Bar-chart facets: one per group, two per row, independent axes.
    g = sns.FacetGrid(
        to_plot,
        col='Variable_Grouping',
        col_wrap=2,
        sharex=False,
        sharey=False,
        height=5,
        aspect=1,
        margin_titles=True,
    )
    g.map(plt.bar, "Variable", "Count")
    g = g.add_legend()

    # Secondary y-axis per facet for the Percent line.
    for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping')):
        ax2 = ax.twinx()
        subdata.plot(x='Variable', y='Percent', ax=ax2,
                     legend=True, color='g', label='Percent')
        ax2.set_ylabel('Percent')
        ax2.grid(False)

    # Rotated x tick labels on every facet.
    for ax in g.axes.flat:
        ax.tick_params(labelbottom=True, labelrotation=90)

    g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
    g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)

    # Save the current figure as one PDF page, then release it.
    pdf.savefig(bbox_inches='tight')
    plt.close()

The code above gives me all the plots in a single page as expected.

def grouper(iterable, n, fillvalue=None):
    """Collect the items of *iterable* into consecutive tuples of length *n*.

    When the items run out part-way through a tuple, the remaining slots are
    filled with *fillvalue*, e.g. ``grouper('ABCDEFG', 3, 'x')`` yields
    ``('A', 'B', 'C')``, ``('D', 'E', 'F')``, ``('G', 'x', 'x')``.

    Returns a lazy ``itertools.zip_longest`` iterator.
    """
    from itertools import zip_longest

    # All n slots refer to the SAME iterator, so filling one output tuple
    # consumes n successive items from the input.
    shared = iter(iterable)
    slots = (shared,) * n
    return zip_longest(*slots, fillvalue=fillvalue)

# At most four facets per PDF page; with fewer than four groups, one page
# holds them all.
N_plots_per_page = min(len(to_plot['Variable_Grouping'].unique()), 4)

# Attempt at pagination: one loop iteration (and one saved PDF page) per chunk
# of N_plots_per_page group labels produced by grouper.
with PdfPages('Analysis.pdf') as pdf:
    for cols in grouper(to_plot['Variable_Grouping'].unique(), N_plots_per_page):
        # NOTE: `cols` is not referenced anywhere below; the grid and the
        # groupby are both built from the full `to_plot` frame on every pass.
        g = sns.FacetGrid(to_plot, col='Variable_Grouping', col_wrap = 2, sharex=False, sharey = False, height = 5, aspect = 1, margin_titles=True)
        g=g.map(plt.bar, "Variable","Count").add_legend()
        # Add a secondary y-axis with the Percent line to each facet.
        for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping')):
            ax2=ax.twinx()
            subdata.plot(x='Variable',y='Percent', ax = ax2, legend=True, color='g', label = 'Percent')
            ax2.set_ylabel('Percent')
            ax2.grid(False)
        # Show rotated x tick labels on every facet.
        for ax in g.axes.flatten():
            ax.tick_params(labelbottom=True, labelrotation = 90)
        g.fig.suptitle('Analysis', fontsize=16, fontweight = 'demibold', y = 1.02)
        g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right = 0.9)
        # Write the current figure as a PDF page, display it, then close it.
        pdf.savefig(bbox_inches = 'tight')
        plt.show()
        plt.close();

In the code above I have tried using the grouper function from the itertools recipes (https://docs.python.org/3/library/itertools.html#itertools-recipes), which was also suggested in the question "Export huge seaborn chart into pdf with multiple pages". However, my code repeats all the graphs on every page. Is there an easy way to get 4 graphs per page, or what is wrong with the way I used the grouper function above that causes the graphs to repeat? Any help will be appreciated. Thanks.

1

There is 1 best solution below

1
On BEST ANSWER

The problem is that, even though you compute the number of plots per page, you still plot the whole of to_plot inside the loop. You need to filter to_plot down to the cols you get from your grouper on each iteration, and then your code will work.

The only changes I made were to create the variable data_per_page and to use it in place of to_plot inside sns.FacetGrid and in for ax, (_, subdata) in zip(...).

# Write the facets to 'Analysis.pdf', N_plots_per_page facets per PDF page.
with PdfPages('Analysis.pdf') as pdf:
    # Each `cols` is one page's worth of group labels; grouper pads the last
    # chunk with None (its default fillvalue).
    for cols in grouper(to_plot['Variable_Grouping'].unique(), N_plots_per_page):

        # Keep only the rows whose group belongs on this page.
        data_per_page = to_plot.loc[to_plot['Variable_Grouping'].isin(cols)]

        g = sns.FacetGrid(data_per_page, col='Variable_Grouping', col_wrap=2,
                          sharex=False, sharey=False, height=5, aspect=1,
                          margin_titles=True)
        g = g.map(plt.bar, "Variable", "Count").add_legend()

        # Pair every facet with its own rows by facet name instead of zipping
        # the axes with a groupby: groupby sorts its keys while FacetGrid uses
        # its own facet order, so a positional zip can draw one group's
        # Percent line on another group's bars when the data is not already
        # sorted by group. (FacetGrid.axes_dict needs seaborn >= 0.11.)
        for group_name, ax in g.axes_dict.items():
            subdata = data_per_page[data_per_page['Variable_Grouping'] == group_name]
            ax2 = ax.twinx()
            subdata.plot(x='Variable', y='Percent', ax=ax2, legend=True,
                         color='g', label='Percent')
            ax2.set_ylabel('Percent')
            ax2.grid(False)

        # Show rotated x tick labels on every facet, not only the bottom row.
        for ax in g.axes.flatten():
            ax.tick_params(labelbottom=True, labelrotation=90)

        g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
        g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)

        # Save the current figure as one PDF page, display it, then close it
        # so figures do not accumulate across pages.
        pdf.savefig(bbox_inches='tight')
        plt.show()
        plt.close()

As a result I get a pdf with 2 pages, on the first there are 4 plots, and on the second only 1.