Generate PDF files using transforms api in code repositories and save to foundry

61 Views Asked by At

I want to create multiple tables after data manipulation of multiple foundry datasets. These tables are to be added to a pdf file which I want to create using the reportlab python library.

How do I use the transforms API to save the PDF file? I would appreciate any help with this.

1

There is 1 best solution below

0
ZettaP On

You need a code snippet to read/write files to the filesystem of your output dataset.

For instance:

from transforms.api import transform, Input, Output

@transform(
    hair_eye_color=Input('/examples/students_hair_eye_color'),
    processed=Output('/examples/hair_eye_color_processed')
)
def filter_eye_color(hair_eye_color, processed):
    """Write an arbitrary binary file into the output dataset's filesystem.

    Demonstrates the general pattern: open a path on
    ``processed.filesystem()`` in binary write mode and hand the resulting
    file object to whatever library produces the bytes (here ``pickle``).

    Args:
        hair_eye_color: Transform input; its rows are used as the payload.
        processed: Transform output whose filesystem receives ``myfile.ext``.
    """
    # Imported locally so the example is self-contained: the snippet's import
    # line only brings in the transforms API.
    import pickle

    # Something concrete to persist. collect() pulls every row to the driver,
    # which is fine for a small example dataset but not for large inputs.
    model = [row.asDict() for row in hair_eye_color.dataframe().collect()]

    with processed.filesystem().open('myfile.ext', 'wb') as f:
        # Write something in the filesystem, like:
        pickle.dump(model, f)

For reportlab specifically, I have not tested the code below, but it should work or be very close to working:

from transforms.api import transform, Input, Output
from reportlab.platypus import Table, TableStyle, SimpleDocTemplate, Image, Paragraph
from reportlab.lib.pagesizes import A3, landscape
from reportlab.lib import colors
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet
import pyspark.sql.functions as F

@transform(
    out=Output("/path/dataset_of_pdfs"),
    data=Input("path/data_dataset"),
    pictures=Input("path/pictures_dataset"),
)
def generation(out, data, pictures):
    """Render a PDF report (title, picture, table) into the output dataset.

    The table is filled with the rows of ``data``, the picture is read from
    the file ``file1.gif`` in ``pictures``, and the resulting document is
    written as ``TEST_REPORT.pdf`` to the filesystem of ``out``.

    Args:
        out: Transform output whose filesystem receives the PDF.
        data: Transform input providing the table rows.
        pictures: Transform input whose filesystem holds ``file1.gif``.
    """
    # Local import keeps this block self-contained; io is standard library.
    import io

    # Collect the data from input dataset.
    # collect() brings every row to the driver, so this suits report-sized
    # data only. Each row becomes a list of cell values in column order.
    data_parsed = [
        list(row.asDict().values())
        for row in data.dataframe().collect()
    ]

    # Generate a title
    styles = getSampleStyleSheet()
    current_title = Paragraph("Title Example", styles['Heading1'])

    # Create a table from a dataset (direct "table" printing)
    table_1 = Table(data_parsed, hAlign='CENTER')
    table_1.setStyle(
        TableStyle([
            ('BACKGROUND', (0, 0), (-1, -1), colors.green),
            ('TEXTCOLOR', (0, 0), (1, -1), colors.red),
            ('BOX', (0, 0), (-1, -1), 0.25, colors.red),
        ])
    )

    # Collect a picture from input dataset.
    # The bytes are copied into memory so the Image flowable does not depend
    # on the dataset file handle, which is closed when this `with` exits --
    # well before the document is built below.
    with pictures.filesystem().open("file1.gif", "rb") as img:
        image_bytes = io.BytesIO(img.read())
    picture = Image(image_bytes, hAlign='RIGHT')
    picture.drawHeight = 50
    picture.drawWidth = 50

    # Generate the report
    curr_name = "TEST_REPORT"
    with out.filesystem().open(curr_name + '.pdf', 'wb') as f:
        # SimpleDocTemplate (already imported by this file) is given the open
        # file object so the PDF is written into the output dataset.
        # Page size is a layout choice: A3 landscape is used because the file
        # already imports it and it leaves room for wide tables.
        doc = SimpleDocTemplate(f, pagesize=landscape(A3))
        # Flowables are laid out in list order: title, then picture, then
        # table. NOTE(review): presumably the reading order intended by the
        # original "add at start" calls -- confirm.
        doc.build([current_title, picture, table_1])