How to get list of all variables in jinja 2 templates

47.5k Views Asked by At

I am trying to get a list of all variables and blocks in a template. I don't want to write my own parser to find the variables. I tried the following snippet.

from jinja2 import Environment, PackageLoader
# Environment whose loader is built from the package name 'gummi' and the path 'templates'.
env = Environment(loader=PackageLoader('gummi', 'templates'))
# Ask the environment for the template named 'chat.html'.
template = env.get_template('chat.html')

template.blocks is a dict whose keys are the block names. How can I get all the variables inside the blocks?

8

There are 8 best solutions below

6
On BEST ANSWER

Since no one has answered the question, and I have found the answer myself, here it is:

from jinja2 import Environment, PackageLoader, meta

# Parse (without rendering) one template and ask Jinja2 which names it reads
# but never defines itself.
env = Environment(loader=PackageLoader('gummi', 'templates'))
# get_source() returns a (source, filename, uptodate) tuple; env.parse()
# expects only the source text, so take the first element.
template_source = env.loader.get_source(env, 'page_content.html')[0]
parsed_content = env.parse(template_source)
# Returns a set of variable names that must be supplied by the render context.
meta.find_undeclared_variables(parsed_content)

This yields the set of undeclared variables. Because the template is only parsed and not executed at run time, the result covers every such variable in the template.

Note: This will yield html files which are included using include and extends.

1
On

For those who want to find all variables, not just the ones that need to be set from outside, one can do the following:

from jinja2 import Environment, meta
from jinja2.nodes import Name

# Demo template mixing locally assigned names, loop variables, filtered
# lookups and attribute access.
content='''
{% set y = 1%}
show y: {{ y }}
{% if x|default(2) == 1%}
{% endif %}
{{ z|default(3) }}
{{ w }}
{% set y2 = y3|default(6)%}
we do not use y2 at all
{% for _i in mylist %}
item is {{ _i }}
{% endfor %}
{{ dict1.x }}
{{ dict1.x2.y.z }}
'''
env = Environment()
parsed_content = env.parse(content)
print("undeclared=", meta.find_undeclared_variables(parsed_content))

# Split every Name node in the AST by its context: 'load' means the name is
# read, any other context means the template itself binds it.
name_nodes = list(parsed_content.find_all(Name))
vars = {node.name for node in name_nodes if node.ctx == 'load'}
vars_internal = {node.name for node in name_nodes if node.ctx != 'load'}
print(f"{vars=}\n{vars_internal=}")

# Names that are bound but never read appear only in vars_internal (y2 here);
# names that are read but never bound must come from the render context.
never_read = vars_internal.difference(vars)
needed_from_outside = vars.difference(vars_internal)
print(f"vars assigned but never used: {never_read}")
print(f"vars need to be assigned from outside: {needed_from_outside}")
print(f"vars only used internally: {vars_internal}")

output is:

undeclared= {'dict1', 'x', 'z', 'mylist', 'y3', 'w'}
vars={'_i', 'dict1', 'x', 'z', 'mylist', 'y', 'y3', 'w'}
vars_internal={'_i', 'y', 'y2'}
vars assigned but never used: {'y2'}
vars need to be assigned from outside: {'dict1', 'x', 'z', 'mylist', 'y3', 'w'}
vars only used internally: {'_i', 'y', 'y2'}

Someone asked about the fields; that can also be achieved easily:

from jinja2.nodes import Getattr, Name


# to obtain the nested field:
def recurse_getattr(g: Getattr):
    """Return the dotted path (e.g. ``'dict1.x2.y'``) described by *g*.

    Follows the chain of nested ``Getattr`` nodes down to its base. When the
    base is a plain ``Name`` the full dotted path is returned. When the base
    is any other expression (a call, a subscript, a literal, ...) there is
    no variable name to report and ``None`` is returned instead of raising
    ``AttributeError``.
    """
    if isinstance(g.node, Getattr):
        base = recurse_getattr(g.node)
    elif isinstance(g.node, Name):
        base = g.node.name
    else:
        base = None
    if base is None:
        return None
    return base + "." + g.attr


# find_all() visits every Getattr in the tree, so each prefix of a nested
# access is reported as its own field as well.
all_fields = set()
for g in parsed_content.find_all(Getattr):
    field = recurse_getattr(g)
    if field is not None:
        all_fields.add(field)
print(all_fields)
# will output {'dict1.x2.y', 'dict1.x', 'dict1.x2', 'dict1.x2.y.z'}
3
On

For me jinja2.meta.find_undeclared_variables(parsed_content) is not a good fit because it does not provide nested variables.

jinja2schema tool was kinda ok for simple scenarios but with all the loops and other jinja2 dark powers it was failing with errors.

I have played around with jinja2 data structures and was able to get all variables including nested ones. For my use case this was enough. Maybe this will also help for somebody else :)

Here is the code:

from jinja2 import Environment, FileSystemLoader, nodes


def get_variables(path, filename):
    """Collect the dotted variable paths referenced anywhere in a template.

    The template *filename* is loaded from the directory *path*, parsed
    (not rendered), and its whole syntax tree is walked. Every ``Name`` and
    every ``Getattr`` chain rooted at a ``Name`` contributes one entry, e.g.
    ``'user'`` or ``'user.address.city'``. Names that the template binds
    itself (``set`` targets, loop variables) are included as well.

    Returns:
        A set of strings.
    """
    template_variables = set()
    env = Environment(loader=FileSystemLoader(searchpath=path))
    template_source = env.loader.get_source(env, filename)[0]
    parsed_content = env.parse(template_source)

    # Walk every node instead of only the first body node's direct children,
    # so variables inside loops, conditions, filters and later output
    # statements are found too.
    pending = list(parsed_content.body)
    while pending:
        node = pending.pop()
        if type(node) is nodes.Name or type(node) is nodes.Getattr:
            parsed_variable = parse_jinja_variable(node)
            if parsed_variable:
                template_variables.add(parsed_variable)
                # The full path is recorded; do not descend, otherwise each
                # prefix of the chain would be reported separately.
                continue
        pending.extend(node.iter_child_nodes())

    return template_variables


def parse_jinja_variable(variable, suffix=''):
    """Turn a ``Name`` node or ``Getattr`` chain into a dotted path string.

    *suffix* is the part of the path already collected to the right of
    *variable*. Returns ``None`` when the chain does not end in a ``Name``.
    """
    current = variable
    collected = suffix
    # Peel attribute accesses from the outside in, prepending each attribute
    # to the path gathered so far.
    while type(current) is nodes.Getattr:
        collected = join_keys(current.attr, collected)
        current = current.node
    if type(current) is nodes.Name:
        return join_keys(current.name, collected)
    return None


def join_keys(parent_key, child_key):
    """Join two path fragments with a dot, skipping whichever one is empty.

    When both fragments are truthy the result is ``parent_key.child_key``.
    When only one is truthy that one is returned unchanged. When neither is,
    *parent_key* is returned as-is.
    """
    if not child_key:
        return parent_key
    if not parent_key:
        return child_key
    return parent_key + '.' + child_key


if __name__ == "__main__":
    # Replace both placeholder strings with your template directory and
    # template file name before running.
    variable_keys = get_variables('path/to/your/template_directory', 'your_template_file.html')
    # One variable path per line.
    print(*variable_keys, sep='\n')


2
On

Why not regex?

I find it a lot easier to use regex:

import re

# Capture the expression inside every {{ ... }} on a single line.
# Raw string: "\{" and "\s" are invalid escapes in a normal string literal.
# "\s*" tolerates any amount of padding (including none), so "{{x}}" and
# "{{  x  }}" both yield "x".
with open('templates/templatename.html', encoding='utf-8') as f:
    variables = re.findall(r"\{\{\s*(.*?)\s*\}\}", f.read())
2
On

Based on @Kracekumar's answer, but for the simplest use-case of just extracting tokens from a template passed as a string argument with no loading semantics or filter overrides:

import jinja2
# jinja2.meta is a submodule that a plain ``import jinja2`` does not load;
# import it explicitly so the attribute access below works.
import jinja2.meta

# template_source is the template text, supplied by the caller as a string.
env = jinja2.Environment()
parsed_content = env.parse(template_source)
tokens = jinja2.meta.find_undeclared_variables(parsed_content)

tokens will be a set.

2
On

I had the same need and I've written a tool called jinja2schema. It provides a heuristic algorithm for inferring types from Jinja2 templates and can also be used for getting a list of all template variables, including nested ones.

Here is a short example of doing that:

>>> import jinja2
>>> import jinja2schema
>>>
>>> template = '''
... {{ x }}
... {% for y in ys %}
...     {{ y.nested_field_1 }}
...     {{ y.nested_field_2 }}
... {% endfor %}
... '''
>>> variables = jinja2schema.infer(template)
>>>
>>> variables
{'x': <scalar>,
 'ys': [{'nested_field_1': <scalar>, 'nested_field_2': <scalar>}]}
>>>
>>> variables.keys()
['x', 'ys']
>>> variables['ys'].item.keys()
['nested_field_2', 'nested_field_1']
0
On

Though the top answer includes a note claiming that extends is supported, it actually is not: env.loader.get_source only returns the source of the current template, so only that template gets parsed.

Here's an implementation of the advice given in Violet Shreve's comment on the top answer.


import re, os
from jinja2 import Environment, meta, FileSystemLoader

template_path = 'the path to your templates'
def parse_template_variables(template_name):
    """Return the undeclared variables of a template and of every template it references.

    Starting from *template_name*, each template is loaded from
    ``template_path``, parsed, and scanned for undeclared variables. The
    templates it references (as reported by
    ``meta.find_referenced_templates``) are then processed the same way.

    Returns:
        A set with the union of the undeclared variable names found.
    """
    res = set()
    env = Environment(loader=FileSystemLoader(template_path))
    # Remember what was already parsed so that templates referenced from
    # several places are read once and circular references terminate.
    seen = set()
    # considering the including and the extending
    stack = [template_name]
    while stack:
        ref_template = stack.pop()
        if ref_template in seen:
            continue
        seen.add(ref_template)
        source, _filename, _uptodate = env.loader.get_source(env, ref_template)
        parsed_content = env.parse(source)
        res |= meta.find_undeclared_variables(parsed_content)
        # Queue the dependencies of this template for the same treatment.
        for ref_temp_name in meta.find_referenced_templates(parsed_content):
            # None is yielded for dynamic references whose name is not known
            # at parse time; there is nothing to load for those.
            if ref_temp_name is not None:
                stack.append(ref_temp_name)
    return res
0
On

For my Pelican theme, I have created a tool that analyses all Jinja variables in my template files.

I share my code

This script generates a sample configuration from all the variables that exist in the template files, taking their values from my official pelicanconf.py.

The function that extracts all variables from a template file:

def get_variables(filename):
    """Return the set of undeclared variable names used by one template.

    *filename* is looked up in the ``templates`` directory; the template is
    parsed but not rendered.
    """
    env = Environment(loader=FileSystemLoader('templates'))
    # get_source() returns (source, filename, uptodate); only the text is needed.
    template_source = env.loader.get_source(env, filename)[0]
    parsed_content = env.parse(template_source)

    return meta.find_undeclared_variables(parsed_content)

The complete script

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# use:
# generate_pelicanconf-sample.py my_official_blog/pelicanconf.py

import sys
import imp
import os

from jinja2 import Environment, FileSystemLoader, meta


# Search all template files
def list_html_templates():
    """Return the sorted names of the ``*.html`` files in ``templates``.

    Sub-directories and files with another extension are skipped, because
    they cannot be loaded as HTML templates by name. Sorting makes the
    result independent of the order in which the OS lists the directory.
    """
    template_dir = 'templates'
    return sorted(
        entry
        for entry in os.listdir(template_dir)
        if entry.endswith('.html')
        and os.path.isfile(os.path.join(template_dir, entry))
    )


# get all variable in template file
def get_variables(filename):
    """Parse ``templates/<filename>`` and return its undeclared variable names as a set."""
    loader = FileSystemLoader('templates')
    env = Environment(loader=loader)
    # The loader hands back (source, filename, uptodate); keep the text only.
    source_text = loader.get_source(env, filename)[0]
    return meta.find_undeclared_variables(env.parse(source_text))


# importlib replaces imp.load_source(): the imp module was removed in
# Python 3.12.
# NOTE: after this change the file's top-level ``import imp`` is unused and
# has to be dropped for the script to start on Python 3.12+.
import importlib.machinery
import importlib.util

# Check if the pelicanconf.py is in param
if len(sys.argv) != 2:
    # Passing the message to sys.exit() prints it on stderr and exits with a
    # non-zero status, so callers can detect the usage error.
    sys.exit("Please indicate the pelicanconf.py file")

# Get all vars from templates files; only all-uppercase names are kept
all_vars = set()
files = list_html_templates()
for fname in files:
    all_vars.update(var for var in get_variables(fname) if var.isupper())

# Load the configuration file given on the command line as module 'pelicanconf'.
# An explicit SourceFileLoader is used so the path is treated as Python
# source whatever its file extension.
_conf_loader = importlib.machinery.SourceFileLoader('pelicanconf', sys.argv[1])
_conf_spec = importlib.util.spec_from_file_location(
    'pelicanconf', sys.argv[1], loader=_conf_loader
)
m = importlib.util.module_from_spec(_conf_spec)
_conf_loader.exec_module(m)

# Show pelicanconf.py vars content
for var in all_vars:
    # Variables that the configuration does not define are silently skipped.
    if var in m.__dict__:
        print("%s = %s" % (var, repr(m.__dict__[var])))

The sample result of this program

LINKS = ((u'Home', u'/'), (u'archives', u'/archives.html'), (u'tags', u'/tags.html'), (u'A propos', u'http://bruno.adele.im'))
SITESUBTITLE = u'Une famille compl\xe8tement 633<'
DEFAULT_LANG = u'fr'
SITEURL = u'http://blog.jesuislibre.org'
AUTHOR = u'Bruno Adel\xe9'
SITENAME = u'Famille de geeks'
SOCIAL = ((u'adele', u'http://adele.im'), (u'feed', u'http://feeds.feedburner.com/FamilleDeGeek'), (u'twitter', u'http://twitter.com/jesuislibre.org'), (u'google+', u'https://plus.google.com/100723270029692582967'), (u'blog', u'http://blog.jesuislibre.org'), (u'facebook', u'http://www.facebook.com/bruno.adele'), (u'flickr', u'http://www.flickr.com/photos/b_adele'), (u'linkedin', u'http://fr.linkedin.com/in/brunoadele'))
FEED_DOMAIN = u'http://blog.jesuislibre.org'
FEED_ALL_ATOM = u'feed.atom'
DISQUS_SITENAME = u'blogdejesuislibreorg'
DEFAULT_PAGINATION = 10
GITHUB_BLOG_SITE = u'https://github.com/badele/blog.jesuislibre.org'

For more details about this script, see https://github.com/badele/pelican-theme-jesuislibre