PyJWT: Exception "Invalid payload string: 'utf-8' codec can't decode" while decoding

2.4k Views Asked by At

I'm using the PyJWT library (version 2.3.0) to decode some JWTs in Python 3.9.10.

I have my JWT as a standard string, which I pass to PyJWT in the following way:

def decode_tenduke_jwt(string):
    """Decode a JWT string and return the result of ``jwt.decode``.

    Signature verification is switched off here. The ``exp`` and ``iat``
    checks are requested, while the ``nbf`` and ``aud`` checks are disabled.

    Args:
        string: The encoded JWT to decode.

    Returns:
        Whatever ``jwt.decode`` returns for the token.
    """
    # Read the header without verifying the token, only to learn its "alg".
    header = jwt.get_unverified_header(string)
    jwt_options = {
        # Signature checking is disabled, so the key passed below is not
        # relied on to validate the token.
        'verify_signature': False,
        'verify_exp': True,
        'verify_nbf': False,
        'verify_iat': True,
        'verify_aud': False
    }
    # NOTE(review): the allowed algorithm is taken from the token's own
    # unverified header. That looks unsafe once 'verify_signature' is turned
    # back on -- confirm against the PyJWT docs and pin the expected
    # algorithm explicitly instead.
    return jwt.decode(string, TENDUKE_JWT_PUBLIC_KEY_DEV, algorithms=[header['alg']], options=jwt_options)

The algorithm in use in this particular instance is "RS256". TENDUKE_JWT_PUBLIC_KEY_DEV is our public key which takes the form:

# PEM-formatted public key passed to jwt.decode() by decode_tenduke_jwt().
# The key body is redacted/abbreviated in this listing.
TENDUKE_JWT_PUBLIC_KEY_DEV = """-----BEGIN PUBLIC KEY-----
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
etc
(9x more lines of the same length)
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx==
-----END PUBLIC KEY-----"""

Firstly, the key appears to have issues when verifying the signature. However, this is only test code, so for now that doesn't matter too much, and signature verification can be disabled in the options above.

The real problem is the following exception printout I get when running this:

self = <jwt.api_jwt.PyJWT object at 0x1097d20d0>
jwt = 'eyJhbGciOiJSUzI1NiJ9.eyJ2ZXIiOiIyMDIyLjExMjQiLCJsaWMiOiJmYzZkNDRmMi0yZTc0LTQ2NjEtYjQ2MC1hODE3NDIxMjIzYjMiLCJpYmIiOjE2...NOzKdqqXkDEQtMGcl79WKlDvUmhUIgRSybOLiWyPQLpEIqQDns4Y4HWKSuYyXFVVvzBqUP82a-FxVt5a1lIzIAeGEJlcelgGQzVXjJ-vJMq9bemqYaRJt8'
key = '-----BEGIN PUBLIC KEY-----\nxxxxxxxxxqhkiG9w0xxxxxxxxxxxxxxxxxxxCCgKCAgEAzfbhj6n8ab58JF/tkjbr\nQR/6SSA1QsUoaniLA2jUOY5...fGVMbDyK7pYJWmoy5grQxFL4xxxxxxxxxxxxxxxxxxxxxxxx099ZF0rR6\naLX2WqO7+dJEvrxxxxxxxxxxxwEAAQ==\n-----END PUBLIC KEY-----'
algorithms = ['RS256'], options = {'verify_aud': False, 'verify_exp': True, 'verify_iat': True, 'verify_iss': False, ...}, kwargs = {}
decoded = {'header': {'alg': 'RS256'}, 'payload': b'{"ver":"2022.1124","lic":"fc6d44f2-2e74-4661-b460-a817421223b3","ibb":164747...UV\xfc\xc1\xa9C\xfc\xd9\xaf\x85\xc5[ykYH\xcc\x80\x1e\x18Beq\xe9`\x19\x0c\xd5^2~\xbc\x93*\xf5\xb7\xa6\xa9\x86\x91&\xdf'}

    def decode_complete(
        self,
        jwt: str,
        key: str = "",
        algorithms: List[str] = None,
        options: Dict = None,
        **kwargs,
    ) -> Dict[str, Any]:
        if options is None:
            options = {"verify_signature": True}
        else:
            options.setdefault("verify_signature", True)
    
        if not options["verify_signature"]:
            options.setdefault("verify_exp", False)
            options.setdefault("verify_nbf", False)
            options.setdefault("verify_iat", False)
            options.setdefault("verify_aud", False)
            options.setdefault("verify_iss", False)
    
        if options["verify_signature"] and not algorithms:
            raise DecodeError(
                'It is required that you pass in a value for the "algorithms" argument when calling decode().'
            )
    
        decoded = api_jws.decode_complete(
            jwt,
            key=key,
            algorithms=algorithms,
            options=options,
            **kwargs,
        )
    
        try:
            print("JOSH - decoded:")
            print(decoded)
>           payload = json.loads(decoded["payload"])

python_local_env/lib/python3.9/site-packages/jwt/api_jwt.py:101: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

s = b'{"ver":"2022.1124","lic":"fc6d44f2-2e74-4661-b460-a817421223b3","ibb":1647475200,"iss":"FoundryDev","hw":"88665a370a...88\xe8\xc4\xd8\xd0\xdc\xd8\xc8\xd4\xd0\xe0\xcc\xb0\x89\xa5\x85\xd0\x88\xe8\xc4\xd8\xd0\xdc\xd4\xcc\xe4\xc0\xe0\xcd\xf4'
cls = None, object_hook = None, parse_float = None, parse_int = None, parse_constant = None, object_pairs_hook = None, kw = {}

    def loads(s, *, cls=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
        """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
        containing a JSON document) to a Python object.
    
        ``object_hook`` is an optional function that will be called with the
        result of any object literal decode (a ``dict``). The return value of
        ``object_hook`` will be used instead of the ``dict``. This feature
        can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    
        ``object_pairs_hook`` is an optional function that will be called with the
        result of any object literal decoded with an ordered list of pairs.  The
        return value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders.  If ``object_hook``
        is also defined, the ``object_pairs_hook`` takes priority.
    
        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).
    
        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).
    
        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.
    
        To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
        kwarg; otherwise ``JSONDecoder`` is used.
        """
        if isinstance(s, str):
            if s.startswith('\ufeff'):
                raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
                                      s, 0)
        else:
            if not isinstance(s, (bytes, bytearray)):
                raise TypeError(f'the JSON object must be str, bytes or bytearray, '
                                f'not {s.__class__.__name__}')
>           s = s.decode(detect_encoding(s), 'surrogatepass')
E           UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe6 in position 315: invalid continuation byte

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py:341: UnicodeDecodeError

During handling of the above exception, another exception occurred:

name = '../nuke2/build/Nuke.app/Contents/Macos/Nuke13.2'

    def test_playground(name):
    
        run = True
        if run:
    
            # Launches normal Nuke 5 times with no automatic crash - Asserts: Token is removed from token dir,
            # if timeout is in output, if licence error is in output
            u.remove_output()
            u.move_pref_files(True)
            u.SLEEP_TIME = 600
    
            # with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            #     nuke_runs = {executor.submit(u.run_nuke, build=name, crash=b_state): b_state for b_state in u.BOOL_STATE}
    
            token = u.get_token()[0]
            # print("printing token:")
            # print(token)
    
            # sysadmin_token = u.get_token()[0]
            # id = u.get_id(tenduke.list_user_info(token))
            # print(tenduke.list_user_info(sysadmin_token))
    
            # print("printing user info:")
            # print(tenduke.list_user_info(token))
            # print("printing sysadmin user info:")
            # print(tenduke.list_user_info(sysadmin_token))
    
            checkouts = tenduke.list_my_checkouts(token)
    
            # print("printing user checkouts via sysadmin token:")
            # print(checkouts)
    
            num_checkouts = u.count_leases(checkouts)
            print("printing INITIAL number of checkouts:")
            print(num_checkouts)
    
            response = tenduke.checkout("nuke_i", "2022.1124", "904ddda3-de21-4bea-b04e-4d6a06cbae1e", "88665a370aab", token)
            print("printing response of checkouts after checkout:")
            print(response)
    
            num_checkouts = u.count_leases(checkouts)
            print("printing number of checkouts after checkout:")
            print(num_checkouts)
    
>           jwt = u.decode_tenduke_jwt(response)

tests/test_tenduke_calls_DELETEME.py:51: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
scripts/utils.py:286: in decode_tenduke_jwt
    return jwt.decode(string, TENDUKE_JWT_PUBLIC_KEY_DEV, algorithms=[header['alg']], options=jwt_options)
python_local_env/lib/python3.9/site-packages/jwt/api_jwt.py:121: in decode
    decoded = self.decode_complete(jwt, key, algorithms, options, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <jwt.api_jwt.PyJWT object at 0x1097d20d0>
jwt = 'eyJhbGciOiJSUzI1NiJ9.eyJ2ZXIiOiIyMDIyLjExMjQiLCJsaWMiOiJmYzZkNDRmMi0yZTc0LTQ2NjEtYjQ2MC1hODE3NDIxMjIzYjMiLCJpYmIiOjE2...NOzKdqqXkDEQtMGcl79WKlDvUmhUIgRSybOLiWyPQLpEIqQDns4Y4HWKSuYyXFVVvzBqUP82a-FxVt5a1lIzIAeGEJlcelgGQzVXjJ-vJMq9bemqYaRJt8'
key = '-----BEGIN PUBLIC KEY-----\nxxxxxxxxxqhkiG9w0xxxxxxxxxxxxxxxxxxxCCgKCAgEAzfbhj6n8ab58JF/tkjbr\nQR/6SSA1QsUoaniLA2jUOY5...fGVMbDyK7pYJWmoy5grQxFL4xxxxxxxxxxxxxxxxxxxxxxxx099ZF0rR6\naLX2WqO7+dJEvrxxxxxxxxxxxwEAAQ==\n-----END PUBLIC KEY-----'
algorithms = ['RS256'], options = {'verify_aud': False, 'verify_exp': True, 'verify_iat': True, 'verify_iss': False, ...}, kwargs = {}
decoded = {'header': {'alg': 'RS256'}, 'payload': b'{"ver":"2022.1124","lic":"fc6d44f2-2e74-4661-b460-a817421223b3","ibb":164747...UV\xfc\xc1\xa9C\xfc\xd9\xaf\x85\xc5[ykYH\xcc\x80\x1e\x18Beq\xe9`\x19\x0c\xd5^2~\xbc\x93*\xf5\xb7\xa6\xa9\x86\x91&\xdf'}

    def decode_complete(
        self,
        jwt: str,
        key: str = "",
        algorithms: List[str] = None,
        options: Dict = None,
        **kwargs,
    ) -> Dict[str, Any]:
        if options is None:
            options = {"verify_signature": True}
        else:
            options.setdefault("verify_signature", True)
    
        if not options["verify_signature"]:
            options.setdefault("verify_exp", False)
            options.setdefault("verify_nbf", False)
            options.setdefault("verify_iat", False)
            options.setdefault("verify_aud", False)
            options.setdefault("verify_iss", False)
    
        if options["verify_signature"] and not algorithms:
            raise DecodeError(
                'It is required that you pass in a value for the "algorithms" argument when calling decode().'
            )
    
        decoded = api_jws.decode_complete(
            jwt,
            key=key,
            algorithms=algorithms,
            options=options,
            **kwargs,
        )
    
        try:
            # JOSH - added comment here in the PyJWT module code, REMOVE ME
            print("JOSH - decoded:")
            print(decoded)
            payload = json.loads(decoded["payload"])
        except ValueError as e:
>           raise DecodeError("Invalid payload string: %s" % e)
E           jwt.exceptions.DecodeError: Invalid payload string: 'utf-8' codec can't decode byte 0xe6 in position 315: invalid continuation byte

python_local_env/lib/python3.9/site-packages/jwt/api_jwt.py:103: DecodeError

As you can see I've added a printout in the PyJWT module code to get the decoded variable:

JOSH - decoded:
{'payload': b'{"ver":"2022.1124","lic":"fc6d44f2-2e74-4661-b460-a817421223b3","ibb":1647475200,"iss":"FoundryDev","hw":"88665a370aab","valid":"true","feature":"nuke_i","user_id":"0ba15319-fa60-43a3-96b5-134e6f45263a","exp":1647542683,"nuke_i":true,"iat":1647539083,"jti":"a0fb734d-450c-469c-ae82-c7075e38cbbe","rfr":1647539683}\x00B\xe6\xca\xea\xa5\xa5\x04&\x15\x19\x1cWD\(etc etc etc)xd8\xfe\x05KE\xc0?cm\xe23S\x82\xb6S+.(\xb2h\x82\xdf\xfa\x91\xca\x17\x04\xb7\xb9\xc8\xe2\x10\x(etc etc etc)c\xe4\xc0\xe0\xcd\xf4', 'header': {'alg': 'RS256'}, 'signature': b'\xa0\xd1pt\xb1\(etc etc etc)\x91&\xdf'}

As you can see, internally it appears to have decided to store the payload as bytes? This presents a problem with the json.loads() call. Am I doing something wrong in the original two-line function that could cause this, or should I be doing something there to prevent this? Thanks!

0

There are 0 best solutions below