I am trying to get distributed cache for Keycloak working in Azure App Service Environment v3. There are 3 instances of my app service that are running the same KC container image with a shared PostgreSQL database. Have configured JDBC_PING for node discovery with the following configuration:
<?xml version="1.0" encoding="UTF-8"?>
<infinispan
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:infinispan:config:11.0 http://www.infinispan.org/schemas/infinispan-config-11.0.xsd"
xmlns="urn:infinispan:config:11.0">
<jgroups>
<stack name="jdbc-ping-tcp" extends="tcp">
<TCP external_addr="${env.WEBSITE_PRIVATE_IP}" bind_port="7600" />
<JDBC_PING connection_driver="org.postgresql.Driver"
connection_username="${env.KC_DB_USERNAME}" connection_password="${env.KC_DB_PASSWORD}"
connection_url="${env.KC_DB_URL}"
initialize_sql="CREATE SCHEMA IF NOT EXISTS ${env.KC_DB_SCHEMA:public}; CREATE TABLE IF NOT EXISTS ${env.KC_DB_SCHEMA:public}.JGROUPSPING (own_addr varchar(200) NOT NULL, cluster_name varchar(200) NOT NULL, bind_addr varchar(200) NOT NULL, updated timestamp default current_timestamp, ping_data BYTEA, constraint PK_JGROUPSPING PRIMARY KEY (own_addr, cluster_name));"
insert_single_sql="INSERT INTO ${env.KC_DB_SCHEMA:public}.JGROUPSPING (own_addr, cluster_name, bind_addr, updated, ping_data) values (?, ?, '${env.WEBSITE_PRIVATE_IP:127.0.0.1}', NOW(), ?);"
delete_single_sql="DELETE FROM ${env.KC_DB_SCHEMA:public}.JGROUPSPING WHERE own_addr=? AND cluster_name=?;"
select_all_pingdata_sql="SELECT ping_data, own_addr, cluster_name FROM ${env.KC_DB_SCHEMA:public}.JGROUPSPING WHERE cluster_name=?"
info_writer_sleep_time="500"
remove_all_data_on_view_change="true"
stack.combine="REPLACE"
stack.position="MPING" />
</stack>
</jgroups>
<cache-container name="keycloak">
<!-- custom stack must be referenced by name in the stack attribute of the transport element -->
<transport lock-timeout="60000" stack="jdbc-ping-tcp"/>
<local-cache name="realms">
<encoding>
<key media-type="application/x-java-object"/>
<value media-type="application/x-java-object"/>
</encoding>
<memory max-count="10000"/>
</local-cache>
<local-cache name="users">
<encoding>
<key media-type="application/x-java-object"/>
<value media-type="application/x-java-object"/>
</encoding>
<memory max-count="10000"/>
</local-cache>
<distributed-cache name="sessions" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<distributed-cache name="authenticationSessions" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<distributed-cache name="offlineSessions" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<distributed-cache name="clientSessions" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<distributed-cache name="offlineClientSessions" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<distributed-cache name="loginFailures" owners="2">
<expiration lifespan="-1"/>
</distributed-cache>
<local-cache name="authorization">
<encoding>
<key media-type="application/x-java-object"/>
<value media-type="application/x-java-object"/>
</encoding>
<memory max-count="10000"/>
</local-cache>
<replicated-cache name="work">
<expiration lifespan="-1"/>
</replicated-cache>
<local-cache name="keys">
<encoding>
<key media-type="application/x-java-object"/>
<value media-type="application/x-java-object"/>
</encoding>
<expiration max-idle="3600000"/>
<memory max-count="1000"/>
</local-cache>
<distributed-cache name="actionTokens" owners="2">
<encoding>
<key media-type="application/x-java-object"/>
<value media-type="application/x-java-object"/>
</encoding>
<expiration max-idle="-1" lifespan="-1" interval="300000"/>
<memory max-count="-1"/>
</distributed-cache>
</cache-container>
</infinispan>
I can see that when the app service instances start, I only end up with a single record in the JGROUPSPING table. I can see that each node adds a record with the nodes IP address which is then replaced by the next node that starts up.
Keycloak login to master realm takes succeeds but then throws me out back to login screen. Other times, I get a "too many redirects" error in the browser.
I suspect this is an issue with the instance VM's not being able to communicate with each other on TCP/7600.
I can see errors such as the one below in the KC logs when debugging for infinispan and jgroups is enabled.
2023-06-16 23:17:44,457 DEBUG [org.jgroups.protocols.TCP] (TQ-Bundler-7,35a6a393111c-54805) JGRP000034: 35a6a393111c-54805: failure sending message to 959a8a153e47-58391: java.net.ConnectException: Connection refused (Connection refused)
At one point I had all 3 expected entries in JGROUPSPING table but don't really know why as no config changes were made.
Has anyone got this to work with Azure App Services in ASEv3 ?