We have set up Zeebe and Elasticsearch with the official Helm chart “camunda-platform” 8.2.6 on Azure AKS, and we sporadically get the following error:
message: Context ancestry chain length is abnormally long. This suggests an error in application code. Length exceeded: 1000
logger_name: io.grpc.Context
thread_name: pool-8-thread-1
stack_trace: java.lang.Exception: null
at io.grpc.Context.validateGeneration(Context.java:1124)
at io.grpc.Context.<init>(Context.java:202)
at io.grpc.Context.withValue(Context.java:345)
at io.opentelemetry.javaagent.shaded.instrumentation.grpc.v1_6.internal.ContextStorageBridge.doAttach(ContextStorageBridge.java:67)
at io.grpc.Context.attach(Context.java:427)
at io.grpc.internal.ManagedChannelImpl$ChannelStreamProvider$1RetryStream.newSubstream(ManagedChannelImpl.java:591)
at io.grpc.internal.RetriableStream.createSubstream(RetriableStream.java:237)
at io.grpc.internal.RetriableStream.start(RetriableStream.java:369)
at io.grpc.internal.ClientCallImpl.startInternal(ClientCallImpl.java:289)
at io.grpc.internal.ClientCallImpl.start(ClientCallImpl.java:191)
at io.grpc.stub.ClientCalls.startCall(ClientCalls.java:341)
at io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:315)
at io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:303)
at io.grpc.stub.ClientCalls.asyncServerStreamingCall(ClientCalls.java:89)
at io.camunda.zeebe.gateway.protocol.GatewayGrpc$GatewayStub.activateJobs(GatewayGrpc.java:912)
at io.camunda.zeebe.client.impl.worker.JobPoller.poll(JobPoller.java:103)
at io.camunda.zeebe.client.impl.worker.JobPoller.poll(JobPoller.java:92)
at io.camunda.zeebe.client.impl.worker.JobWorkerImpl.poll(JobWorkerImpl.java:172)
at io.camunda.zeebe.client.impl.worker.JobWorkerImpl.lambda$tryPoll$0(JobWorkerImpl.java:141)
at java.base/java.util.Optional.ifPresent(Optional.java:178)
at io.camunda.zeebe.client.impl.worker.JobWorkerImpl.tryPoll(JobWorkerImpl.java:138)
at io.camunda.zeebe.client.impl.worker.JobWorkerImpl.onScheduledPoll(JobWorkerImpl.java:128)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
We use the following configuration for the Helm chart:
global:
elasticsearch:
host: infra-camunda-elasticsearch
port: 9200
identity:
auth:
enabled: false
ingress:
enabled: false
zeebePort: 26500
connectors:
enabled: false
elasticsearch:
enabled: true
clusterName: infra-camunda
nodeGroup: elasticsearch
masterService: infra-camunda-elasticsearch
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nodepool
operator: In
values:
- default
priorityClassName: infra-base
replicas: 2
resources:
requests:
cpu: 500m
esConfig:
elasticsearch.yml: |
ingest.geoip.downloader.enabled: false
identity:
enabled: false
operate:
enabled: false
optimize:
enabled: false
postgresql:
enabled: false
prometheusServiceMonitor:
enabled: false
tasklist:
enabled: false
zeebe:
clusterSize: 3
partitionCount: 3
replicationFactor: 3
logLevel: INFO
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nodepool
operator: In
values:
- default
priorityClassName: infra-base
env:
- name: ZEEBE_BROKER_CLUSTER_MEMBERSHIP_PROBETIMEOUT
value: 500ms
zeebe-gateway:
replicas: 2
podLabels:
component: infra-camunda-zeebe-gateway
logLevel: INFO
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nodepool
operator: In
values:
- default
priorityClassName: infra-base
env:
- name: ZEEBE_GATEWAY_CLUSTER_MEMBERSHIP_PROBETIMEOUT
value: 500ms
As for the client, we use Spring Zeebe 8.2.0 in a Spring Boot 3.0.6 application:
<dependency>
<groupId>io.camunda</groupId>
<artifactId>spring-zeebe-starter</artifactId>
<version>8.2.0</version>
</dependency>
with the following configuration:
zeebe:
client:
broker:
gateway-address: infra-zeebe:26500
security:
plaintext: true
worker:
max-jobs-active: 32
threads: 1
Our original setup differed from the installation proposed in the Camunda Platform 8 documentation, so we have now set it up using the Helm chart provided by Camunda. We temporarily disabled the network policies in our AKS cluster to rule out problems caused by blocked connections. Sometimes the Zeebe clients seem to just keep working, but at other times we have to restart them to get them processing jobs again. Unfortunately, the error message is too generic to yield any real insight, and I cannot find anything online relating this error to Zeebe. Has anyone encountered the same problem in a cloud cluster setting?