I have this Dockerfile:
# syntax = docker/dockerfile:1.2
FROM continuumio/miniconda3
# install os dependencies
RUN mkdir -p /usr/share/man/man1
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        curl \
        python3-pip \
        vim \
        sudo \
        default-jre \
        git \
        gcc \
        build-essential \
    && rm -rf /var/lib/apt/lists/*
# install python dependencies
RUN pip install openmim
RUN pip install torch
RUN mim install mmcv-full==1.7.0
RUN pip install mmpose==0.29.0
RUN pip install mmdet==2.27.0
RUN pip install torchserve
# prep torchserve
RUN mkdir -p /home/torchserve/model-store
RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_detector.mar -P /home/torchserve/model-store/
RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_pose_estimator.mar -P /home/torchserve/model-store/
COPY config.properties /home/torchserve/config.properties
# print the contents of /model-store
RUN ls /home/torchserve/model-store
# starting command
CMD /opt/conda/bin/torchserve --start --ts-config /home/torchserve/config.properties && sleep infinity
In the same folder I have the following config.properties:
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
model_store=/home/torchserve/model-store
load_models=all
default_response_timeout=5000
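For local testing I build and run the image with roughly these commands (the image tag is just a placeholder I use here):

docker build -t torchserve-drawings .
docker run -p 8080:8080 -p 8081:8081 -p 8082:8082 torchserve-drawings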
Built and run that way, everything works perfectly locally. But when I deploy the same image to Google Cloud Run, the model workers fail to start and the models never load, even though /ping reports the service as healthy.
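I deploy with something like this (project ID, service name, and region are placeholders for my real values):

gcloud builds submit --tag gcr.io/my-project/torchserve-drawings
gcloud run deploy torchserve-drawings \
    --image gcr.io/my-project/torchserve-drawings \
    --port 8080 \
    --memory 4Gi \
    --region us-central1 \
    --allow-unauthenticated

Here is the error that shows up in the Cloud Run logs: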
org.pytorch.serve.wlm.WorkerInitializationException: Backend worker startup time out.
    at org.pytorch.serve.wlm.WorkerLifeCycle.startWorker(WorkerLifeCycle.java:177)
    at org.pytorch.serve.wlm.WorkerThread.connect(WorkerThread.java:339)
    at org.pytorch.serve.wlm.WorkerThread.run(WorkerThread.java:183)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.lang.Thread.run(Thread.java:829)
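Despite that worker timeout, the health check still responds. As far as I can tell, /ping only reflects the TorchServe frontend, not whether the backend workers actually started (the service URL below is a placeholder):

curl https://<my-cloud-run-service-url>/ping
# returns something like: {"status": "Healthy"}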
What is the issue?
I also pulled the startup logs with roughly the command below, but I'm not sure what to look for in them, and I hope this isn't too little to go on.
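(The service name here is a placeholder for my actual one:)

gcloud logging read \
    'resource.type="cloud_run_revision" AND resource.labels.service_name="torchserve-drawings"' \
    --limit=100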