SageMaker PyTorch inference stops at model call on GPU


I deployed a PyTorch model using SageMaker and can successfully query it on a CPU. Deploying it on a GPU, however, leads to an InternalServerError client-side. Checking the CloudWatch logs shows that the request is received, preprocessing finishes, and the call to the model is started. I can also see a log entry from the metric collector about the prediction time. At that point, however, there are no further logs. The print statement I put right after the model call is never reached.

It is possible that an error happens which doesn't make it to CloudWatch. I have noticed that SageMaker seems not to show stack traces fully. Unfortunately, I have already set the log level to DEBUG without success.
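
For reference, this is roughly how I set the log level when creating the model (a sketch; the bucket path, role, and source directory are placeholders, and the exact SDK argument may differ):

import logging
from sagemaker.pytorch import PyTorchModel

pytorch_model = PyTorchModel(
    model_data="s3://my-bucket/model.tar.gz",  # placeholder artifact path
    role=role,                                 # execution role defined elsewhere
    entry_point="inference.py",                # the transform script shown below
    source_dir="code",                         # placeholder source directory
    framework_version="1.10",
    py_version="py38",
    container_log_level=logging.DEBUG,         # assumed to control the container's log level
)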

I'm running a SageMaker Docker container - pytorch-inference:1.10-gpu-py38 - on an ml.g4dn.xlarge instance. The model itself is compiled using torch.jit.trace. I am using a custom transform function, which you can see below along with the CloudWatch logs (the log continues as the client retries 4x).
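
For context, the TorchScript artifact and the GPU endpoint are created roughly like this (a sketch; the example input shape is only illustrative):

# Tracing and saving the model (sketch)
model.eval()
example_input = torch.randn(1, 3, IMG_HEIGHT, IMG_WIDTH)
traced = torch.jit.trace(model, example_input)
torch.jit.save(traced, "model.pt")

# Deploying to the GPU instance (sketch)
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)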

If anyone has any idea what is happening here it would be very much appreciated!

import base64
import io
import json
import logging
import os
import sys

import torch
from PIL import Image

from sagemaker_inference import (
    content_types,
    decoder,
    encoder,
    errors,
    utils,
)
from MyDetrFeatureExtractor import MyDetrFeatureExtractor


INFERENCE_ACCELERATOR_PRESENT_ENV = "SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT"
IMG_WIDTH = 800
IMG_HEIGHT = 1131
MODEL_FILE = "model.pt"
THRESHOLD = 0.2
feature_extractor = MyDetrFeatureExtractor.from_pretrained(
    "facebook/detr-resnet-50", size=(IMG_WIDTH, IMG_HEIGHT)
)
with open('/opt/ml/model/code/id2label.json', 'r') as f:
    index_to_name = json.load(f)

logger = logging.getLogger("sagemaker-inference")
# logger.addHandler(logging.StreamHandler(sys.stdout))


def model_fn(model_dir):
    logger.info(f"Trying to load model from {model_dir}/{MODEL_FILE}.")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.jit.load(f"{model_dir}/{MODEL_FILE}", map_location=torch.device(device))
    model = model.to(device)
    return model


def preprocess(images):
    logger.info("Preprocessing image...")
    try:
        encoding = feature_extractor(images=images, return_tensors="pt")
        pixel_values = encoding["pixel_values"]
    except Exception as e:
        logger.error("Preprocessing failed.")
        logger.error(e)
        # Re-raise so the failure surfaces instead of hitting an undefined variable below.
        raise
    return pixel_values


def load_fn(input_data, content_type):
    """Deserializes an "application/x-image" payload into a preprocessed tensor
    plus the original image size.
    Args:
        input_data: the request payload serialized in the content_type format
        content_type: the request content_type
    Returns: a tuple of (pixel_values tensor on the available device,
        tensor of original image sizes)
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if content_type == "application/x-image":
        if isinstance(input_data, str):
            # The image arrives as a base64-encoded string.
            print("Found base64 string. Translating to Image.")
            image = Image.open(io.BytesIO(base64.b64decode(input_data)))
        elif isinstance(input_data, (bytearray, bytes)):
            # The image arrives as raw bytes.
            print("Found bytesarray. Translating to Image.")
            image = Image.open(io.BytesIO(input_data))
        else:
            raise ValueError(f"Unexpected payload type {type(input_data)} for {content_type}")
    else:
        err_msg = f"Content type [{content_type}] is not supported yet"
        logger.error(err_msg)
        raise ValueError(err_msg)

    size = image.size
    image_sizes_orig = [[size[1], size[0]]]
    logger.info(f"Image of size {size} loaded. Start Preprocessing.")
    tensor = preprocess(image)
    return tensor.to(device), torch.tensor(image_sizes_orig)


def inference_fn(data, model):
    """A default predict_fn for PyTorch. Calls a model on data deserialized in input_fn.
    Runs prediction on GPU if cuda is available.
    Args:
        data: input data (torch.Tensor) for prediction deserialized by input_fn
        model: PyTorch model loaded in memory by model_fn
    Returns: a prediction
    """
    with torch.no_grad():
        if os.getenv(INFERENCE_ACCELERATOR_PRESENT_ENV) == "true":
            device = torch.device("cpu")
            model = model.to(device)
            input_data = data.to(device)
            model.eval()
            with torch.jit.optimized_execution(True, {"target_device": "eia:0"}):
                output = model(input_data)
        else:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            logger.info(f"Running predictions on {device}.")
            model = model.to(device)
            input_data = data.to(device)
            model.eval()
            logger.info("Compute predictions.")
            output = model(input_data)
            logger.info("Finished actual inference")

    return output


def postprocess(output, img_sizes_orig):
    logger.info("Postprocessing image...")
    try:
        results_all = feature_extractor.post_process(output, img_sizes_orig, use_dict=False)
        results = []
        for res_per_img in results_all:
            # Move tensors to CPU before converting to numpy (output may still live on the GPU).
            scores_per_img = res_per_img['scores'].detach().cpu().numpy()
            # keep only predictions with confidence > threshold
            keep = scores_per_img > THRESHOLD
            labels_per_img = list(map(
                index_to_name.get,
                res_per_img['labels'][keep].detach().cpu().numpy().astype(str)
            ))
            bboxes_per_img = res_per_img['boxes'][keep].detach().cpu().numpy()
            scores_per_img = scores_per_img[keep]
            out = [{
                'bbox': list(map(int, bbox)),
                'score': score.astype(float),
                'label': label
            } for score, label, bbox in
                zip(scores_per_img, labels_per_img, bboxes_per_img)]
            logger.info(f"Appending {out}.")
            results.append(out)
    except Exception as e:
        logger.error("Postprocessing failed.")
        logger.error(e)
        # Re-raise so the failure surfaces instead of returning an undefined variable.
        raise

    return results


def create_output(prediction, accept):
    """A default output_fn for PyTorch. Serializes predictions from predict_fn to JSON, CSV or NPY format.
    Args:
        prediction: a prediction result from predict_fn
        accept: type which the output data needs to be serialized
    Returns: output data serialized
    """
    if isinstance(prediction, torch.Tensor):
        prediction = prediction.detach().cpu().numpy().tolist()

    for content_type in utils.parse_accept(accept):
        if content_type in encoder.SUPPORTED_CONTENT_TYPES:
            encoded_prediction = encoder.encode(prediction, content_type)
            if content_type in (content_types.CSV, content_types.JSON):
                encoded_prediction = encoded_prediction.encode("utf-8")
            return encoded_prediction, accept

    raise errors.UnsupportedFormatError(accept)



def transform_fn(model, request_body, content_type, accept_type):
    logger.info("Received Request.")
    images, image_sizes = load_fn(request_body, content_type)
    logger.info("Starting Inference.")
    output = inference_fn(images, model)
    logger.info("Postprocessing.")
    results = postprocess(output, image_sizes)
    logger.info(results)
    return create_output(results, accept_type)
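
For completeness, the endpoint is queried client-side roughly like this (a sketch; endpoint name and image path are placeholders):

import boto3

runtime = boto3.client("sagemaker-runtime")
with open("page.png", "rb") as f:          # placeholder image path
    payload = f.read()

response = runtime.invoke_endpoint(
    EndpointName="my-detr-endpoint",       # placeholder endpoint name
    ContentType="application/x-image",
    Accept="application/json",
    Body=payload,
)
print(response["Body"].read().decode("utf-8"))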

and the logs...

Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from -r /opt/ml/model/code/requirements.txt (line 1)) (1.22.2)
Requirement already satisfied: Pillow in /opt/conda/lib/python3.8/site-packages (from -r /opt/ml/model/code/requirements.txt (line 2)) (9.1.1)
Collecting nvgpu
  Downloading nvgpu-0.9.0-py2.py3-none-any.whl (9.4 kB)
Collecting transformers==4.17
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/3.8 MB 50.7 MB/s eta 0:00:00
Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.8/site-packages (from transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (20.4)
Collecting regex!=2019.12.17
  Downloading regex-2022.7.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (765 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 765.0/765.0 kB 23.8 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.7.1-py3-none-any.whl (10 kB)
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.5/101.5 kB 25.5 MB/s eta 0:00:00
Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.8/site-packages (from transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (4.64.0)
Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.8/site-packages (from transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (5.4.1)
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.12.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 111.5 MB/s eta 0:00:00
Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (2.27.1)
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 880.6/880.6 kB 90.0 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pynvml
  Downloading pynvml-11.4.1-py3-none-any.whl (46 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 47.0/47.0 kB 15.6 MB/s eta 0:00:00
Requirement already satisfied: psutil in /opt/conda/lib/python3.8/site-packages (from nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (5.9.0)
Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (from nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (1.4.2)
Collecting flask-restful
  Downloading Flask_RESTful-0.3.9-py2.py3-none-any.whl (25 kB)
Collecting tabulate
  Downloading tabulate-0.8.10-py3-none-any.whl (29 kB)
Collecting termcolor
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting arrow
  Downloading arrow-1.2.2-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.0/64.0 kB 19.0 MB/s eta 0:00:00
Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (1.16.0)
Collecting flask
  Downloading Flask-2.1.3-py3-none-any.whl (95 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 95.6/95.6 kB 29.0 MB/s eta 0:00:00
Collecting ansi2html
  Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)
Collecting packaging>=20.0
  Downloading packaging-21.3-py3-none-any.whl (40 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.8/40.8 kB 13.5 MB/s eta 0:00:00
Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (4.2.0)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging>=20.0->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7.0 in /opt/conda/lib/python3.8/site-packages (from arrow->nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (2.8.2)
Collecting itsdangerous>=2.0
  Downloading itsdangerous-2.1.2-py3-none-any.whl (15 kB)
Requirement already satisfied: click>=8.0 in /opt/conda/lib/python3.8/site-packages (from flask->nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (8.1.3)
Collecting Jinja2>=3.0
  Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.1/133.1 kB 37.2 MB/s eta 0:00:00
Collecting Werkzeug>=2.0
  Downloading Werkzeug-2.1.2-py3-none-any.whl (224 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 224.9/224.9 kB 50.7 MB/s eta 0:00:00
Collecting importlib-metadata>=3.6.0
  Downloading importlib_metadata-4.12.0-py3-none-any.whl (21 kB)
Requirement already satisfied: pytz in /opt/conda/lib/python3.8/site-packages (from flask-restful->nvgpu->-r /opt/ml/model/code/requirements.txt (line 3)) (2022.1)
Collecting aniso8601>=0.82
  Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 52.8/52.8 kB 17.9 MB/s eta 0:00:00
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (1.26.9)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (2022.5.18.1)
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (3.3)
Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (2.0.12)
Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from sacremoses->transformers==4.17->-r /opt/ml/model/code/requirements.txt (line 4)) (1.1.0)
Collecting zipp>=0.5
  Downloading zipp-3.8.1-py3-none-any.whl (5.6 kB)
Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)
Building wheels for collected packages: sacremoses, termcolor
  Building wheel for sacremoses (setup.py): started
  Building wheel for sacremoses (setup.py): finished with status 'done'
  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895241 sha256=a3bb167ffae5506dddf61987611fcdfc0b8204913917be57bf7567f41240501c
  Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4
  Building wheel for termcolor (setup.py): started
  Building wheel for termcolor (setup.py): finished with status 'done'
  Created wheel for termcolor: filename=termcolor-1.1.0-py3-none-any.whl size=4832 sha256=f2b732eca48c5b5b44b0b23a29ba7130b890cb8b7df31955e7d7f34c7caeeb16
  Stored in directory: /root/.cache/pip/wheels/a0/16/9c/5473df82468f958445479c59e784896fa24f4a5fc024b0f501
Successfully built sacremoses termcolor
Installing collected packages: tokenizers, termcolor, aniso8601, zipp, Werkzeug, tabulate, regex, pynvml, packaging, MarkupSafe, itsdangerous, filelock, ansi2html, sacremoses, Jinja2, importlib-metadata, huggingface-hub, arrow, transformers, flask, flask-restful, nvgpu
  Attempting uninstall: packaging
    Found existing installation: packaging 20.4
    Uninstalling packaging-20.4:
      Successfully uninstalled packaging-20.4
Successfully installed Jinja2-3.1.2 MarkupSafe-2.1.1 Werkzeug-2.1.2 aniso8601-9.0.1 ansi2html-1.8.0 arrow-1.2.2 filelock-3.7.1 flask-2.1.3 flask-restful-0.3.9 huggingface-hub-0.8.1 importlib-metadata-4.12.0 itsdangerous-2.1.2 nvgpu-0.9.0 packaging-21.3 pynvml-11.4.1 regex-2022.7.9 sacremoses-0.0.53 tabulate-0.8.10 termcolor-1.1.0 tokenizers-0.12.1 transformers-4.17.0 zipp-3.8.1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
WARNING: There was an error checking the latest version of pip.
WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.
2022-07-22T11:10:02,627 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...
2022-07-22T11:10:02,696 [INFO ] main org.pytorch.serve.ModelServer - 
Torchserve version: 0.5.3
TS Home: /opt/conda/lib/python3.8/site-packages
Current directory: /
Temp directory: /home/model-server/tmp
Number of GPUs: 1
Number of CPUs: 1
Max heap size: 3234 M
Python executable: /opt/conda/bin/python3.8
Config file: /etc/sagemaker-ts.properties
Inference address: http://0.0.0.0:8080
Management address: http://0.0.0.0:8080
Metrics address: http://127.0.0.1:8082
Model Store: /.sagemaker/ts/models
Initial Models: model=/opt/ml/model
Log dir: /logs
Metrics dir: /logs
Netty threads: 0
Netty client threads: 0
Default workers per model: 1
Blacklist Regex: N/A
Maximum Response Size: 6553500
Maximum Request Size: 6553500
Limit Maximum Image Pixels: true
Prefer direct buffer: false
Allowed Urls: [file://.*|http(s)?://.*]
Custom python dependency for model allowed: false
Metrics report format: prometheus
Enable metrics API: true
Workflow Store: /.sagemaker/ts/models
Model config: 
{
    "model": {
        "1.0": {
            "defaultVersion": true,
            "marName": "model.mar",
            "minWorkers": 1,
            "maxWorkers": 1,
            "batchSize": 1,
            "maxBatchDelay": 10000,
            "responseTimeout": 60
        }
    }
}
2022-07-22T11:10:02,703 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager -  Loading snapshot serializer plugin...
2022-07-22T11:10:02,706 [INFO ] main org.pytorch.serve.ModelServer - Loading initial models: /opt/ml/model
2022-07-22T11:10:02,709 [WARN ] main org.pytorch.serve.archive.model.ModelArchive - Model archive version is not defined. Please upgrade to torch-model-archiver 0.2.0 or higher
2022-07-22T11:10:02,710 [WARN ] main org.pytorch.serve.archive.model.ModelArchive - Model archive createdOn is not defined. Please upgrade to torch-model-archiver 0.2.0 or higher
2022-07-22T11:10:02,712 [INFO ] main org.pytorch.serve.wlm.ModelManager - Model model loaded.
2022-07-22T11:10:02,722 [INFO ] main org.pytorch.serve.ModelServer - Initialize Inference server with: EpollServerSocketChannel.
2022-07-22T11:10:02,797 [INFO ] main org.pytorch.serve.ModelServer - Inference API bind to: http://0.0.0.0:8080
2022-07-22T11:10:02,797 [INFO ] main org.pytorch.serve.ModelServer - Initialize Metrics server with: EpollServerSocketChannel.
2022-07-22T11:10:02,800 [INFO ] main org.pytorch.serve.ModelServer - Metrics API bind to: http://127.0.0.1:8082
Model server started.
2022-07-22T11:10:03,018 [WARN ] pool-3-thread-1 org.pytorch.serve.metrics.MetricCollector - worker pid is not available yet.
2022-07-22T11:10:03,544 [INFO ] pool-3-thread-1 TS_METRICS - CPUUtilization.Percent:0.0|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,545 [INFO ] pool-3-thread-1 TS_METRICS - DiskAvailable.Gigabytes:26.050277709960938|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,545 [INFO ] pool-3-thread-1 TS_METRICS - DiskUsage.Gigabytes:25.937984466552734|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,545 [INFO ] pool-3-thread-1 TS_METRICS - DiskUtilization.Percent:49.9|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,546 [INFO ] pool-3-thread-1 TS_METRICS - GPUMemoryUtilization.Percent:0.0|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,546 [INFO ] pool-3-thread-1 TS_METRICS - GPUMemoryUsed.Megabytes:0|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,546 [INFO ] pool-3-thread-1 TS_METRICS - GPUUtilization.Percent:0|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,547 [INFO ] pool-3-thread-1 TS_METRICS - MemoryAvailable.Megabytes:13904.71875|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,547 [INFO ] pool-3-thread-1 TS_METRICS - MemoryUsed.Megabytes:1511.390625|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,547 [INFO ] pool-3-thread-1 TS_METRICS - MemoryUtilization.Percent:11.7|#Level:Host|#hostname:container-0.local,timestamp:1658488203
2022-07-22T11:10:03,814 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Listening on port: /home/model-server/tmp/.ts.sock.9000
2022-07-22T11:10:03,815 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - [PID]60
2022-07-22T11:10:03,815 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Torch worker started.
2022-07-22T11:10:03,815 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Python runtime: 3.8.10
2022-07-22T11:10:03,821 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Connecting to: /home/model-server/tmp/.ts.sock.9000
2022-07-22T11:10:03,830 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Connection accepted: /home/model-server/tmp/.ts.sock.9000.
2022-07-22T11:10:03,832 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req. to backend at: 1658488203832
2022-07-22T11:10:03,902 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - model_name: model, batchSize: 1
2022-07-22T11:10:04,735 [WARN ] W-9000-model_1.0-stderr MODEL_LOG - 
2022-07-22T11:10:04,736 [WARN ] W-9000-model_1.0-stderr MODEL_LOG - Downloading:   0%|          | 0.00/274 [00:00<?, ?B/s]
2022-07-22T11:10:04,737 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Trying to load model from /opt/ml/model/model.pt.
2022-07-22T11:10:05,938 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40592 "GET /ping HTTP/1.1" 200 6
2022-07-22T11:10:05,939 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:10:08,126 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Backend response time: 4223
2022-07-22T11:10:08,127 [INFO ] W-9000-model_1.0 TS_METRICS - W-9000-model_1.0.ms:5410|#Level:Host|#hostname:container-0.local,timestamp:1658488208
2022-07-22T11:10:08,127 [INFO ] W-9000-model_1.0 TS_METRICS - WorkerThreadTime.ms:72|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:10:10,861 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40592 "GET /ping HTTP/1.1" 200 1
2022-07-22T11:10:10,861 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:03,445 [INFO ] pool-3-thread-2 TS_METRICS - CPUUtilization.Percent:0.0|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,447 [INFO ] pool-3-thread-2 TS_METRICS - DiskAvailable.Gigabytes:26.09253692626953|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,447 [INFO ] pool-3-thread-2 TS_METRICS - DiskUsage.Gigabytes:25.89572525024414|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,448 [INFO ] pool-3-thread-2 TS_METRICS - DiskUtilization.Percent:49.8|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,449 [INFO ] pool-3-thread-2 TS_METRICS - GPUMemoryUtilization.Percent:5.731683102786419|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,449 [INFO ] pool-3-thread-2 TS_METRICS - GPUMemoryUsed.Megabytes:866|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,449 [INFO ] pool-3-thread-2 TS_METRICS - GPUUtilization.Percent:0|#Level:Host,device_id:0|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,449 [INFO ] pool-3-thread-2 TS_METRICS - MemoryAvailable.Megabytes:12352.65625|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,449 [INFO ] pool-3-thread-2 TS_METRICS - MemoryUsed.Megabytes:3051.94140625|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:03,450 [INFO ] pool-3-thread-2 TS_METRICS - MemoryUtilization.Percent:21.5|#Level:Host|#hostname:container-0.local,timestamp:1658488323
2022-07-22T11:12:05,859 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40592 "GET /ping HTTP/1.1" 200 0
2022-07-22T11:12:05,860 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:10,860 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40592 "GET /ping HTTP/1.1" 200 0
2022-07-22T11:12:10,860 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:15,860 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40592 "GET /ping HTTP/1.1" 200 1
2022-07-22T11:12:15,860 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:20,193 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req. to backend at: 1658488340193
2022-07-22T11:12:20,195 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Backend received inference at: 1658488340
2022-07-22T11:12:20,196 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Received Request.
2022-07-22T11:12:20,205 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Found bytesarray. Translating to Image.
2022-07-22T11:12:20,206 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Image of size (1654, 2339) loaded. Start Preprocessing.
2022-07-22T11:12:20,206 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Preprocessing image...
2022-07-22T11:12:20,342 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Starting Inference.
2022-07-22T11:12:20,343 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Running predictions on cuda.
2022-07-22T11:12:20,349 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Compute predictions.
2022-07-22T11:12:20,869 [INFO ] pool-2-thread-2 ACCESS_LOG - /169.254.178.2:40608 "GET /ping HTTP/1.1" 200 0
2022-07-22T11:12:20,870 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:22,119 [INFO ] W-9000-model_1.0-stdout MODEL_METRICS - PredictionTime.Milliseconds:1923.3|#ModelName:model,Level:Model|#hostname:container-0.local,requestID:f49f15ab-aed4-4ecf-80e2-22910f5d578e,timestamp:1658488342
2022-07-22T11:12:22,120 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Backend response time: 1925
2022-07-22T11:12:22,121 [INFO ] W-9000-model_1.0 ACCESS_LOG - /169.254.178.2:40592 "POST /invocations HTTP/1.1" 500 1940
2022-07-22T11:12:22,122 [INFO ] W-9000-model_1.0 TS_METRICS - Requests5XX.Count:1|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:22,122 [INFO ] W-9000-model_1.0 TS_METRICS - QueueTime.ms:0|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:22,122 [INFO ] W-9000-model_1.0 TS_METRICS - WorkerThreadTime.ms:4|#Level:Host|#hostname:container-0.local,timestamp:null
2022-07-22T11:12:22,172 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req. to backend at: 1658488342171
2022-07-22T11:12:22,177 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Backend received inference at: 1658488342

1 Answer


It turns out that wrapping the model call in a try-except statement and manually printing the error message makes it come through to CloudWatch!
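
Concretely, only the model call in inference_fn changes; a minimal sketch of the wrapped call:

with torch.no_grad():
    try:
        output = model(input_data)
    except Exception as e:
        # Without this, the worker appears to die silently and the client only sees a 500.
        print(f"Model call failed: {e}")
        raise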

I hope that insight is useful for anyone stuck without an error message in the future.