I am following the example to serve sklearn model https://github.com/SeldonIO/MLServer/blob/master/docs/examples/sklearn/README.md
I am able to train and genreate the model, and then do a REST call for the inference successfully. However, I am trying to craft a gRPC call now, and the only example I could find is this https://mlserver.readthedocs.io/en/latest/examples/custom-json/README.html?highlight=grpc#send-test-inference-request-grpc
However, this is using another model. SO I try to follow this example but replace it with inference request data from my current infer.py, please see infer-grpc.py
below.
train.py
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)
import joblib
model_file_name = "mnist-svm.joblib"
joblib.dump(classifier, model_file_name)
infer.py (http)
import requests
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8089/v2/models/mnist-svm/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
print(response.json())
infer-grpc.py
import mlserver.types
import requests
import json
import grpc
import mlserver.grpc.converters as converters
import mlserver.grpc.dataplane_pb2_grpc as dataplane
import mlserver.types as types
import requests
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)
x_0 = X_test[0:1]
model_name = "mnist-svm"
ip = {"input": x_0.tolist()}
inputs_bytes = json.dumps(x_0.tolist()).encode("UTF-8")
print([len(inputs_bytes)])
print(inputs_bytes)
inference_request = types.InferenceRequest(
inputs=[
types.RequestInput(
name="predict",
shape=[len(inputs_bytes)],
datatype="BYTES",
data=[inputs_bytes],
)
]
)
inference_request_g = converters.ModelInferRequestConverter.from_types(
inference_request,
model_name=model_name,
model_version=None
)
grpc_channel = grpc.insecure_channel("localhost:8081")
grpc_stub = dataplane.GRPCInferenceServiceStub(grpc_channel)
response = grpc_stub.ModelInfer(inference_request_g)
response
throws an error:
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNKNOWN
details = "Unexpected <class 'ValueError'>: cannot reshape array of size 347 into shape (1,)"
The V2 Inference Protocol lets you send tensors directly. Therefore, since you are trying to send a Numpy tensor, you should be able to encode it as (i.e. without the intermediate
json.dumps
step):Note that, since MLServer 1.1.0, you can also use codecs to encode your payloads. Therefore, you should also be able to do the following: