This looks like an interesting problem.
When I zip my model and code together (like below), things work fine.
huggingface_model = HuggingFaceModel(
model_data="s3://abc/xyz/model.tar.gz", # s3 path having both model and code
role=role, # iam role with permissions to create an Endpoint
transformers_version="4.17", # transformers version used
pytorch_version="1.10", # pytorch version used
py_version='py38', # python version used
)
On un-zipping, the folder structure is as below:
model.tar.gz
|
- stable-diffusion
|
- controlnet
|
- code
where the code directory contains two files: inference.py
and requirements.txt
As mentioned before, this works fine.
Since creating a zip of the code and model together makes the zipping and uploading time very long, I want to de-couple the model and code, so I tried the below.
huggingface_model = HuggingFaceModel(
model_data="s3://abc/xyz/model.tar.gz", # path to your model
source_dir="s3://abc/xyz/sourcedir.tar.gz", # path to your script
entry_point="inference.py",
role=role, # iam role with permissions to create an Endpoint
transformers_version="4.17", # transformers version used
pytorch_version="1.10", # pytorch version used
py_version='py38', # python version used
)
Here, on unzipping, model.tar.gz
will give you stable-diffusion
and controlnet
. And unzipping sourcedir.tar.gz
will give you inference.py
and requirements.txt
.
This is giving the following error:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[13], line 16
5 huggingface_model = HuggingFaceModel(
6 model_data=s3_model_uri, # path to your model
7 source_dir=s3_code_uri, # parth to you script
(...)
12 py_version='py38', # python version used
13 )
15 # deploy the endpoint endpoint
---> 16 predictor = huggingface_model.deploy(
17 initial_instance_count=1,
18 instance_type="ml.g4dn.xlarge"
19 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:313, in HuggingFaceModel.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
306 inference_tool = "neuron" if instance_type.startswith("ml.inf1") else "neuronx"
307 self.image_uri = self.serving_image_uri(
308 region_name=self.sagemaker_session.boto_session.region_name,
309 instance_type=instance_type,
310 inference_tool=inference_tool,
311 )
--> 313 return super(HuggingFaceModel, self).deploy(
314 initial_instance_count,
315 instance_type,
316 serializer,
317 deserializer,
318 accelerator_type,
319 endpoint_name,
320 tags,
321 kms_key,
322 wait,
323 data_capture_config,
324 async_inference_config,
325 serverless_inference_config,
326 volume_size=volume_size,
327 model_data_download_timeout=model_data_download_timeout,
328 container_startup_health_check_timeout=container_startup_health_check_timeout,
329 inference_recommendation_id=inference_recommendation_id,
330 explainer_config=explainer_config,
331 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:1406, in Model.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
1403 if self._base_name is not None:
1404 self._base_name = "-".join((self._base_name, compiled_model_suffix))
-> 1406 self._create_sagemaker_model(
1407 instance_type=instance_type,
1408 accelerator_type=accelerator_type,
1409 tags=tags,
1410 serverless_inference_config=serverless_inference_config,
1411 )
1413 serverless_inference_config_dict = (
1414 serverless_inference_config._to_request_dict() if is_serverless else None
1415 )
1416 production_variant = sagemaker.production_variant(
1417 self.name,
1418 instance_type,
(...)
1424 container_startup_health_check_timeout=container_startup_health_check_timeout,
1425 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:794, in Model._create_sagemaker_model(self, instance_type, accelerator_type, tags, serverless_inference_config)
768 def _create_sagemaker_model(
769 self,
770 instance_type=None,
(...)
773 serverless_inference_config=None,
774 ):
775 """Create a SageMaker Model Entity
776
777 Args:
(...)
792 not provided in serverless inference. So this is used to find image URIs.
793 """
--> 794 container_def = self.prepare_container_def(
795 instance_type,
796 accelerator_type=accelerator_type,
797 serverless_inference_config=serverless_inference_config,
798 )
800 if not isinstance(self.sagemaker_session, PipelineSession):
801 # _base_name, model_name are not needed under PipelineSession.
802 # the model_data may be Pipeline variable
803 # which may break the _base_name generation
804 model_uri = None
File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:498, in HuggingFaceModel.prepare_container_def(self, instance_type, accelerator_type, serverless_inference_config, inference_tool)
489 deploy_image = self.serving_image_uri(
490 region_name,
491 instance_type,
(...)
494 inference_tool=inference_tool,
495 )
497 deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
--> 498 self._upload_code(deploy_key_prefix, repack=True)
499 deploy_env = dict(self.env)
500 deploy_env.update(self._script_mode_env_vars())
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:723, in Model._upload_code(self, key_prefix, repack)
707 self.uploaded_code = fw_utils.UploadedCode(
708 s3_prefix=repacked_model_data,
709 script_name=os.path.basename(self.entry_point),
710 )
712 LOGGER.info(
713 "Repacking model artifact (%s), script artifact "
714 "(%s), and dependencies (%s) "
(...)
720 repacked_model_data,
721 )
--> 723 utils.repack_model(
724 inference_script=self.entry_point,
725 source_directory=self.source_dir,
726 dependencies=self.dependencies,
727 model_uri=self.model_data,
728 repacked_model_uri=repacked_model_data,
729 sagemaker_session=self.sagemaker_session,
730 kms_key=self.model_kms_key,
731 )
733 self.repacked_model_data = repacked_model_data
File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:517, in repack_model(inference_script, source_directory, dependencies, model_uri, repacked_model_uri, sagemaker_session, kms_key)
510 local_download_dir = (
511 None
512 if sagemaker_session.settings is None
513 or sagemaker_session.settings.local_download_dir is None
514 else sagemaker_session.settings.local_download_dir
515 )
516 with _tmpdir(directory=local_download_dir) as tmp:
--> 517 model_dir = _extract_model(model_uri, sagemaker_session, tmp)
519 _create_or_update_code_dir(
520 model_dir,
521 inference_script,
(...)
525 tmp,
526 )
528 tmp_model_path = os.path.join(tmp, "temp-model.tar.gz")
File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:607, in _extract_model(model_uri, sagemaker_session, tmp)
605 local_model_path = model_uri.replace("file://", "")
606 with tarfile.open(name=local_model_path, mode="r:gz") as t:
--> 607 t.extractall(path=tmp_model_dir)
608 return tmp_model_dir
File /opt/conda/lib/python3.10/tarfile.py:2059, in TarFile.extractall(self, path, members, numeric_owner)
2057 tarinfo.mode = 0o700
2058 # Do not set_attrs directories, as we will do that further down
-> 2059 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
2060 numeric_owner=numeric_owner)
2062 # Reverse sort directories.
2063 directories.sort(key=lambda a: a.name)
File /opt/conda/lib/python3.10/tarfile.py:2100, in TarFile.extract(self, member, path, set_attrs, numeric_owner)
2097 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2099 try:
-> 2100 self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2101 set_attrs=set_attrs,
2102 numeric_owner=numeric_owner)
2103 except OSError as e:
2104 if self.errorlevel > 0:
File /opt/conda/lib/python3.10/tarfile.py:2173, in TarFile._extract_member(self, tarinfo, targetpath, set_attrs, numeric_owner)
2170 self._dbg(1, tarinfo.name)
2172 if tarinfo.isreg():
-> 2173 self.makefile(tarinfo, targetpath)
2174 elif tarinfo.isdir():
2175 self.makedir(tarinfo, targetpath)
File /opt/conda/lib/python3.10/tarfile.py:2222, in TarFile.makefile(self, tarinfo, targetpath)
2220 target.truncate()
2221 else:
-> 2222 copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
File /opt/conda/lib/python3.10/tarfile.py:251, in copyfileobj(src, dst, length, exception, bufsize)
249 if len(buf) < bufsize:
250 raise exception("unexpected end of data")
--> 251 dst.write(buf)
253 if remainder != 0:
254 buf = src.read(remainder)
OSError: [Errno 28] No space left on device