I'm trying to train a spaCy model in Azure ML Studio using a Python script and MLflow. This has worked so far with the following script:
import os
import argparse
import pandas as pd
import mlflow
import mlflow.spacy
import spacy
from pathlib import Path
from spacy.cli.download import download
from spacy.cli.init_config import fill_config
from spacy.cli.train import train


class spacyNERModel(mlflow.pyfunc.PythonModel):
    def __init__(self, nlpModel):
        self.nlpModel = nlpModel

    def spacy_predict(self, text):
        doc = self.nlpModel(str(text))
        return list(map(lambda x: (x.start_char, x.end_char, x.label_), doc.spans['sc']))

    def predict(self, context, model_input):
        res = map(self.spacy_predict, model_input["text"])
        return list(res)


def main():
    """Main function of the script."""
    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", type=str, help="path to training data")
    parser.add_argument("--test", type=str, help="path to test data")
    parser.add_argument("--config", type=str, help="path to config file")
    parser.add_argument("--registered_model_name", type=str, help="model name")
    args = parser.parse_args()

    # Start Logging
    mlflow.start_run()

    download('de_dep_news_trf')
    fill_config(Path("config.cfg"), args.config)
    train(Path("config.cfg"), Path("model"), overrides={"paths.train": args.train, "paths.dev": args.test})

    ##########################
    #<save and register model>
    ##########################
    # Registering the model to the workspace
    print("Registering the model via MLFlow")
    model = spacy.load("model/model-best")
    mlflow.pyfunc.log_model(
        python_model=spacyNERModel(model),
        registered_model_name=args.registered_model_name,
        artifact_path=args.registered_model_name,
    )

    # Saving the model to a file
    mlflow.spacy.save_model(
        spacy_model=model,
        path=os.path.join(args.registered_model_name, "trained_model"),
    )
    ###########################
    #</save and register model>
    ###########################

    # Stop Logging
    mlflow.end_run()


if __name__ == "__main__":
    main()
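Not part of the training script itself, but for context, this is roughly how the registered pyfunc model is meant to be queried afterwards (a minimal sketch; the model name and version in the URI are placeholders):

import mlflow.pyfunc
import pandas as pd

# Load the registered pyfunc model; name and version are placeholders
loaded = mlflow.pyfunc.load_model("models:/<registered_model_name>/1")

# The wrapper expects a DataFrame with a "text" column and returns, per row,
# a list of (start_char, end_char, label) tuples taken from doc.spans["sc"]
df = pd.DataFrame({"text": ["Berlin ist die Hauptstadt von Deutschland."]})
print(loaded.predict(df))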
I followed this tutorial from Microsoft. When I wanted to add logging, I found out that MLflow autologging isn't available for spaCy models, which is why I want to use spacy.MLflowLogger.v2 instead. However, this always results in errors like the following:
============================= Training pipeline =============================
ℹ Pipeline: ['transformer', 'spancat', 'ner']
ℹ Initial learn rate: 0.0
⚠ Aborting and saving the final best model. Encountered exception:
Exception('UserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/requirements.txt already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/MLmodel already exists.\nUserError:
Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/conda.yaml already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/python_env.yaml already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/tokenizer already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/config.cfg already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/meta.json already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/moves already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/cfg already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/model already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/spancat/cfg already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/spancat/model already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/vectors.cfg
already exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/vectors already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/key2row already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/strings.json
already exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/lookups.bin
already exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/transformer/cfg already
exists.\nUserError: Resource Conflict: ArtifactId
ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/transformer/model
already exists.')
Traceback (most recent call last):
  File "/mnt/azureml/cr/j/xxxxxxxxxx/exe/wd/main.py", line 67, in <module>
    main()
  File "/mnt/azureml/cr/j/xxxxxxxxxx/exe/wd/main.py", line 41, in main
    train(Path("config.cfg"), Path("model"), overrides={"paths.train": args.train, "paths.dev": args.test})
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/spacy/cli/train.py", line 84, in train
    train_nlp(nlp, output_path, use_gpu=use_gpu, stdout=sys.stdout, stderr=sys.stderr)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/spacy/training/loop.py", line 135, in train
    raise e
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/spacy/training/loop.py", line 125, in train
    log_step(info if is_best_checkpoint is not None else None)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/spacy_loggers/mlflow.py", line 65, in log_step
    _log_step_mlflow(mlflow, info)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/spacy_loggers/mlflow.py", line 214, in _log_step_mlflow
    mlflow.spacy.log_model(nlp, "best")
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/mlflow/spacy/__init__.py", line 257, in log_model
    return Model.log(
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/mlflow/models/model.py", line 623, in log
    mlflow.tracking.fluent.log_artifacts(local_path, mlflow_model.artifact_path, run_id)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/mlflow/tracking/fluent.py", line 1046, in log_artifacts
    MlflowClient().log_artifacts(run_id, local_dir, artifact_path)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/mlflow/tracking/client.py", line 1195, in log_artifacts
    self._tracking_client.log_artifacts(run_id, local_dir, artifact_path)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/mlflow/tracking/_tracking_service/client.py", line 538, in log_artifacts
    self._get_artifact_repo(run_id).log_artifacts(local_dir, artifact_path)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/azureml/mlflow/_store/artifact/artifact_repo.py", line 88, in log_artifacts
    self.artifacts.upload_dir(local_dir, dest_path)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/azureml/mlflow/_client/artifact/run_artifact_client.py", line 97, in upload_dir
    result = self._upload_files(
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/azureml/mlflow/_client/artifact/base_artifact_client.py", line 34, in _upload_files
    empty_artifact_content = self._create_empty_artifacts(paths=batch_remote_paths)
  File "/azureml-envs/azureml_xxxxxxxxxx/lib/python3.10/site-packages/azureml/mlflow/_client/artifact/run_artifact_client.py", line 170, in _create_empty_artifacts
    raise Exception("\n".join(error_messages))
Exception: UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/requirements.txt already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/MLmodel already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/conda.yaml already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/python_env.yaml already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/tokenizer already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/config.cfg already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/meta.json already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/moves already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/cfg already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/ner/model already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/spancat/cfg already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/spancat/model already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/vectors.cfg already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/vectors already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/key2row already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/strings.json already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/vocab/lookups.bin already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/transformer/cfg already exists.
UserError: Resource Conflict: ArtifactId ExperimentRun/dcid.teal_net_xxxxxxxxxx/best/model.spacy/transformer/model already exists.
The metrics were logged and shown in Azure ML Studio, but then the job failed. I tried removing mlflow.start_run() and mlflow.end_run() and setting nested to false (and the other way around; the variant I tried is shown after the config below), but this didn't help.
The logger section of my current base_config.cfg file looks like this:
[training.logger]
@loggers = "spacy.MLflowLogger.v2"
nested = True
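For completeness, the nested = False variant I also tried (which failed with the same resource conflict) only differs in that one setting:

[training.logger]
@loggers = "spacy.MLflowLogger.v2"
nested = False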
How can I get rid of that error? Is it a bug, or am I using the logger the wrong way?