I have a batch job on DataFlow runner to calculate the embedding from the input text. Through the journey of pipeline. I am using tft.impl.context and impl.AnalyzeAndTransformDataset for the same
Here is the code snippet:-
def preprocess_fn(input_features):
import tensorflow_transform as tft
record_identifier=input_features['record_identifier']
load_identifier=input_features['load_identifier']
output_mode=input_features['output_mode']
text=input_features['text']
module_url="https://tfhub.dev/google/universal-sentence-encoder/4" #select the url on runtime
import tensorflow_hub as hub
module = hub.load(str(module_url))
text_embed = module(text)
output_features = {
'record_identifier': record_identifier,
'load_identifier':load_identifier,
'output_mode': output_mode,
'text': text,
'text_embed': text_embed
}
#logging.info('output:-{}'.format(output_features))
return output_features
with impl.Context(known_args.temp_location,force_tf_compat_v1=False):
transformed,transform_fn = (articles_raw_dataset | 'Extract embeddings' >> impl.AnalyzeAndTransformDataset(preprocess_fn))
embeddings, transformed_metadata = transformed
Here is my DockerFile configuration
RUN pip install apache-beam[gcp]==2.48.0
RUN pip install tensorflow
RUN pip install tensorflow-hub==0.14.0
RUN pip install tensorflow-transform==1.13.0
Here is the snapshot of the error
'NoneType' object has no attribute 'saved_model_dir' [while running 'Extract embeddings/TransformDataset/Transform-ptransform-213']
at .process ( /usr/local/lib/python3.9/site-packages/tensorflow_transform/beam/impl.py:416 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1572 )
at apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window ( apache_beam/runners/common.py:982 )
at apache_beam.runners.common.PerWindowInvoker.invoke_process ( apache_beam/runners/common.py:838 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1508 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:905 )
at apache_beam.runners.worker.operations.GeneralPurposeConsumerSet.receive ( apache_beam/runners/worker/operations.py:324 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:905 )
at apache_beam.runners.worker.operations.GeneralPurposeConsumerSet.receive ( apache_beam/runners/worker/operations.py:324 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
at apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window ( apache_beam/runners/common.py:982 )
at apache_beam.runners.common.PerWindowInvoker.invoke_process ( apache_beam/runners/common.py:818 )
at apache_beam.runners.common.DoFnRunner.process_with_sized_restriction ( apache_beam/runners/common.py:1433 )
at apache_beam.runners.worker.operations.SdfProcessSizedElements.process ( apache_beam/runners/worker/operations.py:1030 )
at apache_beam.runners.worker.operations.SdfProcessSizedElements.process ( apache_beam/runners/worker/operations.py:1021 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:237 )
at apache_beam.runners.worker.operations.Operation.output ( apache_beam/runners/worker/operations.py:528 )
at apache_beam.runners.worker.operations.Operation.output ( apache_beam/runners/worker/operations.py:526 )
at .process_encoded ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py:231 )
at .process_bundle ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py:1061 )
at .process_bundle ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:667 )
at .do_instruction ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:629 )
at .<lambda> ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:370 )
at ._execute ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:295 )