universal sentence encoder batch pipeline failing

29 Views Asked by At

I have a batch job on the Dataflow runner that calculates embeddings from the input text. In the pipeline, I am using tft's impl.Context and impl.AnalyzeAndTransformDataset to do this.

Here is the code snippet:

    def preprocess_fn(input_features):
        """Pass the input features through and add an embedding of the text.

        Args:
            input_features: mapping that is read with the keys
                'record_identifier', 'load_identifier', 'output_mode'
                and 'text'.

        Returns:
            A dict with the four input values unchanged plus 'text_embed',
            the result of calling the loaded TF Hub module on the text.
        """
        # NOTE(review): tft is imported here but never referenced in this function.
        import tensorflow_transform as tft 
        record_identifier=input_features['record_identifier']
        load_identifier=input_features['load_identifier']
        output_mode=input_features['output_mode']
        text=input_features['text']
        module_url="https://tfhub.dev/google/universal-sentence-encoder/4" #select the url on runtime
        import tensorflow_hub as hub
        # NOTE(review): the module is loaded inside the function body, so it is
        # loaded each time this function is invoked — confirm how often the
        # framework calls preprocess_fn.
        module = hub.load(str(module_url))
        # Call the loaded module directly on the 'text' feature.
        text_embed = module(text)
        
        # Identifiers, output mode and raw text are returned as received;
        # only 'text_embed' is newly computed.
        output_features = {
            'record_identifier': record_identifier,
            'load_identifier':load_identifier,
            'output_mode': output_mode,
            'text': text,
            'text_embed': text_embed
        }
        #logging.info('output:-{}'.format(output_features))
        return output_features
    
    # Apply preprocess_fn to articles_raw_dataset via AnalyzeAndTransformDataset.
    # known_args.temp_location is passed as the first argument of impl.Context
    # (presumably its temp directory — confirm against the impl.Context docs).
    with impl.Context(known_args.temp_location,force_tf_compat_v1=False):
        # 'Extract embeddings' is the step label that shows up in the error message.
        transformed,transform_fn = (articles_raw_dataset | 'Extract embeddings' >> impl.AnalyzeAndTransformDataset(preprocess_fn))
        # The transformed result is a pair that unpacks into the data and its metadata.
        embeddings, transformed_metadata = transformed

Here is my Dockerfile configuration:

    # Python dependencies for the pipeline image.
    # NOTE(review): tensorflow is the only package installed without a version
    # pin; the other three are pinned to exact versions.
    RUN pip install apache-beam[gcp]==2.48.0
    RUN pip install tensorflow
    RUN pip install tensorflow-hub==0.14.0
    RUN pip install tensorflow-transform==1.13.0

Here is a snapshot of the error:

'NoneType' object has no attribute 'saved_model_dir' [while running 'Extract embeddings/TransformDataset/Transform-ptransform-213']
    
    at .process ( /usr/local/lib/python3.9/site-packages/tensorflow_transform/beam/impl.py:416 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1572 )
    at apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window ( apache_beam/runners/common.py:982 )
    at apache_beam.runners.common.PerWindowInvoker.invoke_process ( apache_beam/runners/common.py:838 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1508 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:905 )
    at apache_beam.runners.worker.operations.GeneralPurposeConsumerSet.receive ( apache_beam/runners/worker/operations.py:324 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:905 )
    at apache_beam.runners.worker.operations.GeneralPurposeConsumerSet.receive ( apache_beam/runners/worker/operations.py:324 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.SimpleInvoker.invoke_process ( apache_beam/runners/common.py:624 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1418 )
    at apache_beam.runners.common.DoFnRunner._reraise_augmented ( apache_beam/runners/common.py:1492 )
    at apache_beam.runners.common.DoFnRunner.process ( apache_beam/runners/common.py:1420 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:908 )
    at apache_beam.runners.worker.operations.DoOperation.process ( apache_beam/runners/worker/operations.py:907 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.common._OutputHandler._write_value_to_tag ( apache_beam/runners/common.py:1695 )
    at apache_beam.runners.common._OutputHandler.handle_process_outputs ( apache_beam/runners/common.py:1582 )
    at apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window ( apache_beam/runners/common.py:982 )
    at apache_beam.runners.common.PerWindowInvoker.invoke_process ( apache_beam/runners/common.py:818 )
    at apache_beam.runners.common.DoFnRunner.process_with_sized_restriction ( apache_beam/runners/common.py:1433 )
    at apache_beam.runners.worker.operations.SdfProcessSizedElements.process ( apache_beam/runners/worker/operations.py:1030 )
    at apache_beam.runners.worker.operations.SdfProcessSizedElements.process ( apache_beam/runners/worker/operations.py:1021 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:240 )
    at apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive ( apache_beam/runners/worker/operations.py:237 )
    at apache_beam.runners.worker.operations.Operation.output ( apache_beam/runners/worker/operations.py:528 )
    at apache_beam.runners.worker.operations.Operation.output ( apache_beam/runners/worker/operations.py:526 )
    at .process_encoded ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py:231 )
    at .process_bundle ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py:1061 )
    at .process_bundle ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:667 )
    at .do_instruction ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:629 )
    at .<lambda> ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:370 )
    at ._execute ( /usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py:295 )
0

There are 0 best solutions below