I am using Dataproc image version 2.1 with the following software_config in a JSON file:
"software_config": {
"properties": {},
"optional_components": ["JUPYTER","ZEPPELIN","Hudi"]
}
However, cluster creation fails with the following error:
[2023-12-14, 04:08:59 IST] {taskinstance.py:1851} ERROR - Task failed with exception
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/operators/dataproc.py", line 609, in execute
operation = self._create_cluster(hook)
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/operators/dataproc.py", line 535, in _create_cluster
return hook.create_cluster(
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/common/hooks/base_google.py", line 468, in inner_wrapper
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/hooks/dataproc.py", line 332, in create_cluster
result = client.create_cluster(
File "/usr/local/lib/python3.9/site-packages/google/cloud/dataproc_v1/services/cluster_controller/client.py", line 534, in create_cluster
request = clusters.CreateClusterRequest(request)
File "/usr/local/lib/python3.9/site-packages/proto/message.py", line 516, in __init__
pb_value = marshal.to_proto(pb_type, value)
File "/usr/local/lib/python3.9/site-packages/proto/marshal/marshal.py", line 211, in to_proto
pb_value = rule.to_proto(value)
File "/usr/local/lib/python3.9/site-packages/proto/marshal/rules/message.py", line 36, in to_proto
return self._descriptor(**value)
ValueError: unknown enum label "Hudi"
NOTE: I also tried "HUDI" (all uppercase), but I still get the same error:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/operators/dataproc.py", line 609, in execute
operation = self._create_cluster(hook)
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/operators/dataproc.py", line 535, in _create_cluster
return hook.create_cluster(
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/common/hooks/base_google.py", line 468, in inner_wrapper
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/airflow/providers/google/cloud/hooks/dataproc.py", line 332, in create_cluster
result = client.create_cluster(
File "/usr/local/lib/python3.9/site-packages/google/cloud/dataproc_v1/services/cluster_controller/client.py", line 534, in create_cluster
request = clusters.CreateClusterRequest(request)
File "/usr/local/lib/python3.9/site-packages/proto/message.py", line 516, in __init__
pb_value = marshal.to_proto(pb_type, value)
File "/usr/local/lib/python3.9/site-packages/proto/marshal/marshal.py", line 211, in to_proto
pb_value = rule.to_proto(value)
File "/usr/local/lib/python3.9/site-packages/proto/marshal/rules/message.py", line 36, in to_proto
return self._descriptor(**value)
ValueError: unknown enum label "HUDI"
It looks like this component can only be installed on Dataproc image versions 2.1.2 and later, so check that your cluster uses a compatible image version.
Reference:
https://cloud.google.com/dataproc/docs/concepts/components/hudi#compatible_image_versions