I'm just learning to use Mosaic and Delta Live Tables in Databricks. I was following this example https://github.com/databrickslabs/mosaic/tree/main/notebooks/examples/python/OpenStreetMaps and, after configuring everything, I'm not able to run the pipeline.
The error occurs on the line where I enable Mosaic:
mos.enable_mosaic(spark, dbutils)
I also tried installing a specific version of Mosaic to downgrade to the 0.3.x series, and I tried the preview channel of DLT, without success.
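The downgrade attempt was just a pip pin in the first cell of the notebook, roughly like this (the exact version specifier here is an approximation; I was targeting the 0.3.x line):

# First notebook cell: pin Mosaic to the 0.3.x series before importing it
%pip install "databricks-mosaic<0.4,>=0.3"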
This is the configuration file of my DLT pipeline:
{
  "id": "<id>",
  "pipeline_type": "WORKSPACE",
  "clusters": [
    {
      "label": "default",
      "node_type_id": "Standard_DS3_v2",
      "num_workers": 1
    }
  ],
  "development": true,
  "continuous": false,
  "channel": "CURRENT",
  "photon": true,
  "libraries": [
    {
      "notebook": {
        "path": "/Users/<user>/Mosaic-Example/02_Process"
      }
    }
  ],
  "name": "open_street_map",
  "edition": "ADVANCED",
  "storage": "/temp/mosaic/open_street_maps",
  "target": "open_street_maps",
  "data_sampling": false
}
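For reference, the cell in 02_Process that the pipeline fails on is the standard setup from the example notebook (it also appears at the top of the traceback below):

import mosaic as mos

mos.enable_mosaic(spark, dbutils)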
and here is the complete stack trace:
java.lang.RuntimeException: Failed to execute python command for notebook '/Users/<user>/Mosaic-Example/02_Process' with id RunnableCommandId(9033327179885129490) and error AnsiResult(---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:90, in MosaicLibraryHandler.auto_attach(self)
89 optionModule = getattr(optionClass, "MODULE$")
---> 90 lib = JavaJarId(
91 JarURI,
92 ManagedLibraryId.defaultOrganization(),
93 NoVersionModule.simpleString(),
94 optionModule.apply(None),
95 optionModule.apply(None),
96 )
97 except:
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1587, in JavaClass.__call__(self, *args)
1586 answer = self._gateway_client.send_command(command)
-> 1587 return_value = get_return_value(
1588 answer, self._gateway_client, None, self._fqn)
1590 for temp_arg in temp_args:
File /databricks/spark/python/pyspark/errors/exceptions/captured.py:188, in capture_sql_exception.<locals>.deco(*a, **kw)
187 try:
--> 188 return f(*a, **kw)
189 except Py4JJavaError as e:
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
329 else:
--> 330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
Py4JError: An error occurred while calling None.com.databricks.libraries.JavaJarId. Trace:
py4j.Py4JException: Constructor com.databricks.libraries.JavaJarId([class java.net.URI, class java.lang.String, class java.lang.String, class scala.None$, class scala.None$]) does not exist
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:203)
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:220)
at py4j.Gateway.invoke(Gateway.java:255)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
at java.lang.Thread.run(Thread.java:750)
During handling of the above exception, another exception occurred:
Py4JError Traceback (most recent call last)
File ~/.ipykernel/2098/command--1-2880499838:3
1 import mosaic as mos
----> 3 mos.enable_mosaic(spark, dbutils)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/api/enable.py:47, in enable_mosaic(spark, dbutils)
14 """
15 Enable Mosaic functions.
16
(...)
44
45 """
46 config.mosaic_spark = spark
---> 47 _ = MosaicLibraryHandler(config.mosaic_spark)
48 config.mosaic_context = MosaicContext(config.mosaic_spark)
50 # Register SQL functions
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:29, in MosaicLibraryHandler.__init__(self, spark)
25 raise FileNotFoundError(
26 f"Mosaic JAR package {self._jar_filename} could not be located at {self.mosaic_library_location}."
27 )
28 LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.")
---> 29 self.auto_attach()
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:98, in MosaicLibraryHandler.auto_attach(self)
90 lib = JavaJarId(
91 JarURI,
92 ManagedLibraryId.defaultOrganization(),
(...)
95 optionModule.apply(None),
96 )
97 except:
---> 98 lib = JavaJarId(
99 JarURI,
100 ManagedLibraryId.defaultOrganization(),
101 NoVersionModule.simpleString()
102 )
104 libSeq = converters.asScalaBufferConverter((lib,)).asScala().toSeq()
106 context = DatabricksILoop.getSharedDriverContextIfExists().get()
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1587, in JavaClass.__call__(self, *args)
1581 command = proto.CONSTRUCTOR_COMMAND_NAME +\
1582 self._command_header +\
1583 args_command +\
1584 proto.END_COMMAND_PART
1586 answer = self._gateway_client.send_command(command)
-> 1587 return_value = get_return_value(
1588 answer, self._gateway_client, None, self._fqn)
1590 for temp_arg in temp_args:
1591 if hasattr(temp_arg, "_detach"):
File /databricks/spark/python/pyspark/errors/exceptions/captured.py:188, in capture_sql_exception.<locals>.deco(*a, **kw)
186 def deco(*a: Any, **kw: Any) -> Any:
187 try:
--> 188 return f(*a, **kw)
189 except Py4JJavaError as e:
190 converted = convert_exception(e.java_exception)
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
--> 330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
336 format(target_id, ".", name))
Py4JError: An error occurred while calling None.com.databricks.libraries.JavaJarId. Trace:
py4j.Py4JException: Constructor com.databricks.libraries.JavaJarId([class java.net.URI, class java.lang.String, class java.lang.String]) does not exist
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:203)
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:220)
at py4j.Gateway.invoke(Gateway.java:255)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
at java.lang.Thread.run(Thread.java:750)
,None,Map(),Map(),List(),List(),Map())
at com.databricks.pipelines.execution.core.languages.PythonRepl.$anonfun$runCmd$1(PythonRepl.scala:337)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:266)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:264)
at com.databricks.pipelines.execution.core.languages.PythonRepl.recordFrameProfile(PythonRepl.scala:65)
at com.databricks.pipelines.execution.core.languages.PythonRepl.runCmd(PythonRepl.scala:305)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$12(WorkspacePythonPipelineGraphLoader.scala:159)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:75)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$10(WorkspacePythonPipelineGraphLoader.scala:143)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$10$adapted(WorkspacePythonPipelineGraphLoader.scala:105)
at scala.collection.immutable.Map$Map1.foreach(Map.scala:193)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.loadPythonGraph(WorkspacePythonPipelineGraphLoader.scala:105)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineGraphLoader.loadGraph(WorkspacePipelineGraphLoader.scala:159)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineGraphLoader.loadGraph(WorkspacePipelineGraphLoader.scala:53)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineExecutionExtension$.loadGraph(WorkspacePipelineExecutionExtension.scala:18)
at com.databricks.pipelines.execution.service.DLTComputeRunnableContext.loadGraph(DLTComputeRunnableContext.scala:100)
at com.databricks.pipelines.execution.core.UpdateExecution.initializationForUpdates(UpdateExecution.scala:555)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$initializeAndLoadGraphForRegularUpdate$1(UpdateExecution.scala:642)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$3(DeltaPipelinesUsageLogging.scala:115)
at com.databricks.pipelines.common.monitoring.OperationStatusReporter.executeWithPeriodicReporting(OperationStatusReporter.scala:120)
at com.databricks.pipelines.common.monitoring.OperationStatusReporter$.executeWithPeriodicReporting(OperationStatusReporter.scala:160)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$6(DeltaPipelinesUsageLogging.scala:135)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:573)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:668)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:686)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionContext(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:663)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:582)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperationWithResultTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:573)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:542)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperation(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperation0(DeltaPipelinesUsageLogging.scala:59)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$1(DeltaPipelinesUsageLogging.scala:127)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.recordPipelinesOperation(DeltaPipelinesUsageLogging.scala:105)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.recordPipelinesOperation$(DeltaPipelinesUsageLogging.scala:101)
at com.databricks.pipelines.execution.core.UpdateExecution.recordPipelinesOperation(UpdateExecution.scala:68)
at com.databricks.pipelines.execution.core.UpdateExecution.executeStage(UpdateExecution.scala:412)
at com.databricks.pipelines.execution.core.UpdateExecution.initializeAndLoadGraphForRegularUpdate(UpdateExecution.scala:642)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$executeUpdate$1(UpdateExecution.scala:524)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.pipelines.execution.core.UpdateExecution.executeUpdate(UpdateExecution.scala:523)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$start$3(UpdateExecution.scala:232)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionContext(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging$$anon$1.runWithAttributionTags(DeltaPipelinesUsageLogging.scala:77)
at sun.reflect.NativeMethodAccessorImpl.invoke0(NativeMethodAccessorImpl.java:-2)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.withDbAttributionTags(DeltaPipelinesUsageLogging.scala:84)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.withDbAttributionTags$(DeltaPipelinesUsageLogging.scala:83)
at com.databricks.pipelines.execution.core.UpdateExecution.withDbAttributionTags(UpdateExecution.scala:68)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$start$1(UpdateExecution.scala:208)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.UCContextCompanion$OptionUCContextHelper.runWithNewUCSIfAvailable(BaseUCContext.scala:1087)
at com.databricks.pipelines.execution.core.UpdateExecution.start(UpdateExecution.scala:195)
at com.databricks.pipelines.execution.service.ExecutionBackend$$anon$2.$anonfun$run$2(ExecutionBackend.scala:712)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.CommandContextUtils$.withCommandContext(CommandContextUtils.scala:66)
at com.databricks.pipelines.execution.service.ExecutionBackend$$anon$2.run(ExecutionBackend.scala:708)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:118)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:81)
at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:41)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:80)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:66)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:115)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:118)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
What am I missing?