Strange error while running Dask on Windows

49 Views Asked by At

I’m trying to run the text vectorization example from the Dask examples website, but I keep getting this error on the pipe.fit() line:

ValueError: Layer ('fit-cd0b5ca52f2c9a52f6c731defb4b4124', 24) not in the HighLevelGraph's layers: ['from_pandas-8f0675dab85c873a055e3ff8b258cbce', 'getitem-8e9b8cb5e390ef27e9c606e88f4d8305', '_transformer-0d48ff13d988fe55b2ebe2c8ffb8c405', 2207520048768, 'isin-2017431f699a918998d96bcc559a37b2', 'getitem-f4118e2e898ccf9c76c631916878f04a', 'ndarray-0edbb77c-b209-434a-9e08-d51958198a34', 'astype-b02ba61f005b644214eb131baf17fbef']

It only happens on Windows PCs; the same code works fine on Linux.

Did this ever happen to anyone else?

ValueError                                Traceback (most recent call last)
Cell In [12], line 1
----> 1 pipe.fit(df['text'], y,
      2          incremental__classes=[0, 1])

File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\sklearn\pipeline.py:394, in Pipeline.fit(self, X, y, **fit_params)
    392     if self._final_estimator != "passthrough":
    393         fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 394         self._final_estimator.fit(Xt, y, **fit_params_last_step)
    396 return self

File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\wrappers.py:495, in Incremental.fit(self, X, y, **fit_kwargs)
    493 def fit(self, X, y=None, **fit_kwargs):
    494     estimator = sklearn.base.clone(self.estimator)
--> 495     self._fit_for_estimator(estimator, X, y, **fit_kwargs)
    496     return self

File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\wrappers.py:479, in Incremental._fit_for_estimator(self, estimator, X, y, **fit_kwargs)
    477     result = estimator.partial_fit(X=X, y=y, **fit_kwargs)
    478 else:
--> 479     result = fit(
    480         estimator,
    481         X,
    482         y,
    483         random_state=self.random_state,
    484         shuffle_blocks=self.shuffle_blocks,
    485         assume_equal_chunks=self.assume_equal_chunks,
    486         **fit_kwargs,
    487     )
    489 copy_learned_attributes(result, self)
    490 self.estimator_ = result

File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\_partial.py:136, in fit(model, x, y, compute, shuffle_blocks, random_state, assume_equal_chunks, **kwargs)
    132     from dask import sharedict
    134     new_dsk = sharedict.merge(*graphs.values())
--> 136 value = Delayed((name, nblocks - 1), new_dsk)
    138 if compute:
    139     return value.compute()

File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask\delayed.py:501, in Delayed.__init__(self, key, dsk, length, layer)
    499 self._layer = layer or key
    500 if isinstance(dsk, HighLevelGraph) and self._layer not in dsk.layers:
--> 501     raise ValueError(
    502         f"Layer {self._layer} not in the HighLevelGraph's layers: {list(dsk.layers)}"
    503     )

ValueError: Layer ('fit-d4e10b765f878f992c22aef804140620', 24) not in the HighLevelGraph's layers: ['from_pandas-8f0675dab85c873a055e3ff8b258cbce', 'getitem-8e9b8cb5e390ef27e9c606e88f4d8305', '_transformer-0d48ff13d988fe55b2ebe2c8ffb8c405', 2195545223936, 'isin-0adbaa8cd18afa85a47a2c9db849c587', 'getitem-f4118e2e898ccf9c76c631916878f04a', 'ndarray-f048cac7-ecaf-4145-926a-555d18deb73a', 'astype-bae22adfd02e007cdba13a6867f49afb']
0

There are 0 best solutions below