I’m trying to run the text vectorization example from the Dask examples website, but I keep getting this error on the pipe.fit() line:
ValueError: Layer ('fit-cd0b5ca52f2c9a52f6c731defb4b4124', 24) not in the HighLevelGraph's layers: ['from_pandas-8f0675dab85c873a055e3ff8b258cbce', 'getitem-8e9b8cb5e390ef27e9c606e88f4d8305', '_transformer-0d48ff13d988fe55b2ebe2c8ffb8c405', 2207520048768, 'isin-2017431f699a918998d96bcc559a37b2', 'getitem-f4118e2e898ccf9c76c631916878f04a', 'ndarray-0edbb77c-b209-434a-9e08-d51958198a34', 'astype-b02ba61f005b644214eb131baf17fbef']
It only happens on Windows PCs; the same code works fine on Linux.
Has anyone else run into this? The full traceback is below:
ValueError Traceback (most recent call last)
Cell In [12], line 1
----> 1 pipe.fit(df['text'], y,
2 incremental__classes=[0, 1])
File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\sklearn\pipeline.py:394, in Pipeline.fit(self, X, y, **fit_params)
392 if self._final_estimator != "passthrough":
393 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 394 self._final_estimator.fit(Xt, y, **fit_params_last_step)
396 return self
File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\wrappers.py:495, in Incremental.fit(self, X, y, **fit_kwargs)
493 def fit(self, X, y=None, **fit_kwargs):
494 estimator = sklearn.base.clone(self.estimator)
--> 495 self._fit_for_estimator(estimator, X, y, **fit_kwargs)
496 return self
File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\wrappers.py:479, in Incremental._fit_for_estimator(self, estimator, X, y, **fit_kwargs)
477 result = estimator.partial_fit(X=X, y=y, **fit_kwargs)
478 else:
--> 479 result = fit(
480 estimator,
481 X,
482 y,
483 random_state=self.random_state,
484 shuffle_blocks=self.shuffle_blocks,
485 assume_equal_chunks=self.assume_equal_chunks,
486 **fit_kwargs,
487 )
489 copy_learned_attributes(result, self)
490 self.estimator_ = result
File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask_ml\_partial.py:136, in fit(model, x, y, compute, shuffle_blocks, random_state, assume_equal_chunks, **kwargs)
132 from dask import sharedict
134 new_dsk = sharedict.merge(*graphs.values())
--> 136 value = Delayed((name, nblocks - 1), new_dsk)
138 if compute:
139 return value.compute()
File ~\anaconda3\envs\dask-tutorial\envs\dask\lib\site-packages\dask\delayed.py:501, in Delayed.__init__(self, key, dsk, length, layer)
499 self._layer = layer or key
500 if isinstance(dsk, HighLevelGraph) and self._layer not in dsk.layers:
--> 501 raise ValueError(
502 f"Layer {self._layer} not in the HighLevelGraph's layers: {list(dsk.layers)}"
503 )
ValueError: Layer ('fit-d4e10b765f878f992c22aef804140620', 24) not in the HighLevelGraph's layers: ['from_pandas-8f0675dab85c873a055e3ff8b258cbce', 'getitem-8e9b8cb5e390ef27e9c606e88f4d8305', '_transformer-0d48ff13d988fe55b2ebe2c8ffb8c405', 2195545223936, 'isin-0adbaa8cd18afa85a47a2c9db849c587', 'getitem-f4118e2e898ccf9c76c631916878f04a', 'ndarray-f048cac7-ecaf-4145-926a-555d18deb73a', 'astype-bae22adfd02e007cdba13a6867f49afb']