I am trying to save a relatively big DataFrame (memory usage reported by the info() method is 663+ MB) to an HDFStore using the to_hdf() method.
But every time I try, I run into a "MemoryError".
Hence, I have two questions regarding this:
- Is the memory error primarily because I don't have enough RAM (I have 16 GB)?
- How can I save it, given my RAM restriction? Is there a way to append it to the HDFStore in chunks?
I know there is a possibility to save it as a 'table' rather than 'fixed' and that allows appending but I have not tried that yet mostly because I was hoping for a simpler alternative.
Thanks a lot in advance :)
P.S. I would like to add that I tried to_pickle() and that worked smoothly, so my assumption is that it couldn't have been a physical RAM problem.
Error:
MemoryError Traceback (most recent call last)
<ipython-input-10-05bb5886160a> in <module>()
1 train_data = pd.HDFStore('strat_train_data.h5')
----> 2 strat_train_set.to_hdf(train_data, 'strat_train_set')
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in to_hdf(self, path_or_buf, key, **kwargs)
1280
1281 from pandas.io import pytables
-> 1282 return pytables.to_hdf(path_or_buf, key, self, **kwargs)
1283
1284 def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
268 f(store)
269 else:
--> 270 f(path_or_buf)
271
272
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in <lambda>(store)
260 f = lambda store: store.append(key, value, **kwargs)
261 else:
--> 262 f = lambda store: store.put(key, value, **kwargs)
263
264 path_or_buf = _stringify_path(path_or_buf)
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in put(self, key, value, format, append, **kwargs)
869 format = get_option("io.hdf.default_format") or 'fixed'
870 kwargs = self._validate_format(format, kwargs)
--> 871 self._write_to_group(key, value, append=append, **kwargs)
872
873 def remove(self, key, where=None, start=None, stop=None):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1311
1312 # write the object
-> 1313 s.write(obj=value, append=append, complib=complib, **kwargs)
1314
1315 if s.is_table and index:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write(self, obj, **kwargs)
2890 # I have no idea why, but writing values before items fixed #2299
2891 blk_items = data.items.take(blk.mgr_locs)
-> 2892 self.write_array('block%d_values' % i, blk.values, items=blk_items)
2893 self.write_index('block%d_items' % i, blk_items)
2894
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write_array(self, key, value, items)
2658 vlarr = self._handle.create_vlarray(self.group, key,
2659 _tables().ObjectAtom())
-> 2660 vlarr.append(value)
2661 else:
2662 if empty_array:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\vlarray.py in append(self, sequence)
517 atom = self.atom
518 if not hasattr(atom, 'size'): # it is a pseudo-atom
--> 519 sequence = atom.toarray(sequence)
520 statom = atom.base
521 else:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in toarray(self, object_)
1051
1052 def toarray(self, object_):
-> 1053 buffer_ = self._tobuffer(object_)
1054 array = numpy.ndarray(buffer=buffer_, dtype=self.base.dtype,
1055 shape=len(buffer_))
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in _tobuffer(self, object_)
1171
1172 def _tobuffer(self, object_):
-> 1173 return pickle.dumps(object_, pickle.HIGHEST_PROTOCOL)
1174
1175 def fromarray(self, array):
MemoryError: