I am trying to get my GloVe vectors for a machine learning model, without having to load the model every time. Therefore I would like to save the glove_model dictionary to a json file such that it can be loaded elsewhere after its initial construction.
However I keep getting this error: "TypeError: Object of type ndarray is not JSON serializable" - full error below. Thanks!
import numpy as np
import json
def make_glove_model():
'''
Load gloVe pre-trained vectors.
Dict keys = tokens (strings); values = word vectors (np arrays of length 50).
'''
filename = 'Data/glove_twitter_50d.txt'
print("gloVe vectors loading . . .")
with open(filename,'r', encoding='utf8') as foo:
gloveModel = {}
for line in foo:
splitLines = line.split()
word = splitLines[0]
wordEmbedding = np.array([float(value) for value in splitLines[1:]])
gloveModel[word] = wordEmbedding
# Get average of word vectors to be used for unseen words, per GloVe author
with open(filename, 'r', encoding='utf8') as foo:
for i, line in enumerate(foo):
pass
n_vec = i + 1
hidden_dim = len(line.split(' ')) - 1
vecs = np.zeros((n_vec, hidden_dim), dtype=np.float32)
with open(filename, 'r', encoding='utf8') as foo:
for i, line in enumerate(foo):
vecs[i] = np.array([float(n) for n in line.split(' ')[1:]], dtype=np.float32)
avg_vec = np.mean(vecs, axis=0)
print(len(gloveModel),"gloVe vectors loaded.")
return gloveModel, avg_vec
glove_model, avg_vec = make_glove_model()
with open('glove_model.json', 'w') as f:
json.dump(glove_model, f)
This throws an error:
Traceback (most recent call last):
File "D:\Documents on D\Machine_Learning\Sentiment_Analysis\import_glove.py", line 43, in <module>
json.dump(glove_model, f)
File "D:\Anaconda\envs\tfgpu\lib\json\__init__.py", line 179, in dump
for chunk in iterable:
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 405, in _iterencode_dict
yield from chunks
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 438, in _iterencode
o = _default(o)
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type ndarray is not JSON serializable
Yet when I check data type,
type(glove_model)
Out[17]: dict
type(avg_vec)
Out[18]: numpy.ndarray
I think this is because your dict contains a
numpy.ndarray
.