How can my variable paradoxically both an ndarray and also a dict?

61 Views Asked by At

I am trying to get my GloVe vectors for a machine learning model, without having to load the model every time. Therefore I would like to save the glove_model dictionary to a json file such that it can be loaded elsewhere after its initial construction.

However I keep getting this error: "TypeError: Object of type ndarray is not JSON serializable" - full error below. Thanks!

import numpy as np 
import json

def make_glove_model():
    ''' 
    Load gloVe pre-trained vectors. 
    Dict keys = tokens (strings); values = word vectors (np arrays of length 50). 
    ''' 
    filename = 'Data/glove_twitter_50d.txt'
    print("gloVe vectors loading . . .")
    with open(filename,'r', encoding='utf8') as foo:
        gloveModel = {}
        for line in foo:
            splitLines = line.split()
            word = splitLines[0]
            wordEmbedding = np.array([float(value) for value in splitLines[1:]])
            gloveModel[word] = wordEmbedding
            
    # Get average of word vectors to be used for unseen words, per GloVe author
    with open(filename, 'r', encoding='utf8') as foo:
        for i, line in enumerate(foo):
            pass
    n_vec = i + 1
    hidden_dim = len(line.split(' ')) - 1
    
    vecs = np.zeros((n_vec, hidden_dim), dtype=np.float32)
    
    with open(filename, 'r', encoding='utf8') as foo:
        for i, line in enumerate(foo):
            vecs[i] = np.array([float(n) for n in line.split(' ')[1:]], dtype=np.float32)
    
    avg_vec = np.mean(vecs, axis=0)
    print(len(gloveModel),"gloVe vectors loaded.")
    return gloveModel, avg_vec

glove_model, avg_vec = make_glove_model()
with open('glove_model.json', 'w') as f:
    json.dump(glove_model, f)

This throws an error:

Traceback (most recent call last):

  File "D:\Documents on D\Machine_Learning\Sentiment_Analysis\import_glove.py", line 43, in <module>
    json.dump(glove_model, f)

  File "D:\Anaconda\envs\tfgpu\lib\json\__init__.py", line 179, in dump
    for chunk in iterable:

  File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 431, in _iterencode
    yield from _iterencode_dict(o, _current_indent_level)

  File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 405, in _iterencode_dict
    yield from chunks

  File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 438, in _iterencode
    o = _default(o)

  File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 179, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '

TypeError: Object of type ndarray is not JSON serializable

Yet when I check data type,

type(glove_model)
Out[17]: dict

type(avg_vec)
Out[18]: numpy.ndarray
1

There are 1 best solutions below

1
On

I think this is because your dict contains a numpy.ndarray.

Embedding = np.array([float(value) for value in splitLines[1:]])
gloveModel[word] = wordEmbedding