Hi I am trying to implement the Siamese Neural Network for one-shot image recognition with the Omniglot dataset. The initial step for the implementation requires to generate pair samples with same/distinct classes, for that I use the make_pair function from Ben Myara's github with little modifications. However, the keyError pops up every time when I call the function, so I want to know what causes this error, Here is my implementation:
import requests
import io
def load_numpy_arr_from_url(url):
"""
Loads a numpy array from surfdrive.
Input:
url: Download link of dataset
Outputs:
dataset: numpy array with input features or labels
"""
response = requests.get(url)
response.raise_for_status()
return np.load(io.BytesIO(response.content))
# Downloading may take a while..
train_x =load_numpy_arr_from_url('https://surfdrive.surf.nl/files/index.php/s/tvQmLyY7MhVsADb/download')
#Transform bool type to integer
train_data = train_x* 1
train_y = load_numpy_arr_from_url('https://surfdrive.surf.nl/files/index.php/s/z234AHrQqx9RVGH/download')
import torch
def make_pairs(data, labels, num=1000):
digits = {}
for i, j in enumerate(labels):
if not j in digits:
digits[j] = []
digits[j].append(i)
pairs, labels_ = [], []
for i in range(num):
if np.random.rand() >= .5: # same digit
digit = random.choice(range(len(labels+1)))
d1, d2 = random.choice(digits[digit], size=2, replace=False)
labels_.append(1)
else:
digit1, digit2 = np.random.choice(range(len(labels+1)), size=2, replace=False)
d1, d2 = random.choice(digits[digit1]), np.random.choice(digits[digit2])
labels_.append(0)
pairs.append(torch.from_numpy(np.concatenate([data[d1,:], data[d2,:]])).view(1, 56, 28))
return torch.cat(pairs), torch.LongTensor(labels_)
The error occurs when I try to call the function with the following command:
make_pairs(train_data,train_y, 5)
Here is the traceback error I got:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-30-7d53181e46ef> in <module>()
25
26 return torch.cat(pairs), torch.LongTensor(labels_)
---> 27 make_pairs(train_data,train_y, 5)
28 #print(a)
<ipython-input-30-7d53181e46ef> in make_pairs(data, labels, num)
14 if np.random.rand() >= .5: # same digit
15 digit = random.choice(range(len(labels+1)))
---> 16 print(random.choice(digits[digit], replace=False))
17 d1, d2 = random.choice(digits[digit], size=2, replace=False)
18 labels_.append(1)
KeyError: 12803
Moreover, I have also trying to implement part of the function without the for loop and everything seems to works properly there:
import numpy as np
digits = {}
for i, j in enumerate(train_y):
if not j in digits:
digits[j] = []
digits[j].append(i)
pairs, labels_ = [], []
digit = np.random.choice(range(len(train_y)+1)
d1, d2 = np.random.choice(digits[digit], size=2, replace=False)
labels_.append(1)
print(torch.from_numpy(np.concatenate([train_data[d1,:], train_data[d2,:]])).view(1, 56, 28))