I am fairly new to Python and machine learning. I have been using the neupy library to create neural networks successfully. However, now that I have a decent network, I want to save it. The documentation shows how to use the dill library to do this. The network seems to be written to the file correctly, but it will not load from the file to be used. The code is repeated because I intend to have two separate scripts once the code is correctly implemented. I have copied the code exactly as given in the documentation (http://neupy.com/docs/storage.html).
My code is (the script that trains and saves the network first, followed by the script that loads it):
import dill
import csv
import numpy as np
from sklearn import datasets, preprocessing
from sklearn.cross_validation import train_test_split
from neupy import algorithms, layers
from neupy.functions import rmsle
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)
#variables
# Settings consumed further down in this (training/saving) script.
EPOCHS = 200  # passed as epochs= to cgnet.train
HIDDENLAYER = 17  # size given to the middle TanhLayer
miss = 0.1  # not referenced anywhere in the visible script
hit = 0.2  # not referenced anywhere in the visible script
TRAIN = 0.7  # passed as train_size= to train_test_split
ROUND = 2  # number of decimals kept when rounding the prediction
STEP = 0.003  # passed as step= to ConjugateGradient
TOL = 0.02  # passed as tol= to ConjugateGradient
# Read the input CSV and the target CSV into float NumPy arrays.
# NOTE(review): indentation appears to have been lost in this paste; presumably
# the csv.reader call and the list-building line belong inside each `with` body.
with open('binary_conversion_dataset_input_2.csv','r') as dest1_f:
data_iter = csv.reader(dest1_f,
delimiter = ',',
quotechar = '"')
# Collect every row produced by the reader into a list.
data = [data for data in data_iter]
# Convert the rows of strings to a float array.
data_array1 = np.asarray(data, dtype = float)
hitmiss_in = data_array1 #loads entire dataset from excel csv file
# Same procedure for the target file.
with open('binary_conversion_dataset_target_2.csv','r') as dest2_f:
data_iter = csv.reader(dest2_f,
delimiter = ',',
quotechar = '"')
data = [data for data in data_iter]
data_array2 = np.asarray(data, dtype = float)
hitmiss_target = data_array2 #loads entire dataset from excel csv file
# Full slices: every row and every column of both arrays is kept.
hitmiss_input = hitmiss_in[:,:]
hitmiss_target = hitmiss_target[:,:]
# Single hand-written sample of five values, passed to cgnet.predict later.
hitmiss_predict = [0.53, 0.80, 0.40, 0.20, 0.07]
#####break target set into single numbers
# Pull columns 0-4 of the target array out as separate one-dimensional targets.
# Only hitmiss_target1a is used by the visible code below; the remaining
# columns (5-19) are left commented out.
hitmiss_target1a = hitmiss_target[:,0]
hitmiss_target1b = hitmiss_target[:,1]
hitmiss_target1c = hitmiss_target[:,2]
hitmiss_target1d = hitmiss_target[:,3]
hitmiss_target1e = hitmiss_target[:,4]
##hitmiss_target1f = hitmiss_target[:,5]
##hitmiss_target1g = hitmiss_target[:,6]
##hitmiss_target1h = hitmiss_target[:,7]
##hitmiss_target1i = hitmiss_target[:,8]
##hitmiss_target1j = hitmiss_target[:,9]
##hitmiss_target1k = hitmiss_target[:,10]
##hitmiss_target1l = hitmiss_target[:,11]
##hitmiss_target1m = hitmiss_target[:,12]
##hitmiss_target1n = hitmiss_target[:,13]
##hitmiss_target1o = hitmiss_target[:,14]
##hitmiss_target1p = hitmiss_target[:,15]
##hitmiss_target1q = hitmiss_target[:,16]
##hitmiss_target1r = hitmiss_target[:,17]
##hitmiss_target1s = hitmiss_target[:,18]
##hitmiss_target1t = hitmiss_target[:,19]
################################################Neural Network for hit miss
# Split the inputs and the first target column into train and test parts;
# TRAIN is passed as train_size.
x_train, x_test, y_train, y_test = train_test_split(
hitmiss_input, hitmiss_target1a, train_size=TRAIN
)
# Configure a neupy ConjugateGradient network from three layers declared with
# sizes 5, HIDDENLAYER and 1 (presumably input -> hidden -> output; confirm
# against the neupy documentation).
cgnet = algorithms.ConjugateGradient(
connection=[
layers.TanhLayer(5),
layers.TanhLayer(HIDDENLAYER),
layers.OutputLayer(1),
],
search_method='golden',
tol = TOL, step = STEP,
show_epoch=25,
optimizations=[algorithms.LinearSearch],
)
# Train on the training split, also passing the test split, for EPOCHS epochs.
cgnet.train(x_train, y_train, x_test, y_test, epochs=EPOCHS)
# Predict for the single hand-written sample and round to ROUND decimals.
hitmiss_final_A = cgnet.predict(hitmiss_predict).round(ROUND)
# Intended to persist the trained network to 'network-storage.dill'.
# NOTE(review): as written, nothing is saved. `net` is the open file handle,
# not the trained `cgnet`, and dill.dumps() only returns the serialized data
# without writing it anywhere, so the file is left empty. That matches the
# EOFError raised when the file is loaded later. Storing the network would
# need dill.dump(cgnet, net), ideally with the file opened in binary mode
# ('wb').
with open('network-storage.dill', 'w') as net:
dill.dumps(net, dill.HIGHEST_PROTOCOL)
#p = pickle.dumps(g, pickle.HIGHEST_PROTOCOL)
# Show the rounded prediction (Python 2 print statement).
print hitmiss_final_A
import dill
import csv
import numpy as np
from sklearn import datasets, preprocessing
from sklearn.cross_validation import train_test_split
from neupy import algorithms, layers
from neupy.functions import rmsle
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)
#variables
# Settings copied from the training script; the visible loading script only
# reads TRAIN and ROUND.
EPOCHS = 2000  # not referenced in the visible loading script (no training call)
HIDDENLAYER = 17  # not referenced in the visible loading script
miss = 0.1  # not referenced in the visible loading script
hit = 0.2  # not referenced in the visible loading script
TRAIN = 0.7  # passed as train_size= to train_test_split
ROUND = 2  # number of decimals kept when rounding the prediction
STEP = 0.003  # not referenced in the visible loading script
TOL = 0.02  # not referenced in the visible loading script
# Read the input CSV and the target CSV into float NumPy arrays.
# NOTE(review): indentation appears to have been lost in this paste; presumably
# the csv.reader call and the list-building line belong inside each `with` body.
with open('binary_conversion_dataset_input_2.csv','r') as dest1_f:
data_iter = csv.reader(dest1_f,
delimiter = ',',
quotechar = '"')
# Collect every row produced by the reader into a list.
data = [data for data in data_iter]
# Convert the rows of strings to a float array.
data_array1 = np.asarray(data, dtype = float)
hitmiss_in = data_array1 #loads entire dataset from excel csv file
# Same procedure for the target file.
with open('binary_conversion_dataset_target_2.csv','r') as dest2_f:
data_iter = csv.reader(dest2_f,
delimiter = ',',
quotechar = '"')
data = [data for data in data_iter]
data_array2 = np.asarray(data, dtype = float)
hitmiss_target = data_array2 #loads entire dataset from excel csv file
# Full slices: every row and every column of both arrays is kept.
hitmiss_input = hitmiss_in[:,:]
hitmiss_target = hitmiss_target[:,:]
# Single hand-written sample of five values, passed to cgnet.predict later.
hitmiss_predict = [0.53, 0.80, 0.40, 0.20, 0.07]
#####break target set into single numbers
# Pull columns 0-4 of the target array out as separate one-dimensional targets;
# only hitmiss_target1a is used by the visible code below.
hitmiss_target1a = hitmiss_target[:,0]
hitmiss_target1b = hitmiss_target[:,1]
hitmiss_target1c = hitmiss_target[:,2]
hitmiss_target1d = hitmiss_target[:,3]
hitmiss_target1e = hitmiss_target[:,4]
###Neural Network
# Split the inputs and the first target column into train and test parts.
# The four resulting arrays are not used again in the visible loading script.
x_train, x_test, y_train, y_test = train_test_split(
hitmiss_input, hitmiss_target1a, train_size=TRAIN
)
# Load the previously stored network from 'network-storage.dill' and use it
# for prediction instead of training a new one.
# NOTE(review): dill.load() is the call that raises the reported EOFError. The
# file written by the saving script is empty, so there is nothing to unpickle.
# NOTE(review): the file is opened in text mode ('r'); pickled data is binary,
# so 'rb' is presumably the safer mode, especially on Windows -- confirm.
with open('network-storage.dill', 'r') as f:
cgnet = dill.load(f)
# Predict for the single hand-written sample and round to ROUND decimals.
hitmiss_final_A = cgnet.predict(hitmiss_predict).round(ROUND)
# Show the rounded prediction (Python 2 print statement).
print hitmiss_final_A
The errors produced are:
Traceback (most recent call last):
File "C:\Python27\save network script.py", line 171, in <module>
cgnet = dill.load(f)
File "C:\Python27\lib\site-packages\dill\dill.py", line 128, in load
obj = pik.load()
File "C:\Python27\lib\pickle.py", line 858, in load
dispatch[key](self)
File "C:\Python27\lib\pickle.py", line 880, in load_eof
raise EOFError
EOFError
Is it possible that the variable notation I have chosen is causing it to loop multiple times, causing issues? Or is there possibly too much to store?
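Your dumping line should be like
    dill.dump(cgnet, net)
Or
    net.write(dill.dumps(cgnet))
`dumps` returns a string and does not write to the file on its own. Note also that you were passing the file handle `net` rather than the network `cgnet`. The file remained empty, and when reading it you received an EOF (end of file) error immediately.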