Loop slowdown when using Python instead of a shell script

I have two Python scripts, network.py and analysis.py. The first reads in a timeseries as an np.array (about 180 MB), trains a neural network on it using some parameters, creates predictions, and saves them as a .txt file along with the truth for possible later inspection (each file up to about 160 MB). These files are then read in by the second script, which generates values that are stored in a multidimensional np.array whose dimensionality depends on the number of the previously mentioned parameters. The parameters were previously passed in as command-line arguments by a shell script and read via sys.argv.

I wanted to retrieve values over the whole parameter space and used a shell script loop for that. I let it run overnight and didn't measure the time it took, but it can be estimated to have run for about 12 hours.

To make this faster, I did the following: instead of the shell script, I used another Python script that imports the necessary functionality from network.py and analysis.py and loops over them. I now pass truth and predictions from one function to the other directly, without saving and re-reading the files, and the timeseries mentioned at the very beginning is read in only once instead of on every loop iteration.

I let it run overnight again, and when I had a look after about 20 hours it had only finished about 65%, progressing really slowly, much slower than at the beginning of the run. Using top I saw that the process occupied about 26 GB of virtual memory, which seems insanely high to me.
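
To pin down where the memory goes, I could log the process's peak memory after every iteration. A minimal sketch (assuming Linux, where ru_maxrss is reported in kilobytes; resource is in the standard library):

import resource

def log_memory(tag):
    # peak resident set size of this process so far, in MB (Linux reports KB)
    peak_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
    print('MEMORY after ' + tag + ': ' + str(round(peak_mb, 1)) + ' MB peak RSS')

Calling log_memory(str(n)) at the end of each loop iteration would show whether the usage grows with every run of network().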

Did I miss something very basic? Is there a common mistake here? I couldn't find an answer yet. Your help is highly appreciated!

Here are the full scripts. I'm sorry they are a bit messy; it's all work in progress...

The loop:

#!/usr/bin/env python3

import numpy as np
from kerasNetwork import network
from analysis import analysis

thesis_home = '/home/r/Raphael.Kriegmair/uni/master/thesis'
n = 1
size = '10000'
print("INFO: Reading training data")
inputStates = np.loadtxt(thesis_home + '/trainingData/modifiedShallowWater/mswOutput_' + size + '.txt')
for variables in 'u', 'h', 'r', 'uh', 'ur', 'hr', 'uhr':
    for hiddenLayers in '1', '2', '3', '4', '5':
        for nodesPerLayer in '50', '100', '250', '500', '750':
            for train in '0.1', '0.3', '0.5', '0.7', '0.9':
                truth, predictions = network(inputStates, size, variables, hiddenLayers, nodesPerLayer, train)
                analysis(truth, predictions, size, variables, hiddenLayers, nodesPerLayer, train)
                print('CURRENTLY:  ' + str(n) + ' / 875')
                n = n+1
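
One thing I have not ruled out: every call to network() builds a fresh Sequential model, and as far as I understand Keras with the TensorFlow backend keeps adding these models to a single global graph unless the session is cleared, which could explain both the growing memory and the progressive slowdown. A sketch of what I could try (assuming the TensorFlow backend):

from keras import backend as K
import gc

# at the end of the innermost loop body, after analysis(...):
K.clear_session()  # drop the accumulated TensorFlow graph state
gc.collect()       # encourage immediate release of the freed objects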

network.py:

#!/usr/bin/env python3

from keras.models import Sequential
from keras.layers import Dense, Conv1D#, Activation, BatchNormalization
#from keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np


def network(inputStates, size, variables, hiddenLayers, nodesPerLayer, train):

    nodesPerLayer = int(nodesPerLayer)
    train = float(train)    

    print("INFO: Preprocessing training data")
    # input/output state pairs share time index
    outputStates = inputStates[:, 1:]
    inputStates = inputStates[:, :-1]


    # split variables

    u_input = inputStates[0:250,:]
    h_input = inputStates[250:500,:]
    r_input = inputStates[500:750,:]

    u_output = outputStates[0:250,:]
    h_output = outputStates[250:500,:]
    r_output = outputStates[500:750,:]

    numStates = len(inputStates[0,:])


    # normalize data

    u_mean = np.mean(u_input)
    h_mean = np.mean(h_input)
    r_mean = np.mean(r_input)

    u_sigma = np.std(u_input)
    h_sigma = np.std(h_input)
    r_sigma = np.std(r_input)

    u_input, u_output = (u_input - u_mean)/u_sigma, (u_output - u_mean)/u_sigma
    h_input, h_output = (h_input - h_mean)/h_sigma, (h_output - h_mean)/h_sigma
    r_input, r_output = (r_input - r_mean)/r_sigma, (r_output - r_mean)/r_sigma


    # choose variables

    if variables == 'uhr':
        trainInput = np.concatenate((u_input,
                                     h_input,
                                     r_input,
                                     ), axis = 0)
        trainOutput = np.concatenate((u_output,
                                      h_output,
                                      r_output
                                      ), axis = 0)

    elif variables == 'uh':
        trainInput = np.concatenate((u_input,
                                     h_input
                                     ), axis = 0)
        trainOutput = np.concatenate((u_output,
                                      h_output
                                      ), axis = 0)

    elif variables == 'ur':
        trainInput = np.concatenate((u_input,
                                     r_input
                                     ), axis = 0)
        trainOutput = np.concatenate((u_output,
                                      r_output
                                      ), axis = 0)

    elif variables == 'hr':
        trainInput = np.concatenate((h_input,
                                     r_input
                                     ), axis = 0)
        trainOutput = np.concatenate((h_output,
                                      r_output
                                      ), axis = 0)


    elif variables == 'u':
        trainInput = u_input
        trainOutput = u_output

    elif variables == 'h':
        trainInput = h_input
        trainOutput = h_output

    elif variables == 'r':
        trainInput = r_input
        trainOutput = r_output

    else:
        # fail fast: without this, trainInput would be undefined below
        raise ValueError('ARGUMENT ERROR: invalid variable combination: ' + variables)

    dim = len(trainInput[:,0])


    print("INFO: Initializing model")
    # activations: relu, sigmoid, ...
    model = Sequential()
    model.add(Dense(nodesPerLayer, activation='relu', input_dim=dim))
    #BatchNormalization()

    for _ in range(int(hiddenLayers) - 1):
        model.add(Dense(nodesPerLayer, activation='relu'))

    model.add(Dense(dim, activation='linear'))
    model.compile(optimizer='adam',
                  loss='mse',
                  metrics=['accuracy'])



    # Train the model, iterating on the data 
    print("INFO: Training")

    val = 1 - train
    model.fit(np.swapaxes(trainInput,0,1), np.swapaxes(trainOutput,0,1), epochs=15, validation_split=val)
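    # note: validation_split holds out the *last* fraction of the samples,
    # so the tail of the timeseries doubles as the prediction set below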



    print("INFO: Generating predictions")
    # generate predictions

    predictionNumStates = int(numStates*val)
    trainNumStates = int(numStates*train)
    predictions = np.empty((1,) + trainInput[:,:predictionNumStates].shape)
    print("Predictions shape:  ", predictions.shape)

    predictions[0,:,:] = trainInput[:,trainNumStates+1:]

    for n in range(predictionNumStates-1):
        #print(predictions[:,n].shape)
        #, steps=1
        # the leading index keeps the batch axis: model.predict expects input
        # of shape (samples, features), so predictions[:, :, n] has shape (1, dim)
        predictions[:,:,n] = model.predict(predictions[:,:,n])
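    # NOTE: one model.predict() call per column is slow; all columns could
    # likely be predicted in a single batched call instead, e.g. (untested
    # sketch): predictions[0] = model.predict(predictions[0].T).T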


    print("INFO: Saving results")
    # compare
    truth = trainOutput[:,trainNumStates+1:]
    predictions = predictions[0,:,:]
    #difference = np.square(truth - predictions[0,:,:])

    return truth, predictions

#    predictions_filename = 'predictions_' + size + '_' + variables + '_' + hiddenLayers + '_' + str(nodesPerLayer) + '_' + str(train) + '.txt'
#    truth_filename = 'truth_' + size + '_' + variables + '_' + str(train) + '.txt'
#    
#    # these files are moved to "work" directory afterwards by experiments' shell script
#    np.savetxt(thesis_home + '/temporary/' + predictions_filename, predictions)
#    np.savetxt(thesis_home + '/temporary/' + truth_filename, truth)

analysis.py:

import numpy as np
import sys 

#expPath = sys.argv[1]
#size = sys.argv[2]
#variables = sys.argv[3]
#hiddenLayers = sys.argv[4]
#train = sys.argv[5]
#nodesPerLayer = sys.argv[6]


#predictions_filename = 'predictions_' + size + '_' + variables + '_' + hiddenLayers + '_' + str(nodesPerLayer) + '_' + str(train) + '.txt'
#truth_filename = 'truth_' + size + '_' + variables + '_' + str(train) + '.txt'
#
#predictions = np.loadtxt(thesis_home + '/temporary/' + predictions_filename)
#truth = np.loadtxt(thesis_home + '/temporary/' + truth_filename)


def analysis(truth, predictions, size, variables, hiddenLayers, nodesPerLayer, train):

    print('INFO: Analysis part')

    thesis_home = '/home/r/Raphael.Kriegmair/uni/master/thesis'

    index_size = {0 : 10000}
    index_nodesPerLayer = {0 : 50, 1 : 100, 2 : 250, 3 : 500, 4 : 750}
    index_train = {0 : 0.1, 1 : 0.3, 2 : 0.5, 3 : 0.7, 4 : 0.9}
    index_hiddenLayers = {0 : 1, 1 : 2, 2 : 3, 3 : 4, 4 : 5}
#    index_variables = {0 : 'u', 1 : 'h', 2 : 'r', # single
#                       3 : 'u', 4 : 'h', # pair
#                       5 : 'u', 6 : 'r', # pair
#                       7 : 'h', 8 : 'r', # pair
#                       9 : 'u', 10 : 'h', 11 : 'r'} # combined

    size_index = {v:k for k,v in index_size.items()}
    nodesPerLayer_index = {v:k for k,v in index_nodesPerLayer.items()}
    train_index = {v:k for k,v in index_train.items()}
    hiddenLayers_index = {v:k for k,v in index_hiddenLayers.items()}
    #variables_index = {v:k for k,v in index_variables.items()}
    # 12 is trash key
    variable_keys = {'u': (0,12,12),
                     'h': (12,1,12),
                     'r': (12,12,2),
                     'uh': (3,4,12),
                     'ur': (5,12,6),
                     'hr': (12,7,8),
                     'uhr': (9,10,11)}



    difference = np.square(truth - predictions)
    # from now on only absolute value of truth needed
    truth = np.absolute(truth)


    timesteps = len(difference[0,:])


    if variables == 'uhr':   

        u_mean = np.mean(truth[:250,:])
        h_mean = np.mean(truth[250:500,:])
        r_mean = np.mean(truth[500:750,:])

        # relative error
        u_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / u_mean
        h_rmse = np.sqrt(np.mean(difference[250:500,:], axis=0)) / h_mean
        r_rmse = np.sqrt(np.mean(difference[500:750,:], axis=0)) / r_mean


    elif variables == 'uh':

        u_mean = np.mean(truth[:250,:])
        h_mean = np.mean(truth[250:500,:])
        r_mean = np.zeros((timesteps))

        # relative error
        u_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / u_mean
        h_rmse = np.sqrt(np.mean(difference[250:500,:], axis=0)) / h_mean
        r_rmse = np.zeros((timesteps))

    elif variables == 'ur':

        u_mean = np.mean(truth[:250,:])
        h_mean = np.zeros((timesteps))
        r_mean = np.mean(truth[250:500,:])

        # relative error
        u_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / u_mean
        h_rmse = np.zeros((timesteps))
        r_rmse = np.sqrt(np.mean(difference[250:500,:], axis=0)) / r_mean

    elif variables == 'hr':

        u_mean = np.zeros((timesteps))
        h_mean = np.mean(truth[:250,:])
        r_mean = np.mean(truth[250:500,:])

        # relative error
        u_rmse = np.zeros((timesteps))
        h_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / h_mean
        r_rmse = np.sqrt(np.mean(difference[250:500,:], axis=0)) / r_mean

    elif variables == 'u':

        u_mean = np.mean(truth[:250,:])
        h_mean = np.zeros((timesteps))
        r_mean = np.zeros((timesteps))

        # relative error
        u_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / u_mean
        h_rmse = np.zeros((timesteps))
        r_rmse = np.zeros((timesteps))

    elif variables == 'h':    

        u_mean = np.zeros((timesteps))
        h_mean = np.mean(truth[:250,:])
        r_mean = np.zeros((timesteps))

        # relative error
        u_rmse = np.zeros((timesteps))
        h_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / h_mean
        r_rmse = np.zeros((timesteps))

    elif variables == 'r':

        u_mean = np.zeros((timesteps))
        h_mean = np.zeros((timesteps))
        r_mean = np.mean(truth[:250,:])

        # relative error
        u_rmse = np.zeros((timesteps))
        h_rmse = np.zeros((timesteps))
        r_rmse = np.sqrt(np.mean(difference[:250,:], axis=0)) / r_mean



    # compute running mean

    u_runningMean = np.zeros((timesteps))
    h_runningMean = np.zeros((timesteps))
    r_runningMean = np.zeros((timesteps))

    usum_ = 0.
    hsum_ = 0.
    rsum_ = 0.
    for i in range(timesteps):
        usum_ += u_rmse[i]
        hsum_ += h_rmse[i]
        rsum_ += r_rmse[i]
        u_runningMean[i] = usum_/(i+1)
        h_runningMean[i] = hsum_/(i+1)
        r_runningMean[i] = rsum_/(i+1)



    # compute running standard deviation

    u_runningStdDev = np.zeros((timesteps))
    h_runningStdDev = np.zeros((timesteps))
    r_runningStdDev = np.zeros((timesteps))

    usum_ = 0.
    hsum_ = 0.
    rsum_ = 0.
    for i in range(timesteps):
        usum_ += np.square(u_rmse[i] - u_runningMean[i])
        hsum_ += np.square(h_rmse[i] - h_runningMean[i])
        rsum_ += np.square(r_rmse[i] - r_runningMean[i])
        u_runningStdDev[i] = np.sqrt(usum_/(i+1))
        h_runningStdDev[i] = np.sqrt(hsum_/(i+1))
        r_runningStdDev[i] = np.sqrt(rsum_/(i+1))



    # dirty fix for suspiciously large last values   
    u_rmse[timesteps-1] = u_runningMean[timesteps-1]
    h_rmse[timesteps-1] = h_runningMean[timesteps-1]
    r_rmse[timesteps-1] = r_runningMean[timesteps-1]



    #results = np.zeros((12, # 3 single = 3, 3 pairs = 6, 1 combined = 3
    #                    len(size_index),
    #                    len(nodesPerLayer_index),
    #                    len(train_index),
    #                    len(hiddenLayers_index)))

    results = np.load(thesis_home + '/experiments/results.npy')
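    # results.npy must already exist on disk (e.g. created once from the
    # commented-out np.zeros template above); each call updates it in place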

    u_index = variable_keys[variables][0]
    h_index = variable_keys[variables][1]
    r_index = variable_keys[variables][2]

    results[u_index, 
            size_index[int(size)], nodesPerLayer_index[int(nodesPerLayer)], train_index[float(train)], hiddenLayers_index[int(hiddenLayers)]] = u_runningMean[timesteps-1]

    results[h_index, 
            size_index[int(size)], nodesPerLayer_index[int(nodesPerLayer)], train_index[float(train)], hiddenLayers_index[int(hiddenLayers)]] = h_runningMean[timesteps-1]

    results[r_index, 
            size_index[int(size)], nodesPerLayer_index[int(nodesPerLayer)], train_index[float(train)], hiddenLayers_index[int(hiddenLayers)]] = r_runningMean[timesteps-1]

    np.save(thesis_home + '/experiments/results.npy', results)