Failing to generate scalar predictions from NuPIC CLA model


I'm failing to get scalar predictions out of a CLA model.

Here's a self-contained example. It builds a model from a config dictionary using ModelFactory, trains it on a simple data set ({input_field: X, output_field: X}, where X is random between 0 and 1), and then tries to extract predictions from input of the form {input_field: X, output_field: None}.

#!/usr/bin/python

import random
from nupic.frameworks.opf.modelfactory import ModelFactory

config = {
    'model': "CLA",
    'version': 1,
    'modelParams': {
        'inferenceType': 'NontemporalClassification',
        'sensorParams': {
            'verbosity' : 0,
            'encoders': {
              '_classifierInput':     {
                'classifierOnly': True,
                'clipInput': True,
                'fieldname': u'output_field',
                'maxval': 1.0,
                'minval': 0.0,
                'n': 100,
                'name': '_classifierInput',
                'type': 'ScalarEncoder',
                'w': 21},
              u'input_field':     {
                'clipInput': True,
                'fieldname': u'input_field',
                'maxval': 1.0,
                'minval': 0.0,
                'n': 100,
                'name': u'input_field',
                'type': 'ScalarEncoder',
                'w': 21},
            },
        },

        'spEnable': False,
        'tpEnable' : False,

        'clParams': {
            'regionName' : 'CLAClassifierRegion',
            'clVerbosity' : 0,
            'alpha': 0.001,
            'steps': '0',
        },
    },
}

model = ModelFactory.create(config)

ROWS = 100
def sample():
    return random.uniform(0.0, 1.0)

# training data is {input_field: X, output_field: X}
def training():
    for r in range(ROWS):
        value = sample()
        yield {"input_field": value, "output_field": value}

# testing data is {input_field: X, output_field: None} (want output_field predicted)
def testing():
    for r in range(ROWS):
        value = sample()
        yield {"input_field": value, "output_field": None}

model.enableInference({"predictedField": "output_field"})
model.enableLearning()
for row in training():
    model.run(row)
#model.finishLearning() fails in clamodel.py

model.disableLearning()
for row in testing():
    result = model.run(row)
    print result.inferences # Shows None as value

The output I see is a high-confidence None, rather than what I expect, which is something close to the input value (since the model was trained on input == output):

{'multiStepPredictions': {0: {None: 1.0}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 0.99999999999999978}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 1.0000000000000002}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 1.0}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
  • 'NontemporalClassification' seems to be the right inferenceType, because it's a simple classification. But does that work with scalars?
  • Is there a different way of expressing that I want a prediction other than output_field=None?
  • I need output_field to be classifierOnly=True. Is there related configuration missing or wrong?

Thanks for your help.

There are 2 answers below.

Answer 1

The inferenceType you want is TemporalMultiStep.

See this example for a complete walkthrough.
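
Roughly, the change against the config in the question looks like this (a sketch only; the SP/TP regions also need their own spParams/tpParams, and the 'steps' value assumes you want a one-step-ahead prediction):

# Sketch of the key config changes, assuming the rest of the
# question's config stays the same. TemporalMultiStep runs the
# spatial and temporal poolers, so both get enabled here.
config['modelParams']['inferenceType'] = 'TemporalMultiStep'
config['modelParams']['spEnable'] = True    # plus matching 'spParams'
config['modelParams']['tpEnable'] = True    # plus matching 'tpParams'
config['modelParams']['clParams']['steps'] = '1'  # predict one step ahead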

Answer 2

Here's the working example. The key changes were:

  1. Use TemporalMultiStep, as recommended by @matthew-taylor (and add the SP/TP parameters it requires).
  2. Use "implementation": "py" in clParams. My values are in the range 0.0-1.0, and the fast (C++) classifier always returns None for values in that range, while the same code with the "py" implementation returns valid values. Change the range to 10-100 and the fast implementation returns valid values too. This change is what finally produced non-None results.
  3. Less significant than #2: to improve the results, I repeat each training row several times so it sinks in, which makes sense for training.

To see the classifier bug, comment out the "implementation": "py" line in clParams and the results will be None. Then change MIN_VAL to 10 and MAX_VAL to 100 and watch the results come back.

#!/usr/bin/python

import random
from nupic.frameworks.opf.modelfactory import ModelFactory

MIN_VAL = 0.0
MAX_VAL = 1.0

config = {
    'model': "CLA",
    'version': 1,
    'predictAheadTime': None,

    'modelParams': {
        'clParams': {
            "implementation": "py", # cpp version fails with small numbers
            'regionName' : 'CLAClassifierRegion',
            'clVerbosity' : 0,
            'alpha': 0.001,
            'steps': '1',
        },
        'inferenceType': 'TemporalMultiStep',
        'sensorParams': {
            'encoders': {
              '_classifierInput':     {
                'classifierOnly': True,
                'clipInput': True,
                'fieldname': 'output_field',
                'maxval': MAX_VAL,
                'minval': MIN_VAL,
                'n': 200,
                'name': '_classifierInput',
                'type': 'ScalarEncoder',
                'w': 21},
              u'input_field':     {
                'clipInput': True,
                'fieldname': 'input_field',
                'maxval': MAX_VAL,
                'minval': MIN_VAL,
                'n': 100,
                'name': 'input_field',
                'type': 'ScalarEncoder',
                'w': 21},
            },

            'sensorAutoReset' : None,
            'verbosity' : 0,
        },

        'spEnable': True,
        'spParams': {
            'columnCount': 2048,
            'globalInhibition': 1,
            'spatialImp': 'cpp',
        },
        'tpEnable': True,
        'tpParams': {
            'activationThreshold': 12,
            'cellsPerColumn': 32,
            'columnCount': 2048,
            'temporalImp': 'cpp',
        },
        'trainSPNetOnlyIfRequested': False,
    },
}
# end of config dictionary

model = ModelFactory.create(config)

TRAINING_ROWS = 100
TESTING_ROWS = 100

def sample(r = 0.0):
    return random.uniform(MIN_VAL, MAX_VAL)

def training():
    for r in range(TRAINING_ROWS):
        value = sample(r / TRAINING_ROWS)
        for rd in range(5):
            yield {
                "input_field": value,
                "output_field": value,
                '_reset': 1 if (rd==0) else 0,
            }


def testing():
    for r in range(TESTING_ROWS):
        value = sample()
        yield {
            "input_field": value,
            "output_field": None,
        }

model.enableInference({"predictedField": "output_field"})
for row in training():
    model.run(row)

for row in testing():
    result = model.run(row)
    prediction = result.inferences['multiStepBestPredictions'][1]
    if prediction is None:
        print "Input %f, Output None" % (row['input_field'])
    else:
        print "Input %f, Output %f (err %f)" % (row['input_field'], prediction, prediction - row['input_field'])