Custom weighted loss in cntk

285 Views Asked by At

I would like to calculate the weighted error as below:

def calc_err(pred, targets, weights) :
    """Weighted cross-entropy-style error over class probabilities.

    pred    : per-class prediction probabilities
    targets : expected one-hot target array
    weights : 2D class-weight matrix

    Returns the evaluated scalar combining a weighted penalty on the target
    class log-probability and on the non-target log-probabilities.
    """
    num_classes = np.size(pred, axis=0)
    ones = [1.0] * num_classes

    # Complements: mask of non-target classes and "probability of being wrong".
    off_targets = C.minus(ones, targets)
    miss_probs = C.minus(ones, pred)

    # Pick out the weight column for the target class, then split it into
    # target and non-target contributions.
    w_col = C.times(targets, weights)
    w_on = C.element_times(w_col, targets)
    w_off = C.element_times(w_col, off_targets)

    on_term = C.negate(C.reduce_sum(C.element_times(w_on, C.log(pred)), axis=-1))
    off_term = C.negate(C.reduce_sum(C.element_times(w_off, C.log(miss_probs)), axis=-1))

    return (on_term + off_term).eval()

where pred is the prediction probabilities, targets is the expected one-hot array, and weights is a 2D array. I've created a corresponding custom loss below:

def WeightedCrossEntropy(z, targets):
    """Weighted cross-entropy training loss on raw network output z.

    Mirrors calc_err(pred, targets, weights), except that softmax is applied
    to the logits z first and the un-evaluated CNTK expression is returned so
    it can serve as a training criterion.

    NOTE(review): `weights` is captured from the enclosing scope rather than
    passed as an argument — make sure it is the exact same constant/parameter
    that calc_err uses, otherwise the training loss and the reported test
    error will measure different things.
    """
    pred = C.softmax(z)
    # NOTE(review): pred here is a CNTK expression, not an ndarray; confirm
    # that np.size(pred, axis=0) really yields the class count in this build.
    nClass = np.size(pred, axis=0)
    Is = [1.0 for i in range(nClass)]  # floats, consistent with calc_err

    nonTargets = C.minus(Is, targets)
    wrongPred = C.minus(Is, pred)
    wColumn = C.times(targets, weights)
    wTarget = C.element_times(wColumn, targets)
    wNonTarget = C.element_times(wColumn, nonTargets)
    c1 = C.negate(C.reduce_sum(C.element_times(wTarget, C.log(pred)), axis=-1))
    c2 = C.negate(C.reduce_sum(C.element_times(wNonTarget, C.log(wrongPred)), axis=-1))
    ce = c1 + c2

    return ce

When I tried to train, I noticed that while the custom loss is indeed decreasing, the test error from calc_err(pred, targets, weights) only decreases for one or two epochs, or not at all. Is my WeightedCrossEntropy(z, targets) correct, or what did I do wrong?

2

There are 2 best solutions below

4
On

Is `weights` a constant or a parameter? Please make sure these two functions take the same inputs, parameters, and constants.

0
On

Below is how the weights are pre-calculated:

def ColBasedCustomWeight(nClass) :
    """Build an nClass x nClass misclassification weight matrix.

    Entry [j][i] penalizes predicting class i when the target is class j:
    the penalty grows exponentially with the class distance |i - j|, using
    the steeper base 1.5 for confusions involving class 0 and base 1.2
    otherwise. Each column is then rescaled so its entries sum to nClass.
    """
    ratio = 1.0

    def raw_weight(j, i):
        # Exponential penalty by signed class distance n = i - j.
        n = i - j
        if n > 0:
            # false negative if j = 0
            return math.pow(1.5, ratio*n) if j == 0 else math.pow(1.2, ratio*n)
        if n < 0:
            # false positive if i = 0
            return math.pow(1.5, -ratio*n) if i == 0 else math.pow(1.2, -ratio*n)
        return 1.0

    pWs = [[raw_weight(j, i) for i in range(nClass)] for j in range(nClass)]

    # Column totals, accumulated row by row (same order as the build loop).
    colSums = [sum(pWs[j][i] for j in range(nClass)) for i in range(nClass)]

    #normalize the weights to nClass
    for j in range(nClass):
        for i in range(nClass):
            pWs[j][i] *= nClass / colSums[i]

    return pWs

Any problems with the weight definition or calculation?