Deap run time is giving error of key Error

220 Views Asked by At
C:\Python27\python.exe
C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py Traceback (most 
recent call last):   File
"C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 166, in
<module>
    main_run("C:\defect4j\TinyGP")   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 151, in
main_run
    Fitness, RawScore, Formula = main()   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 130, in
main
    halloffame=hof, verbose=True)   File "C:\Python27\lib\site-packages\deap\algorithms.py", line 150, in
eaSimple
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 82, in
eval_func
    spectrum = FaultVersionsDict[str(numberOfversion[version])] KeyError: '244'

Process finished with exit code 1

How can I solve this error? This is the full code:

import os
import sys
import operator
import numpy as np
import pandas as pd
import time
import pickle
import warnings
import random
import itertools

import random
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
import numpy
from deap import gp
import glob

warnings.filterwarnings('ignore')


def datafile():
    files = []
    for filepath in glob.iglob(r'C:\defect4j\All single fault\*.txt'):
        files.append(filepath)
    return files

def readcsv():
    for csvfiles in datafile():
        nFaultVersion = len(datafile())
        vFaultLocation = np.zeros(nFaultVersion)
        vStatementCount = np.zeros(nFaultVersion)
        i = 0
        FaultVersionsDict = {}
        nFirstFault = (pd.read_csv(csvfiles, sep=',', nrows=0)).columns[0]
        df = pd.read_csv(csvfiles, skiprows=1, header=None).values
        dset = list(list(float(elem) for elem in row) for row in df)
        vFaultLocation[i] = nFirstFault
        vStatementCount[i] = df.shape[0]
        FaultVersionsDict[str(i)] = df
        i = i + 1
    return [vFaultLocation, vStatementCount, FaultVersionsDict]


def safeDiv(left, right):
    try: return left / right
    except ZeroDivisionError: return 0

pset = gp.PrimitiveSet("MAIN", 4)
pset.addPrimitive(numpy.add, 2, name="gp_add")
pset.addPrimitive(numpy.subtract, 2, name="gp-vsub")
pset.addPrimitive(numpy.multiply, 2, name="gp_mul")
pset.addPrimitive(numpy.sqrt, 1, name="gp_sqrt")
pset.addPrimitive(safeDiv, 2, name="gp_div")
pset.addPrimitive(numpy.negative, 1, name="gp_neg")
pset.addPrimitive(numpy.cos, 1, name="gp_cos")
pset.addPrimitive(numpy.sin, 1, name="gp_sin")


#pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
pset.addEphemeralConstant("rand",lambda: random.random() * 100)
pset.renameArguments(ARG0='EP', ARG1="EF", ARG2="NP", ARG3="NF")

creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

def eval_func(individual):
    #F = gp.compileADF(expr=individual, psets=pset)
    F = toolbox.compile(expr=individual)
    #F = gp.compile(expr=individual, pset=pset)
    fit = []
    for version in range(len(numberOfversion)):
    #for version in numberOfversion:
        spectrum = FaultVersionsDict[str(numberOfversion[version])]
        EP = spectrum[:, 0]
        EF = spectrum[:, 1]
        NP = spectrum[:, 2]
        NF = spectrum[:, 3]
        #print(spectrum)
        susp_v = eval(F)
        sortedSusp_v = -np.sort(-susp_v)
        faultLocation = int(vFaultLocation[numberOfversion[version]])
        susForFault = susp_v[faultLocation]
        tieCount = np.where(sortedSusp_v == susForFault)
        # firstTie = tieCount[0].min() + 1 #zero-based
        LastTie = tieCount[0].max() + 1  # the last index of a tie of faulty statement
        faultPosinRank = LastTie
        currentFit = 100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100
        fit.append(currentFit)
        # sortedIndexList = list(np.argsort(susp_v)[::-1])
        # faultPosinRank = sortedIndexList.index(vFaultLocation[numberOfversion[version]])
        # fit.append(100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100)
    avgFiteness = np.mean(fit)
    return avgFiteness


toolbox.register("evaluate", eval_func)
toolbox.register("select", tools.selAutomaticEpsilonLexicase)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))


def main():
    random.seed(318)

    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
                                   halloffame=hof, verbose=True)
    # print log
    return pop, log, hof


def main_run(outputFolder):
    if os.path.exists(outputFolder) is False:
        os.mkdir(outputFolder)
    outputFile_i = os.path.join(outputFolder, "formula.csv")
    outputFile_v = os.path.join(outputFolder, "VersionSamples.csv")
    outputFile_f = os.path.join(outputFolder, "Fiteness.csv")
    file_v = open(outputFile_v, "w")
    file_f = open(outputFile_f, "w")
    with open(outputFile_i, "w") as file:
        global numberOfversion
        for i in range(30):
            numberOfversion = random.sample(range(0, 255), 120)
            #numberOfversion = random.sample(range(0, 92), 20)  # randomly choose 20 samples from 92 faulty versions
            file_v.write(str(numberOfversion) + "\n")
            file_v.flush()

            Fitness, RawScore, Formula = main()
            file.write(Formula)
            file_f.write(str(Fitness) + "," + str(RawScore) + "\n")
            file_f.flush()
            print(i)
            file.flush()
        file.close()
        file_v.close()


if __name__ == '__main__':
    global numberOfversion
    #print(datafile())
    vFaultLocation, vStatementCount, FaultVersionsDict = readcsv()
    #print(readcsv())
    main_run("C:\defect4j\TinyGP")
    main()
    #print(eval_func(individual=readcsv()))
0

There are 0 best solutions below