C:\Python27\python.exe
C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py Traceback (most
recent call last): File
"C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 166, in
<module>
main_run("C:\defect4j\TinyGP") File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 151, in
main_run
Fitness, RawScore, Formula = main() File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 130, in
main
halloffame=hof, verbose=True) File "C:\Python27\lib\site-packages\deap\algorithms.py", line 150, in
eaSimple
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 82, in
eval_func
spectrum = FaultVersionsDict[str(numberOfversion[version])] KeyError: '244'
Process finished with exit code 1
How can I solve this error? This is the full code:
import os
import sys
import operator
import numpy as np
import pandas as pd
import time
import pickle
import warnings
import random
import itertools
import random
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
import numpy
from deap import gp
import glob
warnings.filterwarnings('ignore')
def datafile():
files = []
for filepath in glob.iglob(r'C:\defect4j\All single fault\*.txt'):
files.append(filepath)
return files
def readcsv():
for csvfiles in datafile():
nFaultVersion = len(datafile())
vFaultLocation = np.zeros(nFaultVersion)
vStatementCount = np.zeros(nFaultVersion)
i = 0
FaultVersionsDict = {}
nFirstFault = (pd.read_csv(csvfiles, sep=',', nrows=0)).columns[0]
df = pd.read_csv(csvfiles, skiprows=1, header=None).values
dset = list(list(float(elem) for elem in row) for row in df)
vFaultLocation[i] = nFirstFault
vStatementCount[i] = df.shape[0]
FaultVersionsDict[str(i)] = df
i = i + 1
return [vFaultLocation, vStatementCount, FaultVersionsDict]
def safeDiv(left, right):
try: return left / right
except ZeroDivisionError: return 0
pset = gp.PrimitiveSet("MAIN", 4)
pset.addPrimitive(numpy.add, 2, name="gp_add")
pset.addPrimitive(numpy.subtract, 2, name="gp-vsub")
pset.addPrimitive(numpy.multiply, 2, name="gp_mul")
pset.addPrimitive(numpy.sqrt, 1, name="gp_sqrt")
pset.addPrimitive(safeDiv, 2, name="gp_div")
pset.addPrimitive(numpy.negative, 1, name="gp_neg")
pset.addPrimitive(numpy.cos, 1, name="gp_cos")
pset.addPrimitive(numpy.sin, 1, name="gp_sin")
#pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
pset.addEphemeralConstant("rand",lambda: random.random() * 100)
pset.renameArguments(ARG0='EP', ARG1="EF", ARG2="NP", ARG3="NF")
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
def eval_func(individual):
#F = gp.compileADF(expr=individual, psets=pset)
F = toolbox.compile(expr=individual)
#F = gp.compile(expr=individual, pset=pset)
fit = []
for version in range(len(numberOfversion)):
#for version in numberOfversion:
spectrum = FaultVersionsDict[str(numberOfversion[version])]
EP = spectrum[:, 0]
EF = spectrum[:, 1]
NP = spectrum[:, 2]
NF = spectrum[:, 3]
#print(spectrum)
susp_v = eval(F)
sortedSusp_v = -np.sort(-susp_v)
faultLocation = int(vFaultLocation[numberOfversion[version]])
susForFault = susp_v[faultLocation]
tieCount = np.where(sortedSusp_v == susForFault)
# firstTie = tieCount[0].min() + 1 #zero-based
LastTie = tieCount[0].max() + 1 # the last index of a tie of faulty statement
faultPosinRank = LastTie
currentFit = 100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100
fit.append(currentFit)
# sortedIndexList = list(np.argsort(susp_v)[::-1])
# faultPosinRank = sortedIndexList.index(vFaultLocation[numberOfversion[version]])
# fit.append(100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100)
avgFiteness = np.mean(fit)
return avgFiteness
toolbox.register("evaluate", eval_func)
toolbox.register("select", tools.selAutomaticEpsilonLexicase)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
def main():
random.seed(318)
pop = toolbox.population(n=300)
hof = tools.HallOfFame(1)
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
halloffame=hof, verbose=True)
# print log
return pop, log, hof
def main_run(outputFolder):
if os.path.exists(outputFolder) is False:
os.mkdir(outputFolder)
outputFile_i = os.path.join(outputFolder, "formula.csv")
outputFile_v = os.path.join(outputFolder, "VersionSamples.csv")
outputFile_f = os.path.join(outputFolder, "Fiteness.csv")
file_v = open(outputFile_v, "w")
file_f = open(outputFile_f, "w")
with open(outputFile_i, "w") as file:
global numberOfversion
for i in range(30):
numberOfversion = random.sample(range(0, 255), 120)
#numberOfversion = random.sample(range(0, 92), 20) # randomly choose 20 samples from 92 faulty versions
file_v.write(str(numberOfversion) + "\n")
file_v.flush()
Fitness, RawScore, Formula = main()
file.write(Formula)
file_f.write(str(Fitness) + "," + str(RawScore) + "\n")
file_f.flush()
print(i)
file.flush()
file.close()
file_v.close()
if __name__ == '__main__':
global numberOfversion
#print(datafile())
vFaultLocation, vStatementCount, FaultVersionsDict = readcsv()
#print(readcsv())
main_run("C:\defect4j\TinyGP")
main()
#print(eval_func(individual=readcsv()))