I'm building a genetic algorithm in Python that takes Location input from a .csv file (id, name, x, y), puts them in a list and calculates the optimal distances.
At the end, the program is supposed to return the best chromosome, and I want to print the location names in list form. The issue is that in this final chromosome (final_location_list in main.py), there are only 3 locations instead of the expected 10. Why is this so?
Chromosome.py
import random
import math
import csv
from typing import List
class Location:
def __init__(self, id, name, x_coord, y_coord):
self.id = int(id)
self.name = name
self.x_coord = float(x_coord)
self.y_coord = float(y_coord)
# extract data from dataset and put in a List with Location values
dataset = []
with open('test.csv', 'r') as file:
reader = csv.reader(file, delimiter=',')
next(reader)
for row in reader:
id, name, x, y = row[0], row[2], row[6], row[7]
dataset.append(Location(id=id, name=name, x_coord=x, y_coord=y))
# take dataset as parameter and create distance matrix
def generate_dist_matrix(nodes_list):
node_number = len(nodes_list)
matrix = [[0 for _ in range(node_number)] for _ in range(node_number)]
for i in range(node_number):
for j in range(node_number):
if i != j:
x1, y1 = nodes_list[i].x_coord, nodes_list[i].y_coord
x2, y2 = nodes_list[j].x_coord, nodes_list[j].y_coord
matrix[i][j] = math.dist((x1, y1), (x2, y2))
return matrix
dist_matrix = generate_dist_matrix(dataset)
# chromosome class contains name, x, y, distance, and fitness.
# Use distance to calculate fitness.
class Chromosome:
def __init__(self, nodes_list):
self.chromosome = nodes_list
self.ids_in_list = [node.id for node in nodes_list]
self.cost = self.calculate_distance()
self.fitness_value = 1 / self.cost if self.cost != 0 else float('inf')
def calculate_distance(self):
total_distance = 0
for i in range(len(self.chromosome) - 1):
total_distance += dist_matrix[self.chromosome[i].id - 1][self.chromosome[i + 1].id - 1]
total_distance += dist_matrix[self.chromosome[-1].id - 1][self.chromosome[0].id - 1] # Return to the starting point
return total_distance
main.py
import GeneticAlgorithm as GA
import Chromosome as chr # Ensure these modules are implemented correctly
# Example usage
generations = 100
pop_size = 10
mut_rate = 0.2
dataset = chr.dataset # Assuming chr.dataset contains the dataset
def execute_GA(generations, pop_size, mut_rate, dataset):
# Initialize the population
generated_pop = GA.initialize(dataset, pop_size)
costs_for_plot = []
# Execute the GA for the specified number of generations
for i in range(generations):
# Create a new generation
new_gen = GA.new_generation(generated_pop, mut_rate)
# Print the best cost for the current generation
best_cost = GA.find_best(new_gen).cost
if (best_cost == 0):
break
print(f"Generation {i + 1}: Best Cost = {best_cost}")
# Store the best cost for plotting
costs_for_plot.append(best_cost)
# Update the population for the next generation
generated_pop = new_gen
# Return the final generation and costs for plotting
return new_gen, costs_for_plot
final_generation, costs_for_plot = execute_GA(generations, pop_size, mut_rate, dataset)
best_solution = GA.find_best(final_generation)
print(best_solution.chromosome)
final_location_list = []
for i in range(len(best_solution.chromosome)):
# get id from each location and print out its associated name from dataset
location_id = dataset[i].id
location_name = dataset[location_id-1].name
final_location_list.append(location_name)
print(final_location_list)
GeneticAlgorithm.py
import random
import Chromosome as chromo
def create_random_list(node_list):
start = node_list[0]
temp = node_list[1:]
random.shuffle(temp)
temp.insert(0, start)
temp.append(start)
return temp
def initialize(data, pop_size): #generates an initial population
initial_pop = []
for _ in range(pop_size): # Corrected iterating over pop_size
temp = create_random_list(data)
generated_chromosome = chromo.Chromosome(temp)
initial_pop.append(generated_chromosome)
return initial_pop
def selection(population): #selects two chromosomes randomly from the population
ch1 = random.choice(population)
ch2 = random.choice(population)
if ch1.fitness_value > ch2.fitness_value:
winner = ch1
else:
winner = ch2
return winner
#while ch2 == ch1:
# ch2 = random.choice(population)
#return ch1, ch2
def crossover(ch1, ch2):
indexes = random.randint(2, 5)
crossed_ch1 = ch1.chromosome[1:indexes] + [item for item in ch2.chromosome[1:-1] if item not in ch1.chromosome]
crossed_ch2 = ch2.chromosome[1:indexes] + [item for item in ch1.chromosome[1:-1] if item not in ch2.chromosome]
crossed_ch1.insert(0, ch1.chromosome[0])
crossed_ch1.append(ch1.chromosome[0])
crossed_ch2.insert(0, ch2.chromosome[0])
crossed_ch2.append(ch2.chromosome[0])
return crossed_ch1, crossed_ch2
def mutation(chromosome):
swap_index1 = random.randint(1, len(chromosome) - 2) # Avoid swapping start/end nodes
swap_index2 = random.randint(1, len(chromosome) - 2)
chromosome[swap_index1], chromosome[swap_index2] = chromosome[swap_index2], chromosome[swap_index1]
return chromosome
def find_best(generation):
return min(generation, key=lambda x: x.cost)
def new_generation(previous_gen, mutation_rate):
new_generation = [find_best(previous_gen)]
for _ in range(len(previous_gen) // 2):
parent_1 = selection(previous_gen)
parent_2 = selection(previous_gen)
child_1, child_2 = crossover(parent_1, parent_2)
if random.random() < mutation_rate:
child_2 = mutation(child_2)
new_generation.append(chromo.Chromosome(child_1))
new_generation.append(chromo.Chromosome(child_2))
return new_generation
test.csv
id,place_id,name,main_category,reviews,rating,latitude,longitude
1,ChIJvSXoM47DSjARJx_AFWVTwcU,Penang Little India,Tourist attraction,15127,4.3,5.417400799,100.3393882
2,ChIJiefCZo_DSjARmpsvzxP60eY,Padang Kota Lama (Esplanade),Tourist attraction,14240,4.3,5.420548751,100.3443193
3,ChIJ0ZhzIZHDSjARWcrnjUdMN-M,Georgetown UNESCO Historic Site,History,13056,4.4,5.417185276,100.3380258
4,ChIJQY55_XrCSjARE4XPYsmgG54,Penang Hill,Nature,12895,4.3,5.409486582,100.2775463
5,ChIJZYb8XRjCSjARxD5Df-qEkCw,Kek Lok Si Temple,Religious site,9974,4.4,5.399854925,100.2736609
6,ChIJ-cCMa5LDSjARF1sg9FiPauc,Penang Street Art,Tourist attraction,8892,4.4,5.414788457,100.3382397
7,ChIJ83gcpVjoSjARH4vjVeRjzHg,Entopia by Penang Butterfly Farm,Nature,8122,4.4,5.447856468,100.215064
8,ChIJWeDiIPHCSjAROzyNNz_QUx8,Penang Botanic Gardens,Nature,7337,4.5,5.437942953,100.2906788
9,ChIJ50W1D43DSjARlPqYV1MqscE,Chew Jetty,Tourist attraction,6493,4.1,5.412995206,100.3398099
10,ChIJkbOK8JbDSjARukDZqTnwoRo,Penang Road Famous Teochew Chendul,Food,6479,4.1,5.417153557,100.3309071
Thank you.