For a school project I have to program a Wordle game. Now, I'm pretty much done with it, but there's still one requirement I need to complete. A certain function needs finish within 1 second, hoewever mine is taking nearly 6 seconds to finish. The function is called 'smart_guess', it takes two arguments: 'wordlist' and 'targets'. 'Wordlist' is a list of words you are allowed to guess. 'Targets' is a list of possible target words, based on previous guesses. When no guesses have been made yet, targets will be equal to wordlist. The return value is a word (i.e. string) that appears in wordlist. It is supposed to be a smart guess that helps to find the actual target in very few turns.
def smart_guess(wordlist, targets):
''' Returns best guess after comparing the distributions of each sampled guess '''
#Get randomized sample from targetlist
samples = sample_targets(targets)
#Get big number to start with
min_largest_value = len(wordlist)
best_guess = ""
#Iterate trough samples
for guess in samples:
#Find the biggest number in distribution
biggest_value_in_distr = max(distributions(guess, targets).values())
#Check if biggest number is the smallest of all, if so, add the guess to best_guess
if biggest_value_in_distr < min_largest_value:
min_largest_value = biggest_value_in_distr
best_guess = guess
if min_largest_value <= 2:
return best_guess
return best_guess
def sample_targets(targets):
#Get randomized sample from targetlist and add a total random word
len_word = len(targets[0])
decr = 10
if len_word == 4:
sample_size = 100
decr -= 1
if len_word == 5:
sample_size = 100
decr -= 1
if len_word == 6:
sample_size = len_word * decr
decr -= 1
if len_word == 7:
sample_size = 60
decr -= 1
if len_word == 8:
sample_size = len_word * decr
decr -= 1
if len_word == 9:
sample_size = 8
decr -= 1
if len_word == 10:
sample_size = 5
samples = set([i for i in targets[0:sample_size]])
samples.add(random.choice(targets))
samples.add(random.choice(targets))
samples.add(random.choice(targets))
return samples
This is the function I want to make run faster. And for it to be more clear, I'll add my whole program down here:
import random
def load_words(file):
result = set()
with open(file) as f:
for line in f.readlines():
word = line.strip().lower()
if word.isalpha() and word.isascii():
result.add(word)
return sorted(result)
def compare(guess, target):
''' Compare two words and give string with 'X' letter is in good place, 'O' not in good place but in word and '-': not in the word. '''
result = list(target)
index_list = list(range(len(guess)))
letter_dict = {}
for letter in target:
letter_dict[letter] = target.count(letter)
# Iterate list of indexes
for idx in range(len(index_list)):
# Look which letters are in good place
if guess[idx] == target[idx]:
# Decrease letter count
letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
# Delete index from list add 'X'
result[idx] = "X"
index_list.remove(idx)
for idx in index_list:
#Check if letter still is in letter_dict and in target
if guess[idx] in target and letter_dict[guess[idx]] > 0:
# Remove lettercount from dict
letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
# Add 'O' to place in guess_list
result[idx] = "O"
else:
result[idx] = "-"
return "".join(result)
dutch_words = load_words("wordlist.txt")
d6 = [word for word in dutch_words if len(word) == 6]
d6q = [word for word in d6 if word.startswith("q")]
def filter_targets(targets, guess_results):
final_targets = []
for target in targets:
#Create list with compared results
temp_list = []
for guess in guess_results:
temp_list.append(compare(guess, target))
#Compare results are the same, add to final_targets
if temp_list == list(guess_results.values()):
final_targets.append(target)
return final_targets
def distributions(guess, targets):
distr_dict = {}
#Check how many times compared gives result
for target in targets:
result = compare(guess, target)
if result not in list(distr_dict.keys()):
distr_dict[result] = 1
else:
distr_dict[result] += 1
return distr_dict
def smart_guess(wordlist, targets):
''' Returns best guess after comparing the distributions of each sampled guess '''
#Get randomized sample from targetlist
samples = sample_targets(targets)
#Get big number to start with
min_largest_value = len(wordlist)
best_guess = ""
#Iterate trough samples
for guess in samples:
#Find the biggest number in distribution
biggest_value_in_distr = max(distributions(guess, targets).values())
#Check if biggest number is the smallest of all, if so, add the guess to best_guess
if biggest_value_in_distr < min_largest_value:
min_largest_value = biggest_value_in_distr
best_guess = guess
if min_largest_value <= 2:
return best_guess
return best_guess
def sample_targets(targets):
#Get randomized sample from targetlist and add a total random word
len_word = len(targets[0])
decr = 10
if len_word == 4:
sample_size = 100
decr -= 1
if len_word == 5:
sample_size = 100
decr -= 1
if len_word == 6:
sample_size = len_word * decr
decr -= 1
if len_word == 7:
sample_size = 60
decr -= 1
if len_word == 8:
sample_size = len_word * decr
decr -= 1
if len_word == 9:
sample_size = 8
decr -= 1
if len_word == 10:
sample_size = 5
samples = set([i for i in targets[0:sample_size]])
samples.add(random.choice(targets))
samples.add(random.choice(targets))
samples.add(random.choice(targets))
return samples
def simulate_game(target, wordlist):
n = len(target)
wordlist = [w for w in wordlist if len(w) == n and w[0] == target[0]]
if target not in wordlist:
raise ValueError("Target is not in wordlist, thus impossible to guess.")
targets = wordlist.copy()
turns = 0
while True:
num_words = len(targets)
print(f"There {'is' if num_words==1 else 'are'} {num_words} possible"
f" target{'s' if num_words!=1 else ''} left.")
turns += 1
guess = smart_guess(wordlist, targets)
print("My guess is: ", guess.upper())
result = compare(guess, target)
print("Correctness: ", result)
if result == n * "X":
print(f"Target was guessed in {turns} "
f"turn{'s' if turns!=1 else ''}.")
break
else:
targets = filter_targets(targets, {guess: result})
def count_turns(target, wordlist):
n = len(target)
wordlist = [w for w in wordlist if len(w) == n and w[0]==target[0]]
targets = wordlist.copy()
turns = 0
while True:
turns += 1
if turns > 100:
raise RuntimeError("This is going nowhere: 100 turns used.")
guess = smart_guess(wordlist, targets)
result = compare(guess, target)
if result == n * "X":
break
else:
targets = filter_targets(targets, {guess: result})
return turns
def turn_count_simulation(word_length, wordlist, runs=100):
wordlist = [word for word in wordlist if len(word) == word_length]
total = 0
for _ in range(runs):
target = random.choice(wordlist)
total += count_turns(target, wordlist)
return total/runs
What you need is profiling your code to understand where it takes too long. So you don't waste time trying to optimize parts that do not really affect the total timing.
Let's try just generating a few "words" (1716: the combinations of 6 of the first 13 letters of the alphabet), and profile
smart_guess
:So the main culprit is
compare
. Try to optimize it. For instance:And now
A bit better. Now,
distributions
is the slowest part, so we should optimize it. And so on, until you reach an acceptable execution time.