Need to return an array of the top 3 most frequently occurring words (Codewars)

85 Views Asked by At
import re
from collections import Counter


def find_top_word(top, word_list, word_set):
    """Append the most frequent words of ``word_list`` to ``top``.

    Words are appended one at a time, most frequent first, until ``top``
    holds at least three entries.  At least one entry is always appended,
    even when ``top`` already has three or more.  When no candidate word is
    left, the empty string ``''`` is appended as a placeholder, so callers
    must be prepared to filter it out.

    Ties are broken by first appearance in ``word_list``, which makes the
    result reproducible from run to run (iterating a set of strings is not,
    because string hashing is randomized per process).

    Args:
        top: List that receives the selected words; mutated in place.
        word_list: The words to count.  Every word appended to ``top`` is
            removed from this list in place.
        word_set: Container of candidate words; only words it contains are
            counted.

    Returns:
        None.  Results are delivered through ``top``.
    """
    # Count once, in first-occurrence order (dicts preserve insertion order).
    counts = {}
    for word in word_list:
        if word in word_set:
            counts[word] = counts.get(word, 0) + 1

    picked = set()
    while True:
        # max() returns the first maximal key, i.e. the earliest-seen word
        # among those sharing the highest count; '' when nothing is left.
        top_word = max(counts, key=counts.get, default='')
        counts.pop(top_word, None)
        picked.add(top_word)
        top.append(top_word)
        if len(top) >= 3:
            break

    # Drop every selected word from the caller's list in a single pass.
    word_list[:] = [word for word in word_list if word not in picked]


def top_3_words(word):
    """Return up to three of the most frequent words in a text.

    The text is lower-cased, so counting is case-insensitive and the
    returned words are lower-case.  A word is a maximal run of regex word
    characters and apostrophes that contains at least one word character;
    runs made only of apostrophes or other punctuation are ignored.

    Args:
        word: The text to analyse (the whole text, despite the parameter
            name).

    Returns:
        A list of at most three words ordered by descending frequency.
        Words with equal counts keep the order of their first appearance.
        The list is shorter than three when the text has fewer distinct
        words, and empty when it has none.
    """
    # Tokenize the whole text at once so punctuation inside a
    # whitespace-separated chunk (e.g. "e.g.") splits words cleanly.
    tokens = re.findall(r"[\w']+", word.lower())

    # strip("'") is empty only for tokens consisting solely of apostrophes.
    counts = Counter(token for token in tokens if token.strip("'"))

    return [token for token, _ in counts.most_common(3)]
1

There is 1 best solution below

0
On

A very simple approach: use a collections.Counter to count the frequency of each word, then take the 3 most_common:

>>> words = "foo foo foo bar bar baz baz asdf ola"
>>> import collections
>>> collections.Counter(words.split()).most_common(3)
[('foo', 3), ('bar', 2), ('baz', 2)]
>>> next(zip(*collections.Counter(words.split()).most_common(3)))
('foo', 'bar', 'baz')