Convert itertools powerset into columnized numpy array

126 Views Asked by At

Given a tuple items, I get the powerset with itertools:

items = tuple(('a', 'b', 'c'))
combos = powerset(items)

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)))

My goal is to convert the powerset output combos:

#combos =

(('a',), 
 ('b',), 
 ('c',), 
 ('a', 'b'), 
 ('a', 'c'), 
 ('b', 'c'), 
 ('a', 'b', 'c'))

into an np.array where for each row, each item is placed in the column corresponding to the location of that item in the original tuple cols.

Desired result:

#res = 

   [['a', '',   ''],
    ['',  'b',  ''],
    ['',  '',  'c'],
    ['a', 'b',  ''],
    ['a', '',  'c'],
    ['',  'b', 'c'],
    ['a', 'b', 'c']]

#shape = (7,3)

I was able to achieve this with the code below. However, is there a more pythonic, faster/memory efficient way to convert the output than my attempt where I loop through the array and place each individual item in the correct column in the result array?

Full code:

from itertools import combinations, chain
import numpy as np

def columnizePowerset(powerset, items):
    combo_arr = np.array([list(x) for x in powerset], dtype=object) # Convert to np.array
    res = np.empty([len(powerset), len(items)], dtype=str)          # Initialize result array

    for i in range(len(combo_arr)):                                 # Loop through rows
        for j in range(len(combo_arr[i])):                          # Loop through items in row
            col = np.where(np.array(items) == combo_arr[i][j])      # Identify column for item
            res[i][col] = combo_arr[i][j]                           # Place item in correct column

    return res

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)))

if __name__ == "__main__":
    items = tuple(('a', 'b', 'c'))                                  # Given
    combos = powerset(items)                                        # Perform powerset
    res = columnizePowerset(combos, items)                          # Convert powerset to columnized array
    print(res)
2

There are 2 best solutions below

0
AKX On BEST ANSWER

The order isn't exactly the same as powerset yields, but the pattern reminded me of binary – you can get the same items by looking at the bits of increasing integers...

I'm not a NumPy wizard, but this seems reasonably numpy-er. (See post edit history for worse solutions.)

import numpy as np


def powerset_numpy(items):
    n = len(items)
    bit_indexes = np.arange(1, 2 ** n)
    n_bits = bit_indexes.shape[0]
    item_bits = np.repeat(np.arange(0, n), n_bits).reshape((n, -1))
    item_values = np.repeat(items, n_bits).reshape((n, -1))
    n_bits = (bit_indexes & (1 << item_bits)).astype(bool)
    return np.where(n_bits, item_values, '').T

print(powerset_numpy(['a', 'b', 'c']))

prints out

[['a' '' '']
 ['' 'b' '']
 ['a' 'b' '']
 ['' '' 'c']
 ['a' '' 'c']
 ['' 'b' 'c']
 ['a' 'b' 'c']]
0
Rishabh Deep Singh On

You can check for all the bitmasks if the value is set that means we can include that in the current list else ignore it.

def powerset(s: list):
    res = []
    for mask in range(1, 2 ** len(s)):
        temp = ['' for _ in range(len(s))]
        for j in range(len(s)):
            if ((mask >> j) & 1) == 1:  # jth bit set in mask
                temp[j] = s[j]
        res.append(temp)
    return res


print(powerset(['a', 'b', 'c']))
[['a', '', ''], 
['', 'b', ''], 
['a', 'b', ''], 
['', '', 'c'], 
['a', '', 'c'], 
['', 'b', 'c'], 
['a', 'b', 'c']]