Python - reverse nested dictionaries dict in dict

98 Views Asked by At

I am trying to reverse dictionary nesting such that dict2 in dict1 becomes dict1 in dict2. For this, I followed the method from the link: Pythonic Way to reverse nested dictionaries

This dictionary nesting comes from a function such as:

#All functions and property for my_object0
class my_class0(object):
    """Entry point for one object (pseudo-code: ``...`` marks elided logic)."""

    def __init__(self, input, **kwargs):
        # ``input`` shadows the builtin; the name is kept so existing callers
        # are unaffected.
        self.my_listA = ()
        ...

    def my_function0(self, **kwargs):
        """Yield the items of every attribute named by the query results.

        For each ``(key, value)`` pair produced by ``my_class.my_function``,
        ``self.my_listA`` is replaced by a one-shot generator over
        ``value.items()``.  If this instance has an attribute called ``key``,
        everything that attribute yields is yielded in turn.
        """
        # my_class.__init__ and my_class.my_function only accept keyword
        # arguments, so the dict must be unpacked; passing it positionally
        # raises TypeError.
        for key, value in my_class(**kwargs).my_function(**kwargs):
            self.my_listA = ((key2, value2) for key2, value2 in value.items())
            ...
            if hasattr(self, key):
                yield from getattr(self, key)

    @property
    def my_property(self):
        """Generator over the ``(key, value)`` pairs currently in ``my_listA``."""
        for key, value in self.my_listA:
            yield key, value


#All functions and property for queries of object0 information (what?)
class my_class(object):
    """Query layer (pseudo-code: ``...`` marks logic elided by the author)."""

    def __init__(self, **kwargs):
        ...
        self.my_listB = []

    def my_function(self, **kwargs):
        """Store the values from ``my_function3`` in ``my_listB``, then yield from ``my_functionZ``."""
        my_list3 = []
        # NOTE(review): my_class3 and kwargs3 are not defined in this snippet.
        for key, value in my_class3(kwargs).my_function3(kwargs3):
            ...
            my_list3.append(value)
        self.my_listB = my_list3
        # The problem is here: this yields dict1 nested in dict2, but dict2
        # nested in dict1 is what has to be returned.
        # NOTE(review): kwargsZ is undefined here, and my_functionZ only
        # accepts keyword arguments, so a positional call would raise TypeError.
        yield from self.my_functionZ(kwargsZ)

    def my_functionZ(self, **kwargs):
        """Re-yield every ``(key, value)`` pair produced by ``my_function2``."""
        # NOTE(review): my_class2.my_function2 takes a required ``my_list2``
        # argument; kwargs2 is not defined in this snippet.
        for key, value in my_class2(kwargs).my_function2(kwargs2):
            ...
            yield key, value


#All functions and property for data of queries (where?)
class my_class2(object):
    """Data layer (pseudo-code: ``...`` marks logic elided by the author)."""

    def __init__(self, **kwargs):
        ...
    def my_function2(self, my_list2, **kwargs):
        """Yield ``(name, result)`` for each attribute name in ``my_list2``.

        Each name is looked up on this instance and the bound attribute is
        called with ``self`` as its single positional argument.
        """
        for item2 in my_list2: # my_list2 = dir(self), according to the author
            ...
            # The bound method already receives self; the extra ``self`` fills
            # the callee's second parameter (``self2`` in ``item2`` below).
            yield item2, getattr(self, item2)(self)

    def item2(self, self2, **kwargs):
        """Example data accessor returning a string chosen by ``call1``/``call2``."""
        # NOTE(review): call1 and call2 are not defined in this snippet.
        if call1:
            return "my_data1"
        elif call2:
            return "my_data2"
        ...
...

if __name__ == '__main__':
    ...
    # Build a dict from the pairs produced for each entry of my_list and
    # print it.
    for entry in my_list:
        pairs = my_class0(entry).my_function0()
        print(dict(pairs))

I created a function that solves the problem:

def flip_dict_in_dict1(generator1):
    """Swap the two nesting levels of an iterable of ``(key, dict)`` pairs.

    ``generator1`` yields ``(outer_key, {inner_key: value})`` pairs.  The
    result is a plain dict ``{inner_key: {outer_key: value}}`` holding every
    value, so the whole flipped structure is built in memory.
    """
    flipped = {}
    for outer_key, inner in generator1:
        for inner_key, leaf in inner.items():
            bucket = flipped.get(inner_key)
            if bucket is None:
                # First time this inner key is seen: open its bucket.
                bucket = flipped[inner_key] = {}
            bucket[outer_key] = leaf
    return flipped

For example, the dictionary

# Nested mapping: outer key -> {inner key: value}.
d0 = {
    'Bob': {'item1': 3, 'item2': 8, 'item3': 6},
    'Jim': {'item1': 6, 'item4': 7},
    'Amy': {'item1': 999, 'item2': 5, 'item3': 9, 'item4': 2},
}

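corresponds to the following sequence of (key, dict) pairs (written here as a tuple; in my code the pairs are produced by a generator)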

# Sequence of (outer key, inner dict) pairs.
d0 = (
    ('Bob', {'item1': 3, 'item2': 8, 'item3': 6}),
    ('Jim', {'item1': 6, 'item4': 7}),
    ('Amy', {'item1': 999, 'item2': 5, 'item3': 9, 'item4': 2}),
)

is tested with

# The function defined above is named flip_dict_in_dict1.
print(flip_dict_in_dict1(d0))

The console displays

{'item1': {'Bob': 3, 'Jim': 6, 'Amy': 999}, 'item2': {'Bob': 8, 'Amy': 5}, 'item3': {'Bob': 6, 'Amy': 9}, 'item4': {'Jim': 7, 'Amy': 2}}

This function works correctly when it is used like this:

yield from flip_dict_in_dict1( self.my_functionZ(kwargsZ) ).items()

The problem comes with the size of nested dictionaries. These dictionaries can contain between 1 KB and 10 GB of data. I cannot store the entire dict3 dictionary in memory. So I created a dictionary class.

class _lazy_flip_view(dict):
    """Read-only mapping helpers derived from ``__iter__`` and ``__getitem__``.

    The views below subclass ``dict`` only so that ``isinstance(x, dict)``
    keeps working; their own dict storage is never filled.  Everything a
    caller normally reads from a dict is therefore recomputed here from the
    subclass's ``__iter__`` / ``__getitem__``.  Inherited behaviour that is
    not overridden (mutation, ``==``, ``copy``) still operates on the empty
    storage and must not be relied upon.
    """

    def __len__(self):
        # Counts by iterating; nothing is cached.
        return sum(1 for _ in self)

    def __bool__(self):
        # Stop at the first key instead of counting all of them.
        return any(True for _ in self)

    def __repr__(self):
        # dict.__repr__ would misleadingly show the empty storage as "{}".
        return "<%s lazy view>" % type(self).__name__

    def keys(self):
        """Return an iterator over the keys."""
        return iter(self)

    def values(self):
        """Return a generator over the values, in key order."""
        return (self[key] for key in self)

    def items(self):
        """Return a generator over ``(key, value)`` pairs."""
        return ((key, self[key]) for key in self)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when the key is absent."""
        try:
            return self[key]
        except KeyError:
            return default


class flip_dict_in_dict2(_lazy_flip_view):
    """Lazily flipped, read-only view of a dict of dicts.

    ``mirror`` maps ``key2 -> {key1: value}``.  This object behaves like
    ``{key1: {key2: value}}`` without copying any value: every lookup is
    forwarded to ``mirror``.  Iterating the outer level only keeps the set
    of distinct ``key1`` names seen so far.
    """

    def __init__(self, mirror):
        super().__init__()  # own dict storage intentionally stays empty
        self.mirror = mirror
        self.dict2 = self.mirroir_dict2

    def __contains__(self, key3) -> bool:
        # A flipped outer key exists if any inner dict of the mirror has it.
        return any(key3 in inner for inner in self.mirror.values())

    def __getitem__(self, key):
        """Return the inner view for ``key``; raise ``KeyError`` if absent."""
        if key not in self:
            raise KeyError(key)
        return self.dict2(self, key)

    def __iter__(self) -> Iterator:
        # Yield each inner key of the mirror once, in first-seen order.
        seen = set()
        for inner in self.mirror.values():
            for key1 in inner:
                if key1 not in seen:
                    seen.add(key1)
                    yield key1

    class mirroir_dict2(_lazy_flip_view):
        """View of one flipped row: ``{key2: parent.mirror[key2][key1]}``."""

        def __init__(self, parent, key1):
            super().__init__()  # own dict storage intentionally stays empty
            self.parent = parent
            self.key1 = key1

        def __contains__(self, key2) -> bool:
            mirror = self.parent.mirror
            return key2 in mirror and self.key1 in mirror[key2]

        def __getitem__(self, key2):
            try:
                return self.parent.mirror[key2][self.key1]
            except KeyError:
                # Report the key the caller asked for, whichever level failed.
                raise KeyError(key2) from None

        def __iter__(self) -> Iterator:
            # Only the mirror entries that actually hold key1 belong to this row.
            for key2, inner in self.parent.mirror.items():
                if self.key1 in inner:
                    yield key2

But I can't seem to implement all the dictionary special methods (e.g. `__contains__`, `__getitem__`, `__iter__`) needed to make this work. Can you help me? If you have a better method that saves CPU and memory and avoids this class definition, that would be even better. Likewise, if you have suggestions for improving the architecture described, please offer them.

1

There is 1 best solution below

1
Ted Possible On

One of the major benefits of using generators is the reduced storage cost. Accessing the reversed dictionary should not require holding both the original and the reversed dictionaries in memory. Below I have included a ReverseDict class that implements the methods needed for read-only access. If you later want to be able to write to the dictionary as well, the class would have to be extended with write methods such as update and setdefault.

from collections import UserDict


class ReverseDict(UserDict):
    """Read-only reversed view of a dict of dicts.

    Given ``d = {outer: {inner: value}}``, this object behaves like
    ``{inner: {outer: value}}``.  Nothing is precomputed: each lookup scans
    ``d`` and builds only the one requested row.  Mutating methods inherited
    from ``UserDict`` are not adapted and still write to ``d`` un-reversed.
    """

    def __init__(self, d: dict) -> None:
        # Bind the caller's mapping directly rather than calling
        # UserDict.__init__, which would copy every entry into a new dict.
        self.data = d

    def __getitem__(self, key):
        """Return ``{outer: value}`` for every outer entry that holds ``key``.

        Raises:
            KeyError: if no inner dict contains ``key``.
        """
        result = {k: v[key] for k, v in self.data.items() if key in v}
        if not result:
            raise KeyError(key)
        return result

    def __contains__(self, key):
        # UserDict would test the outer keys of ``data``; membership here is
        # about the reversed (inner) keys.
        return any(key in inner for inner in self.data.values())

    def __iter__(self):
        # This is necessary to be able to loop over the reversed keys
        return iter(self.keys())

    def __len__(self):
        # Number of reversed keys, not the number of outer entries
        return len(self.keys())

    def __repr__(self):
        # UserDict would print ``data`` as if it were the reversed content
        return f"{type(self).__name__}({self.data!r})"

    def keys(self):
        """Return the reversed keys as a set-like view in first-seen order."""
        # dict.fromkeys de-duplicates like a set but keeps insertion order,
        # so iteration is deterministic.
        return dict.fromkeys(
            inner_key for inner in self.data.values() for inner_key in inner
        ).keys()

    def items(self):
        # We modify this to return the reverse dict items as a generator
        return ((k, self[k]) for k in self.keys())

    def values(self):
        # We modify this to return the reverse dict values as a generator
        return (self[k] for k in self.keys())


if __name__ == "__main__":
    # Sample data: outer key -> {inner key: value}.
    nested = {
        'Bob': {'item1': 3, 'item2': 8, 'item3': 6},
        'Jim': {'item1': 6, 'item4': 7},
        'Amy': {'item1': 999, 'item2': 5, 'item3': 9, 'item4': 2},
    }
    flipped = ReverseDict(nested)

    # Walk the reversed view: first as (key, row) pairs, then keys only.
    for inner_key, row in flipped.items():
        print(inner_key, row)
    for inner_key in flipped:
        print(inner_key)

    # keys() returns a concrete collection; values() and items() return
    # generators, so the last two lines print generator objects.
    for accessor in (flipped.keys, flipped.values, flipped.items):
        print(accessor())

You would use this class as follows:

 rd = ReverseDict(d)
    for k,v in rd.items():
        print(k, v)
    for k in rd:
        print(k)
    print(rd.keys())
    print(rd.values())
    print(rd.items())

Output:
item1 {'Bob': 3, 'Jim': 6, 'Amy': 999}
item2 {'Bob': 8, 'Amy': 5}
item3 {'Bob': 6, 'Amy': 9}
item4 {'Jim': 7, 'Amy': 2}
item1
item2
item3
item4
dict_keys(['item1', 'item2', 'item3', 'item4'])
<generator object ReverseDict.values.<locals>.<genexpr> at 0x10821b3e0>
<generator object ReverseDict.items.<locals>.<genexpr> at 0x10821b3e0>