Using comparator to sort set of unique pairs with different criterias for uniqueness and less than

534 Views Asked by At

First of all I've tried to search for similar questions but I didn't find any response explaining what could my problem be.

The problem is the following: Given a set of N nodes with coordinates (x,y,z) sort them using a 4th value F as fast as possible.

I want to use a std::set with a custom comparator for this purpose because it has O(log(N)) complexity. I know I could also try a std::vector and a call to std::sort on std::vector but in theory is a slower operation.

Why this? Because I'm constantly inserting elements in the set, changing the F value (it means I change the value and to reorder the element in the container I erase and re-insert it) and I want to take the element with the less F value (that's the element at the front of the container).

But let's go with the std::set problem.

The coordinates define the uniqueness property, following the strict weak ordering rules,it means that a and b are the considered the same object if

!comp(a,b) && !comp(b,a)

The problem is related to define a uniqueness criteria based on the coordinates and a sorting criteria based on the F value. I don't want the set to store two elements with the same coordiantes, but I want it to be allow to store two elements with different coordinates but same F value

The comparator should also satisfais the following three properties:

  1. Irreflexivity x < x false
  2. Assymetry x < y true implies y < x false
  3. Transitivy x < y && y < z implies x < z true

So knowing all these properties I've been working around with the following example implementation:

Some definitions

class Node;
struct NodeComparator;
using NodePair = std::pair<Node *, int>;
using NodeSet  = std::set<NodePair, NodeComparator>;

Here I'm using pointers for convenience

Class Node

class Node
{

public:
    Node()
    {
    }
    Node(int _x, int _y, int _z, int _val) : x(_x), y(_y), z(_z), value(_val)
    {
    }

    int x, y, z;
    int value;

    friend inline std::ostream &operator<<(std::ostream &os, const Node &dt)
    {
        os << "[" << dt.x << ", " << dt.y << ", " << dt.z << "], [" << dt.value << "]";
        return os;
    }
    friend bool operator==(const Node &_lhs, const Node &_rhs){
        if( _lhs.x == _rhs.x &&
            _lhs.y == _rhs.y &&
            _lhs.z == _rhs.z ){
                return true;
            }
        return false;
    }
};

Here the operator << is overloaded only for debugging purposes

The comparator


struct NodeComparator
{
    bool operator()(const NodePair &_lhs, const NodePair &_rhs) const
    {
        if( _lhs.first == nullptr || _rhs.first == nullptr )
            return false;
        /*
        This first check implements uniqueness. 
        If _lhs == _rhs --> comp(_lhs,_rhs) == false && comp(_rhs, _lhs) == false
        So ( !comp(l,r) && !comp(r,l) ) == true
        */
        if( *_lhs.first == *_rhs.first) 
            return false;
        
        int ret = _lhs.second - _rhs.second;
        return ret < 0;
    }
};

I guess one problem could be the case of two nodes with different coordinates but same F value

Full example with concrete cases

Ìn this example I use the above classes to insert/find/erase some elements, but has it is show on the output, it's not behaving as expected:

#include <iostream>
#include <set>
#include <vector>
#include <algorithm>
#include <tuple>

class Node;
struct NodeComparator;
using NodePair = std::pair<Node *, int>;
using NodeSet  = std::set<NodePair, NodeComparator>;
class Node
{

public:
    Node()
    {
    }
    Node(int _x, int _y, int _z, int _val) : x(_x), y(_y), z(_z), value(_val)
    {
    }

    int x, y, z;
    int value;

    friend inline std::ostream &operator<<(std::ostream &os, const Node &dt)
    {
        os << "[" << dt.x << ", " << dt.y << ", " << dt.z << "], [" << dt.value << "]";
        return os;
    }
};

struct NodeComparator
{
    bool operator()(const NodePair &_lhs, const NodePair &_rhs) const
    {
        /*
        This first check implements uniqueness. 
        If _lhs == _rhs --> comp(_lhs,_rhs) == false && comp(_rhs, _lhs) == false
        So ( !comp(l,r) && !comp(r,l) ) == true
        */
        if(_lhs == _rhs) 
            return false;
        
        int ret = _lhs.second - _rhs.second;
        return ret < 0;
    }
};
int main(int argc, char **argv)
{
    Node n1(0, 2, 4, 12), 
         n2(2, 4, 5, 25), 
         n3(0, 1, 4, 34), 
         n4(0, 1, 4, 20), 
         n5(0, 1, 5, 20),
         n6(0, 2, 4, 112);

    NodeSet set;

    set.insert({&n1, n1.value});
    set.insert({&n2, n2.value});
    set.insert({&n3, n3.value});
    set.insert({&n4, n4.value}); //Should not be inserted because it already exists n3 with same coords
    set.insert({&n5, n5.value});

    //Try to insert multiple times a previously inserted node (n1 coords is == n6 coords)
    //It should not be inserted because it already exists one node with the same coords (n1)
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, 0});
    set.insert({&n6, 1});

    if (set.find({&n4, n4.value}) != set.end())
        std::cout << "Found n4" << std::endl;
    
    auto it = set.erase({&n4, 20});
    std::cout << "It value (elements erased): " << it << std::endl;

    if (set.find({&n4, n4.value}) != set.end())
        std::cout << "Found n4 after removal" << std::endl;
    
    std::cout << "Final Set content: " << std::endl;
    for (auto &it : set)
        std::cout << *it.first << std::endl;


    return 0;
}

To compile it with C++11 or above: g++ -o main main.cpp

Output:

Found n4
It value (elements erased): 1
Final Set content: 
[0, 2, 4], [12]
[2, 4, 5], [25]
[0, 1, 4], [34]
[0, 2, 4], [112]

**Expected Output: ** Correspond to elements n1, n5, n2, n3 ordered from the one with less F (n1) to the one with the higher F (n3).

Final Set content: 
[0, 2, 4], [12]
[0, 1, 5], [20]
[2, 4, 5], [25]
[0, 1, 4], [34]

I would appreciate a lot any help or ideas and alternatives of implementation. Thanks

2

There are 2 best solutions below

1
On BEST ANSWER

Unfortunately, your requirements cannot be fulfilled by one std::set alone. The std::set uses the same comparator for sorting and uniqueness. The comparator has no state, meaning, you cannot compare one time with the first and the next time with a second condition. That will not work.

So, you need to use 2 containers, like first a std::unordered_set using a comparator for equal coordinates and the a second container for the sorting, like a std::multiset..

You could also use a std::unordered_map in conjunction with a std::multiset.

Or you create your own container as a class and try to optimize performance.

Let me show you an example using the combination of std::unordered_set and std::multiset. It will be fast, because the std::unordered_set uses hashes.

#include <iostream>
#include <unordered_set>
#include <set>
#include <array>
#include <vector>

using Coordinate = std::array<int, 3>;

struct Node {
    Coordinate coordinate{};
    int value{};
    bool operator == (const Node& other) const { return coordinate == other.coordinate; }
    friend std::ostream& operator << (std::ostream& os, const Node& n) {
        return os << "[" << n.coordinate[0] << ", " << n.coordinate[1] << ", " << n.coordinate[2] << "], [" << n.value << "]"; }
};
struct CompareOnSecond { bool operator ()(const Node& n1, const Node& n2)const { return n1.value < n2.value; } };
struct Hash {size_t operator()(const Node& n) const {return n.coordinate[0] ^ n.coordinate[1] ^ n.coordinate[2];} };

using UniqueNodes = std::unordered_set<Node, Hash>;
using Sorter = std::multiset<Node, CompareOnSecond>;

int main() {
    // a vector with some test nodes
    std::vector<Node> testNodes{
    { {{0, 2, 4}}, 12 },
    { {{2, 4, 5}}, 25 },
    { {{0, 1, 4}}, 34 },
    { {{0, 1, 4}}, 20 },
    { {{0, 1, 5}}, 20 },
    { {{0, 2, 4}}, 112 } };

    // Here we will store the unique nodes
    UniqueNodes uniqueNodes{};
    for (const Node& n : testNodes) uniqueNodes.insert(n);

    // And now, do the sorting
    Sorter sortedNodes(uniqueNodes.begin(), uniqueNodes.end());

    // Some test functions
    std::cout << "\nSorted unique nodes:\n";
    for (const Node& n : sortedNodes) std::cout << n << '\n';

    // find a node
    if (sortedNodes.find({ {{0, 1, 4}}, 20 }) != sortedNodes.end())
        std::cout << "\nFound n4\n";

    // Erase a node
    auto it = sortedNodes.erase({ {{0, 1, 4}}, 20 });
    std::cout << "It value (elements erased): " << it << '\n';

    // Was it really erased?
    if (sortedNodes.find({ {{0, 1, 4}}, 20 }) != sortedNodes.end())
        std::cout << "\nFound n4 after removal\n";

    // Show final result
    std::cout << "\nFinal Set content:\n";
    for (const Node& n : sortedNodes) std::cout << n << '\n';
}
0
On

Finally, thanks to suggestions and comments of the users I implemented a solution using Boost multi index with 2 index. One hashed unique index and one ordered-non-unique index. Despite this I marked the answer above as accepted because is the most standard solution.

#include <iostream>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/identity.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/key_extractors.hpp>

using namespace ::boost;
using namespace ::boost::multi_index;

struct IndexByCost {};
struct IndexByWorldPosition {};

class Node
{

public:
    Node(int _val, int _i) : value(_val), index(_i) {}

    int value; //NON UNIQUE
    unsigned int index; //UNIQUE

    friend inline std::ostream &operator<<(std::ostream &os, const Node &dt)
    {
        os << dt.index << ": [" << dt.value << "]";
        return os;
    }

};

using MagicalMultiSet = boost::multi_index_container<
  Node*, // the data type stored
  boost::multi_index::indexed_by< // list of indexes
    boost::multi_index::hashed_unique<  //hashed index wo
      boost::multi_index::tag<IndexByWorldPosition>, // give that index a name
      boost::multi_index::member<Node, unsigned int, &Node::index> // what will be the index's key
    >,
    boost::multi_index::ordered_non_unique<  //ordered index over 'i1'
      boost::multi_index::tag<IndexByCost>, // give that index a name
      boost::multi_index::member<Node, int, &Node::value> // what will be the index's key
    >
  >
>;

int main(int argc, char const *argv[])
{
    
    MagicalMultiSet container;

    Node n1{24, 1};
    Node n2{12, 2};
    Node n3{214,3};
    Node n4{224,4};
    Node n5{221,5};
    Node n6{221,6};

    auto & indexByCost          = container.get<IndexByCost>();
    auto & indexByWorldPosition = container.get<IndexByWorldPosition>();

    indexByCost.insert(&n1);
    indexByCost.insert(&n2);
    indexByCost.insert(&n3);
    indexByCost.insert(&n4);
    indexByCost.insert(&n5);

    for(auto &it: indexByCost)
        std::cout << *it << std::endl;
    
    auto it = indexByCost.begin();
    std::cout << "Best Node " << **it << std::endl;

    indexByCost.erase(indexByCost.begin());

    it = indexByCost.begin();
    std::cout << "Best Node After erasing the first one: " << **it << std::endl;

    std::cout << "What if we modify the value of the nodes?" << std::endl;
    n2.value = 1;
    std::cout << "Container view from index by world position" << std::endl;
    for(auto &it: indexByWorldPosition)
        std::cout << *it << std::endl;
    
    auto found = indexByWorldPosition.find(2);
    if(found != indexByWorldPosition.end() )
        std::cout << "Okey found n2 by index" << std::endl;

    found = indexByWorldPosition.find(1);
    if(found != indexByWorldPosition.end() )
        std::cout << "Okey found n1 by index" << std::endl;
    
    std::cout << "Imagine we update the n1 cost" << std::endl;
    n1.value = 10000;
    indexByWorldPosition.erase(found);
    indexByWorldPosition.insert(&n1);

    std::cout << "Container view from index by cost " << std::endl;

    for(auto &it: indexByCost)
        std::cout << *it << std::endl;
    
    return 0;
}