Trying to learn boost::intrusive Q5

107 Views Asked by At

I have the following program. I built it with gcc-4.9.2 under linux. My questions are:

1) Why does the hashtable seem to be sorted the first time around, but loses the sort after the items are deleted from value?

2) How do I walk the hashtable by key myself and say std::cout each item that hashes to a bucket, e.g., the code in the #if 0 #endif section?

#include <vector>
#include <iostream>
#include <vector>
#include <functional>

#include <boost/intrusive/unordered_set.hpp>

namespace bic = boost::intrusive;

std::hash<std::string> hash_fn;

struct MyClass : bic::unordered_set_base_hook<bic::link_mode<bic::auto_unlink>>
{
    std::string name;
    int anInt1;
    mutable bool bIsMarkedToDelete;

    MyClass(std::string name, int i) : name(name), anInt1(i), bIsMarkedToDelete(false) {}

    bool operator==(MyClass const& o) const
    {
        //return anInt1 == o.anInt1 && name == o.name;
        return name == o.name;
    }

    struct hasher
    {
        size_t operator()(MyClass const& o) const
        {
            return o.anInt1;
            //return hash_fn(o.name);
        }
    };
};

std::ostream& operator << (std::ostream& out, const MyClass& ac)
{
    std::cout << ac.name << " " << ac.anInt1;

    return out;
}

typedef bic::unordered_set<MyClass, bic::hash<MyClass::hasher>, bic::constant_time_size<false> > HashTable;

int main()
{
    std::vector<MyClass> values
    {
        MyClass { "John",     0 },
        MyClass { "Mike",     0 },
        MyClass { "Dagobart", 25 },
        MyClass { "John",     5 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 26 },
        MyClass { "John",     10 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 27 },
        MyClass { "John",     15 },
        MyClass { "Mike",     27 }
    };

    HashTable::bucket_type buckets[100];
    HashTable hashtable(values.begin(), values.end(), HashTable::bucket_traits(buckets, 100));

    std::cout << "\nContents of std::vector<MyClass> values\n";

    for(auto& e: values)
        std::cout << e << " ";

    std::cout << "\nContents of HashTable hashtable\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

#if 0 // This code won't compile since there is no operator [] for hashtable
    for(int bucket = 0; bucket < 27; bucket++)
    {
        auto hit(hashtable[bucket].rbegin());
        auto hite(hashtable[bucket].rend());

        while (hit != hite)
        {
            MyClass mc = *hit;

            std::cout << mc << " ";

            hit++;
        }

        std::cout << '\n';
    }
#endif // 0

    std::cout << '\n';
    std::cout << "values size first " << values.size() << '\n';
    std::cout << "hash size fist " << hashtable.size() << '\n';

    for(auto& e: values)
        e.bIsMarkedToDelete |= ("Mike" == e.name);

    std::cout << "removing all bIsMarkedToDelete";
    for(auto& e: values)
        if(e.bIsMarkedToDelete)
            std::cout << e << " ";

    std::cout << '\n';

    values.erase(
        std::remove_if(std::begin(values), std::end(values), std::mem_fn(&MyClass::bIsMarkedToDelete)),
                       std::end(values));

    std::cout << "values size now " << values.size() << '\n';
    std::cout << "hash size now " << hashtable.size() << '\n';

    std::cout << "Contents of value after removing elements " << '\n';
    for(auto& e: values)
        std::cout << e << " ";

    std::cout << "\nContents of HashTable hashtable after delete Mike\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

    std::cout << '\n';

    values.clear();

    std::cout << values.size() << '\n';
    std::cout << hashtable.size() << '\n';

    std::cout << "Done\n";

    int j;
    std::cin >> j;
}
1

There are 1 best solutions below

23
On

Your hash and equality are inconsistent, and as such you violate the container invariants:

bool operator==(MyClass const& o) const
{
    //return anInt1 == o.anInt1 && name == o.name;
    return name == o.name;
}

struct hasher
{
    size_t operator()(MyClass const& o) const
    {
        return o.anInt1;
        //return hash_fn(o.name);
    }
};

This would be fine IFF each distinct value of name always hashed to the same bucket. Alas it doesn't: e.g. "Mike" hashes to 3 different values:

    MyClass { "Mike",     0  },
    MyClass { "Mike",     25 },
    MyClass { "Mike",     25 },
    MyClass { "Mike",     27 }

1) Why does the hashtable seem to be sorted the first time around, but loses the sort after the items are deleted from value?

I'm trying to see what you mean, but the output of the program is:

Contents of std::vector<MyClass> values
John Mike Dagobart John Mike Dagobart John Mike Dagobart John Mike 
Contents of HashTable hashtable
Mike 0
John 0
John 5
John 10
John 15
Mike 25
Dagobart 25
Dagobart 26
Mike 27
Dagobart 27

values size first 11
hash size fist 10
removing all bIsMarkedToDeleteMike Mike Mike Mike 
values size now 7
hash size now 7
Contents of value after removing elements 
John Dagobart John Dagobart John Dagobart John 
Contents of HashTable hashtable after delete Mike
Dagobart 25
John 0
Dagobart 26
John 15
John 10
John 5
Dagobart 27

0
0
Done

I'm having to assume the "first time around" would be the part "Contents of HashTable hashtable". Indeed if you look closely that would seem to be "sorted by bucket". It could make a lot of sense that the container is iterated bucket-by-bucket.

The fact that after removal it no longer is might have a n awful lot to do with the fact that your hash/equality implementations don't match (see above).

2) How do I walk the hashtable by key myself and say std::cout each item that hashes to a bucket, e.g., the code in the #if 0 #endif section?

There's no direct (public API) way. You can build a map for debug purposes by using hashtable.bucket(key) though:

Live On Coliru

#include <vector>
#include <iostream>
#include <vector>
#include <map>
#include <functional>

#include <boost/intrusive/unordered_set.hpp>

namespace bic = boost::intrusive;

std::hash<std::string> hash_fn;

struct MyClass : bic::unordered_set_base_hook<bic::link_mode<bic::auto_unlink>>
{
    std::string name;
    int anInt1;
    mutable bool bIsMarkedToDelete;

    MyClass(std::string name, int i) : name(name), anInt1(i), bIsMarkedToDelete(false) {}

    bool operator==(MyClass const& o) const
    {
        return anInt1 == o.anInt1 && name == o.name;
    }

    struct hasher
    {
        size_t operator()(MyClass const& o) const
        {
            return hash_fn(o.name);
        }
    };
};

std::ostream& operator << (std::ostream& out, const MyClass& ac) {
    return out << ac.name << " " << ac.anInt1;
}

typedef bic::unordered_set<MyClass, bic::hash<MyClass::hasher>, bic::constant_time_size<false> > HashTable;

int main()
{
    std::vector<MyClass> values {
        MyClass { "Dagobart", 25 },
        MyClass { "Dagobart", 26 },
        MyClass { "Dagobart", 27 },
        MyClass { "John",     0  },
        MyClass { "John",     10 },
        MyClass { "John",     15 },
        MyClass { "John",     5  },
        MyClass { "Mike",     0  },
        MyClass { "Mike",     25 },
        MyClass { "Mike",     25 },
        MyClass { "Mike",     27 }
    };

    HashTable::bucket_type buckets[100];
    HashTable hashtable(values.begin(), values.end(), HashTable::bucket_traits(buckets, 100));

    std::cout << "\nDebugging buckets of hashtable\n";

    std::multimap<size_t, MyClass const*> debug_map;
    std::transform(hashtable.begin(), hashtable.end(), 
            std::inserter(debug_map, debug_map.end()), 
            [&](MyClass const& mc) { return std::make_pair(hashtable.bucket(mc), &mc); }
        );

    for (auto& entry : debug_map)
        std::cout << "Debug bucket: " << entry.first << " -> " << *entry.second << "\n";
}

Prints

Debugging buckets of hashtable
Debug bucket: 16 -> Mike 27
Debug bucket: 16 -> Mike 25
Debug bucket: 16 -> Mike 0
Debug bucket: 21 -> Dagobart 27
Debug bucket: 21 -> Dagobart 26
Debug bucket: 21 -> Dagobart 25
Debug bucket: 59 -> John 5
Debug bucket: 59 -> John 15
Debug bucket: 59 -> John 10
Debug bucket: 59 -> John 0

Of course the output depends on the actual implementation of std::hash<std::string> and the tuning of the hash-table.