Fast relation search

87 Views Asked by At

There is list of many-to-many relations (N = 1000000). I need as fast as possible to determine index of relation in the list and enumerate all relations for a specific item.

I know that it is possible to make lookup table (with time O(1)) for from/to, but it has too big size (N*N). And I know that I can use binary search (with time O(log(N))), but it is still very slow. Are there other solutions?

C# code:

public class Relation
{
    public int From;
    public int To;
}

public class Table
{
    public List<Relation> Relations { get; } = new List<Relation>();

    public void Add(int from, int to)
    {
        if (IndexOf(from, to) == -1)
        {
            Relations.Add(new Relation() { From = from, To = to });
        }
    }

    public int IndexOf(int from, int to)
    {
        // this algorithm make O(N) comparisons, but I need O(1)
        for (int i = 0; i < Relations.Count; i++)
        {
            if (Relations[i].From == from && Relations[i].To == to) return i;
        }
        return -1;
    }

    public IEnumerable<int> FromsOf(int to)
    {
        // this algorithm make O(N) comparisons, but I need O(1)
        for (int i = 0; i < Relations.Count; i++)
        {
            if (Relations[i].To == to) yield return Relations[i].From;
        }
    }
}

class Program
{
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 1000000;

        for (int i = 0; i < N; i++) t.Add(r.Next(N), r.Next(N));

        DateTime t1 = DateTime.Now;
        for (int i = 0; i < N; i++)
        {
            if (t.IndexOf(r.Next(N), r.Next(N)) != -1)
            {
                // do something
            }
        }

        DateTime t2 = DateTime.Now;
        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.FromsOf(r.Next(N)))
            {
                // do something
            }
        }

        DateTime t3 = DateTime.Now;

        Console.WriteLine($"IndexOf speed = {(t2 - t1).TotalMilliseconds / N}ms");
        Console.WriteLine($"FromsOf speed = {(t3 - t2).TotalMilliseconds / N}ms");
    }
}
2

There are 2 best solutions below

1
On

Here is the version with 2 dictionaries:

public class Table
{
    public Dictionary<int, HashSet<int>> froms { get; } = new Dictionary<int, HashSet<int>>();
    public Dictionary<int, HashSet<int>> tos { get; } = new Dictionary<int, HashSet<int>>();

    public void Add(int from, int to)
    {
        if (!Contains(from, to))
        {
            if (!froms.ContainsKey(from))
            {
                froms.Add(from, new HashSet<int> { to });
            }
            else
            {
                froms[from].Add(to);
            }

            if (!tos.ContainsKey(to))
            {
                tos.Add(to, new HashSet<int> { from });
            }
            else
            {
                tos[to].Add(from);
            }
        }
    }

    public bool Contains(int from, int to)
    {
        if (!froms.ContainsKey(from))
            return false;

        if (!froms[from].Contains(to))
            return false;

        return true;
    }

    public IEnumerable<int> FromsOf(int to)
    {
        if(tos.ContainsKey(to))
            return tos[to];
        else
            return new List<int>();
    }
}

class Program
{
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 1000000;

        for (int i = 0; i < N; i++)
            t.Add(r.Next(N), r.Next(N));

        DateTime t1 = DateTime.Now;
        for (int i = 0; i < N; i++)
        {
            if (t.Contains(r.Next(N), r.Next(N)))
            {
                // do something
            }
        }

        DateTime t2 = DateTime.Now;
        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.FromsOf(r.Next(N)))
            {
                // do something
            }
        }

        DateTime t3 = DateTime.Now;

        Console.WriteLine($"IndexOf speed = {(t2 - t1).TotalMilliseconds / N}ms");
        Console.WriteLine($"FromsOf speed = {(t3 - t2).TotalMilliseconds / N}ms");
        Console.ReadKey();
    }
}

Output:

IndexOf speed = 0.0003220099ms

FromsOf speed = 0.0003799996ms

0
On

I have trying use dictionary in dictionary and it also very fast, but memory usage about 33*N (for N=10000000 memory usage in task manager 132 Mb).

Code:

    public class Relation
{
    public int From;
    public int To;
}

public class Table
{
    public List<Relation> Relations { get; } = new List<Relation>();
    public Dictionary<int, Dictionary<int, int>> FromDic = new Dictionary<int, Dictionary<int, int>>();

    public void Add(int from, int to)
    {
        if (IndexOf(from, to) == -1)
        {
            int index = Relations.Count;
            Relations.Add(new Relation() { From = from, To = to });

            Dictionary<int, int> innerDic;
            if (!FromDic.TryGetValue(from, out innerDic))
            {
                innerDic = new Dictionary<int, int>();
                FromDic[from] = innerDic;
            }
            innerDic[to] = index;
        }
    }

    public int IndexOf(int from, int to)
    {
        Dictionary<int, int> toDic;
        int index;
        if (FromDic.TryGetValue(from, out toDic) && toDic.TryGetValue(to, out index))
            return index;
        return -1;
    }

    public IEnumerable<int> TosOf(int from)
    {
        Dictionary<int, int> innerDic;
        if (FromDic.TryGetValue(from, out innerDic)) return innerDic.Keys;
        return new List<int>();
    }
}

class Program
{
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 100000;

        DateTime t0 = DateTime.Now;

        for (int i = 0; i < N; i++) t.Add(r.Next(N), r.Next(N));

        DateTime t1 = DateTime.Now;
        Console.WriteLine($"Add speed = {(t1 - t0).TotalMilliseconds * 1000 / N}mks");
        for (int i = 0; i < N; i++)
        {
            if (t.IndexOf(r.Next(N), r.Next(N)) != -1)
            {
                // do something
            }
        }

        DateTime t2 = DateTime.Now;
        Console.WriteLine($"IndexOf speed = {(t2 - t1).TotalMilliseconds * 1000 / N}mks");

        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.TosOf(r.Next(N)))
            {
                // do something
            }
        }

        DateTime t3 = DateTime.Now;
        Console.WriteLine($"TosOf speed = {(t3 - t2).TotalMilliseconds * 1000 / N}mks");

        Console.ReadKey();
    }
}