LINQ Distinct not using the IEqualityComparer?

2.4k Views Asked by At

I have the following model:

/// <summary>
/// A single word together with the derived forms that the comparers in this
/// file use as equality keys.
/// </summary>
public class Word {
      /// <summary>The word text this instance was created from.</summary>
      public string Original { get; set; }
      /// <summary>Normalized form of the word; the key compared by NormalizedWordComparer.</summary>
      public string Normalized { get; set; }
      /// <summary>Root of the word; the key compared by RootWordComparer.</summary>
      public string Root { get; set; }
      /// <summary>Sub-root of the word; the key compared by SubrootWordComparer.</summary>
      public string Subroot { get; set; }
      /// <summary>Regular expression associated with the sub-root; may be null.</summary>
      public Regex SubrootRegex { get; set; }
}

I have created the following three IEqualityComparer<Word>:

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Normalized strings match.
/// </summary>
public class NormalizedWordComparer : IEqualityComparer<Word> {
     /// <summary>Returns true when both words carry the same Normalized text.</summary>
     public bool Equals(Word x, Word y) {
          string left = x.Normalized;
          string right = y.Normalized;
          return string.Equals(left, right);
     }

     /// <summary>Returns the hash code of the Word object itself.</summary>
     /// <remarks>
     /// NOTE(review): the hash is taken from the Word instance, not from its
     /// Normalized text, so unless Word overrides GetHashCode two words that
     /// Equals considers equal will generally produce different hash codes.
     /// </remarks>
     public int GetHashCode(Word obj) {
          int hash = obj.GetHashCode();
          return hash;
     }
}

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Root strings match.
/// </summary>
public class RootWordComparer : IEqualityComparer<Word> {
     /// <summary>Returns true when both words carry the same Root text.</summary>
     public bool Equals(Word x, Word y) {
          string left = x.Root;
          string right = y.Root;
          return string.Equals(left, right);
     }

     /// <summary>Returns the hash code of the Word object itself.</summary>
     /// <remarks>
     /// NOTE(review): the hash is taken from the Word instance, not from its
     /// Root text, so unless Word overrides GetHashCode two words that Equals
     /// considers equal will generally produce different hash codes.
     /// </remarks>
     public int GetHashCode(Word obj) {
          int hash = obj.GetHashCode();
          return hash;
     }
}

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Subroot strings match.
/// </summary>
public class SubrootWordComparer : IEqualityComparer<Word> {
     /// <summary>Returns true when both words carry the same Subroot text.</summary>
     public bool Equals(Word x, Word y) {
          string left = x.Subroot;
          string right = y.Subroot;
          return string.Equals(left, right);
     }

     /// <summary>Returns the hash code of the Word object itself.</summary>
     /// <remarks>
     /// NOTE(review): the hash is taken from the Word instance, not from its
     /// Subroot text, so unless Word overrides GetHashCode two words that
     /// Equals considers equal will generally produce different hash codes.
     /// </remarks>
     public int GetHashCode(Word obj) {
          int hash = obj.GetHashCode();
          return hash;
     }
}

In another class, I am trying to do the following:

// Project every configured word string into a fully populated Word, then
// de-duplicate in three stages: by Normalized, by Root and by Subroot.
// NOTE(review): assuming these are ordinary LINQ-to-Objects sequences, all
// three results are lazily evaluated, so the projection below re-runs each
// time any of them is enumerated — confirm whether that is intended.
_normalizedWords = ConfigurationFacade.Words
     .Select(text => {
          var entry = new Word { Original = text };
          // Each derived form is computed from the one assigned just before it.
          entry.Normalized = Normalize(entry, _filters);
          entry.Root = GetRoot(entry.Normalized, ConfigurationFacade.WordRootPercentage);
          entry.Subroot = GetRoot(entry.Root, ConfigurationFacade.WordSubrootPercentage);
          entry.SubrootRegex = null; //Complicated regex here
          return entry;
     })
     .Distinct(new NormalizedWordComparer());

// Each later stage filters the output of the previous one.
_wordRoots = _normalizedWords
     .Distinct(new RootWordComparer());
_wordSubroots = _wordRoots
     .Distinct(new SubrootWordComparer());

However, _normalizedWords, _wordRoots and _wordSubroots all end up with the same number of elements, as if the Distinct() method didn't work or the comparer were being ignored.

I checked the elements with the debugger, and many of them have the same Root value, so only one of each should remain in _wordRoots. That's not the case: they are not removed or filtered.

Why is my Distinct() not working?

1

There is 1 best solution below

1
On BEST ANSWER

Why is my Distinct() not working?

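Because Distinct first checks the hash code (since it's a quick check to see whether two objects could be equal) and only then calls Equals. Your GetHashCode implementations are all the same and do not correspond to your Equals methods: they return the hash code of the Word object itself, so two different Word instances with the same Normalized, Root or Subroot value will almost never produce the same hash code, Equals is never reached, and Distinct does not work as you would expect.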

Change your GetHashCode methods to correspond with Equals:

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Normalized strings match. The hash code is derived from the
/// same string, so equal words always hash alike.
/// </summary>
public class NormalizedWordComparer : IEqualityComparer<Word> {
     /// <summary>
     /// Returns true when both arguments are the same instance (or both null),
     /// or when both are non-null and have the same Normalized text.
     /// </summary>
     public bool Equals(Word x, Word y) {
          if (object.ReferenceEquals(x, y)) {
               return true;
          }
          // Exactly one side is null here, so they cannot be equal.
          if (object.ReferenceEquals(x, null) || object.ReferenceEquals(y, null)) {
               return false;
          }
          return x.Normalized == y.Normalized;
     }

     /// <summary>
     /// Returns the hash code of the Normalized text, or 0 when the word or
     /// its Normalized text is null.
     /// </summary>
     public int GetHashCode(Word obj) {
          // Equals treats two null keys as equal, so they must share a hash
          // instead of throwing a NullReferenceException.
          if (object.ReferenceEquals(obj, null) || obj.Normalized == null) {
               return 0;
          }
          return obj.Normalized.GetHashCode();
     }
}

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Root strings match. The hash code is derived from the same
/// string, so equal words always hash alike.
/// </summary>
public class RootWordComparer: IEqualityComparer<Word> {
     /// <summary>
     /// Returns true when both arguments are the same instance (or both null),
     /// or when both are non-null and have the same Root text.
     /// </summary>
     public bool Equals(Word x, Word y) {
          if (object.ReferenceEquals(x, y)) {
               return true;
          }
          // Exactly one side is null here, so they cannot be equal.
          if (object.ReferenceEquals(x, null) || object.ReferenceEquals(y, null)) {
               return false;
          }
          return x.Root == y.Root;
     }

     /// <summary>
     /// Returns the hash code of the Root text, or 0 when the word or its
     /// Root text is null.
     /// </summary>
     public int GetHashCode(Word obj) {
          // Equals treats two null keys as equal, so they must share a hash
          // instead of throwing a NullReferenceException.
          if (object.ReferenceEquals(obj, null) || obj.Root == null) {
               return 0;
          }
          return obj.Root.GetHashCode();
     }
}

/// <summary>
/// Equality comparer that treats two <see cref="Word"/> instances as equal
/// when their Subroot strings match. The hash code is derived from the same
/// string, so equal words always hash alike.
/// </summary>
public class SubrootWordComparer : IEqualityComparer<Word> {
     /// <summary>
     /// Returns true when both arguments are the same instance (or both null),
     /// or when both are non-null and have the same Subroot text.
     /// </summary>
     public bool Equals(Word x, Word y) {
          if (object.ReferenceEquals(x, y)) {
               return true;
          }
          // Exactly one side is null here, so they cannot be equal.
          if (object.ReferenceEquals(x, null) || object.ReferenceEquals(y, null)) {
               return false;
          }
          return x.Subroot == y.Subroot;
     }

     /// <summary>
     /// Returns the hash code of the Subroot text, or 0 when the word or its
     /// Subroot text is null.
     /// </summary>
     public int GetHashCode(Word obj) {
          // Equals treats two null keys as equal, so they must share a hash
          // instead of throwing a NullReferenceException.
          if (object.ReferenceEquals(obj, null) || obj.Subroot == null) {
               return 0;
          }
          return obj.Subroot.GetHashCode();
     }
}