Time difference between iterating over a file tree created during a scan vs. one created by duplicating the scanned tree

105 Views Asked by At

(The title and question have been significantly changed, as none of the parts originally thought to be important ended up being relevant to the problem.)

I have a generated file tree of a hard drive, and I'm creating a function to highlight every instance of an extension in the file tree. For some reason iterating over any duplicate file tree other than the one created during the scan can take at least twice as long. Please note that I am not trying to iterate over the file tree during the scan.

What exactly is causing the slowdown? List<FileNode> seems to be the culprit, but I'm not sure what internal mechanism is at fault.

I've created a gist with 4 files to enumerate a file tree and show the inconsistencies in iteration times: FileTreeIterationSpeedTest

Performance for drive with 2m files and 200k directories:

enter image description here

Output from gist:

Scanning...
RAM Used:   300.6 MB
   Bytes:   443.7 GB
   Files:  1,925,131
 Folders:    156,311
Progress:     100.0%
Duration:   00:00:17

Scan complete!
Duplicating file tree...
Duplication complete!
RAM Used: 311.4 MB

Iterating: 1000
Scanned Tree: 00:03.857
  Duped Tree: 00:01.409
Duped Tree is 173.6% faster

Press any key to continue...

Relevant Code from FileNode.cs:

/// <summary>
/// A node of an in-memory file tree. A node is a drive root, a directory, a
/// file, or a "&lt;Files&gt;" collection node that groups the file nodes added
/// to its parent.
/// </summary>
public class FileNode {
    /// <summary>The kind of entry a <see cref="FileNode"/> represents.</summary>
    public enum FileNodeType {
        Root,
        Directory,
        FileCollection,
        File,
    }

    // Assigned exactly once by every constructor. There is deliberately no
    // field initializer: the duplicating constructor allocates a list with an
    // exact capacity, and an initializer would allocate a second list per node
    // that is thrown away immediately.
    private readonly List<FileNode> children;

    // The "<Files>" child of this node, created on demand by AddChild.
    // Stays null until the first file is added.
    private FileNode fileCollection;

    /// <summary>The node this node was added to, or null when it has none.</summary>
    public FileNode Parent { get; private set; }

    /// <summary>The kind of entry this node represents.</summary>
    public FileNodeType Type { get; }

    /// <summary>The size taken from the find data (or copied from the duplicated node).</summary>
    public long Size { get; private set; }

    /// <summary>The file extension for file nodes; an empty string for nodes built without find data.</summary>
    public string Extension { get; } = string.Empty;

    /// <summary>The name of the entry ("&lt;Files&gt;" for file collections).</summary>
    public string Name { get; }

    /// <summary>Creates an empty file collection node.</summary>
    private FileNode() {
        children = new List<FileNode>();
        Type = FileNodeType.FileCollection;
        Name = "<Files>";
    }

    /// <summary>Creates a root node.</summary>
    /// <param name="drivePath">The path stored as the root's <see cref="Name"/>.</param>
    public FileNode(string drivePath) {
        children = new List<FileNode>();
        Type = FileNodeType.Root;
        Name = drivePath;
    }

    /// <summary>Creates a file or directory node from find data.</summary>
    /// <param name="find">
    /// The find data; its IsDirectory flag selects the node type, and its
    /// cFileName and Size supply the name and size.
    /// </param>
    public FileNode(Win32FindData find) {
        children = new List<FileNode>();
        if (!find.IsDirectory) {
            Type = FileNodeType.File;
            Extension = Path.GetExtension(find.cFileName);
        }
        else {
            Type = FileNodeType.Directory;
        }
        Name = find.cFileName;
        Size = find.Size;
    }

    /// <summary>
    /// Creates a deep copy of the tree rooted at <paramref name="root"/>. The
    /// copy has no parent.
    /// </summary>
    /// <param name="root">The node whose subtree is duplicated.</param>
    public FileNode(FileNode root) : this(root, null) {
    }

    /// <summary>
    /// Recursively duplicates <paramref name="file"/> and all of its children,
    /// attaching the copy to <paramref name="parent"/>.
    /// </summary>
    /// <param name="file">The node to duplicate.</param>
    /// <param name="parent">The already-duplicated parent, or null for the copy's root.</param>
    private FileNode(FileNode file, FileNode parent) {
        Parent = parent;
        Type = file.Type;
        Size = file.Size;
        Extension = file.Extension;
        Name = file.Name;

        int count = file.children.Count;
        children = new List<FileNode>(count);
        for (int i = 0; i < count; i++) {
            FileNode original = file.children[i];
            FileNode copy = new FileNode(original, this);
            children.Add(copy);

            // Re-link the copy of the source's file collection so that a later
            // AddChild on the duplicate reuses it instead of creating a second
            // "<Files>" node.
            if (ReferenceEquals(original, file.fileCollection)) {
                fileCollection = copy;
            }
        }
    }

    /// <summary>
    /// Adds <paramref name="item"/> beneath this node. A file added to anything
    /// other than a file collection is placed inside this node's "&lt;Files&gt;"
    /// collection, which is created and linked in as a child on first use.
    /// Every other item becomes a direct child.
    /// </summary>
    /// <param name="item">The node to add; its <see cref="Parent"/> is updated.</param>
    public void AddChild(FileNode item) {
        if (item.Type == FileNodeType.File && Type != FileNodeType.FileCollection) {
            if (fileCollection == null) {
                fileCollection = new FileNode();
                // Link the collection into the tree. Without this the files
                // would only be referenced by a private field and could never
                // be reached through Count or the indexer.
                children.Add(fileCollection);
                fileCollection.Parent = this;
            }
            fileCollection.AddChild(item);
        }
        else {
            children.Add(item);
            item.Parent = this;
        }
    }

    /// <summary>True when this node has no children.</summary>
    public bool IsLeaf => children.Count == 0;

    /// <summary>The number of direct children.</summary>
    public int Count => children.Count;

    /// <summary>Gets the direct child at <paramref name="index"/>.</summary>
    /// <param name="index">Zero-based position within the children list.</param>
    public FileNode this[int index] => children[index];
}
0

There are 0 best solutions below