C# Fastest Way to Get Total Directory Size With 75,000 Files

208 Views Asked by At

I am looking for a faster way to get the total size of all files in subfolders with over 75,000 files with C#. The following takes about 8 seconds on my laptop with an SSD drive.

 // Sum the size of every file under `di`, including all subdirectories.
 FileInfo[] files;
 // Must start at 0: C# refuses to compile `+=` on a local that was never assigned.
 // Despite the name, FileInfo.Length is in bytes, so this total is in bytes.
 long fileMBs = 0;


 files = di.GetFiles("*.*", SearchOption.AllDirectories);
 foreach (FileInfo fi in files)
 {
     fileMBs += fi.Length;
 }

I looked into using Win32 CreateFile and GetFileSize, but could not find examples of how to call them from C#. Would this be faster if called for each file given its full path? If so, how do I use them from C#? Is there a faster way?

2

There are 2 best solutions below

0
Jim D On

I did a number of various tests. The test folder was created by extracting a Drupal 10 tar.gz 3 times at different depths. Each Drupal 10 installation has a bit over 25,000 files.

My limited excursion into Win32 was very slow: a non-starter, and I did not consider it worth the effort to understand why.

Using LINQ was the next slowest.

 // LINQ variant: GetFiles returns the complete FileInfo[] for the whole tree, then Sum adds up each Length.
 var total = di.GetFiles("*.*", SearchOption.AllDirectories).Sum(f => f.Length);

My testing showed this is the fastest way, assuming path is the full path to a directory:

 // Plain foreach variant: `path` is the full path of the directory to measure.
 DirectoryInfo di = new DirectoryInfo(path);
 // Collect every file in the tree up front (all subdirectories included).
 FileInfo[] total = di.GetFiles("*.*", SearchOption.AllDirectories);      
 // Running total; the name says MBs, but the loop adds FileInfo.Length values unchanged.
 long totalMBs = 0;
 
  foreach ( var f in total )
  {
      totalMBs += f.Length;
  }

Don't have the energy to post all the testing code. Tried LINQ, simple foreach as above, and a full recursion algorithm. The simple foreach was the fastest. Each test was run after a laptop shutdown for at least 30 seconds to clear the cache. Simple timing using DateTime start, end. Just an eyeball of what might make a difference.

enter image description here

0
RbMm On

If the disk does not incur a seek penalty, we can speed up the search by issuing several I/O requests to the disk at once: instead of waiting in place until each request finishes, we use asynchronous I/O. This can be done with the NtQueryDirectoryFile API: open the folder, bind it to an IOCP (thread pool), and process the results in callbacks when the I/O finishes, so that several I/O requests are handled at once.

struct uIRP;

// Reference-counted base for an object that owns a file handle together with
// the thread-pool I/O completion object created for it. The object deletes
// itself when the last reference is released.
struct __declspec(novtable) uObject 
{
    HANDLE _M_hFile = nullptr;   // closed with NtClose in the destructor
    PTP_IO _M_Io = nullptr;      // released with TpReleaseIoCompletion in the destructor
    LONG _M_dwRefCount = 1;      // the creator holds the first reference

    // Tears down the completion object first, then the handle it was created for.
    virtual ~uObject()
    {
        if (nullptr != _M_Io)
        {
            TpReleaseIoCompletion(_M_Io); // cf. CloseThreadpoolIo
        }

        if (nullptr != _M_hFile)
        {
            NtClose(_M_hFile);
        }
    }

    // Invoked once for every I/O request issued on behalf of this object.
    virtual void OnIoComplete(uIRP* irp, NTSTATUS status, ULONG_PTR dwBytes) = 0;

    // Takes one more reference.
    void AddRef()
    {
        InterlockedIncrementNoFence(&_M_dwRefCount);
    }

    // Drops one reference; the object is destroyed when the count reaches zero.
    void Release()
    {
        const LONG remaining = InterlockedDecrement(&_M_dwRefCount);

        if (0 == remaining)
        {
            delete this;
        }
    }
};

// One I/O request block: an IO_STATUS_BLOCK that can also be chained into a
// singly linked list through _M_pNext.
struct uIRP : IO_STATUS_BLOCK
{
    uIRP* _M_pNext = nullptr;

    // Hands the completion to the owning object, then drops one reference on
    // that object. The order matters: the release may destroy the object.
    VOID OnIoComplete(
        _Inout_opt_ uObject* pObj,
        _In_ NTSTATUS status,
        _In_ ULONG_PTR dwNumberOfBytesTransfered
        )
    {
        pObj->OnIoComplete(this, status, dwNumberOfBytesTransfered);
        pObj->Release();
    }

    // Thread-pool I/O callback. Context is treated as the uObject and
    // ApcContext as the uIRP of the finished request.
    static VOID NTAPI _S_OnIoComplete(
        _Inout_ PTP_CALLBACK_INSTANCE /*Instance*/,
        _Inout_opt_ PVOID Context,
        _In_ PVOID ApcContext,
        _In_ PIO_STATUS_BLOCK IoSB,
        _In_ PTP_IO /*Io*/
        )
    {
        uIRP* const request = static_cast<uIRP*>(ApcContext);
        uObject* const owner = reinterpret_cast<uObject*>(Context);

        request->OnIoComplete(owner, IoSB->Status, IoSB->Information);
    }
};

struct FsDirectory;

struct Task 
{
    unsigned __int64 _M_Size;
    SRWLOCK _M_lock;
    PTP_POOL _M_Pool;
    uIRP* _M_irp;
    FsDirectory* _M_pObj;
    LONG _M_FileCount;
    LONG _M_FolderCount;
    LONG _M_ReparseCount;
    LONG _M_PendingCount;
    LONG _M_IoCount;
    LONG _M_FailCount;
    LONG _M_IoFailCount;
    LONG _M_dwRefCount = 1;
    ULONG _M_time = GetTickCount();
    ULONG _M_dwThreadId = GetCurrentThreadId();

    void* operator new(size_t s)
    {
        return LocalAlloc(LMEM_FIXED|LMEM_ZEROINIT, s);
    }

    void operator delete(void* p)
    {
        LocalFree(p);
    }

    void AddRef()
    {
        InterlockedIncrementNoFence(&_M_dwRefCount);
    }

    void Release()
    {
        if (!InterlockedDecrement(&_M_dwRefCount))
        {
            delete this;
        }
    }

    BOOL Init(ULONG n)
    {
        do 
        {
            if (uIRP* irp = new uIRP)
            {
                irp->_M_pNext = _M_irp;
                _M_irp = irp;
                continue;
            }

            return FALSE;

        } while (--n);

        if (0 <= TpAllocPool(&_M_Pool, 0))
        {
            SYSTEM_INFO si;
            GetSystemInfo(&si);
            TpSetPoolMaxThreads(_M_Pool, si.dwNumberOfProcessors);
            return TRUE;
        }

        return FALSE;
    }

    void push(uIRP* irp)
    {
        irp->_M_pNext = _M_irp;
        _M_irp = irp;
    }

    ~Task()
    {
        DbgPrint(">> %u %u/%u %u %I64u\r\n", 
            GetTickCount() - _M_time, _M_FileCount, _M_FolderCount, _M_ReparseCount, _M_Size);

        if (uIRP* irp = _M_irp)
        {
            do 
            {
                uIRP* p = irp;
                irp = irp->_M_pNext;
                delete p;
            } while (irp);
        }

        if (_M_Pool) TpReleasePool(_M_Pool);

        NtAlertThreadByThreadId((HANDLE)(ULONG_PTR)_M_dwThreadId);
    }

    void Start(POBJECT_ATTRIBUTES poa);
};

// One directory being enumerated asynchronously.
//
// Every instance occupies a fixed block of cbStruct bytes: the object itself
// followed by the NtQueryDirectoryFile output buffer, which begins at _M_buf
// and extends to the end of the block. Blocks come from a preallocated cache
// (an interlocked SList) when one is available, otherwise from LocalAlloc.
struct __declspec(align(__alignof(SLIST_ENTRY))) FsDirectory : public uObject 
{
    enum { cbStruct = 0x10000 }; // size in bytes of every FsDirectory allocation

    Task* _M_pTask;                      // owning scan; referenced for this object's lifetime
    FsDirectory* _M_pNext;               // link in Task::_M_pObj while waiting for a request block
    PFILE_DIRECTORY_INFORMATION _M_pfdi; // entry to resume at after waiting; 0 means "issue the next query"
    FILE_DIRECTORY_INFORMATION _M_buf;   // start of the query buffer; the buffer runs to the end of the block

    inline static SLIST_HEADER _S_head;  // free list of cached blocks
    inline static PVOID _S_Mem;          // base of the cached region, 0 when there is none
    inline static ULONG _S_cb;           // size of the cached region in bytes, 0 when there is none

    // Preallocates n blocks of cbStruct bytes and puts them on the free list.
    // Returns FALSE if the region cannot be allocated; in that case no cache
    // is published and operator new/delete simply use LocalAlloc/LocalFree.
    static BOOL InitLookaSide (ULONG n)
    {
        RtlInitializeSListHead(&_S_head);

        // Start from "no cache" so a failure below cannot leave operator delete
        // comparing pointers against a stale or half-published range.
        _S_Mem = 0;
        _S_cb = 0;

        if (!n)
        {
            // An empty cache is valid; looping with n == 0 would wrap the counter.
            return TRUE;
        }

        if (n > 0xFFFFFFFF / cbStruct)
        {
            // n * cbStruct would not fit in a ULONG
            return FALSE;
        }

        union {
            PSLIST_ENTRY Entry;
            PVOID buf;
            ULONG_PTR up;
        };

        const ULONG cb = n * cbStruct;

        if (buf = LocalAlloc(LMEM_FIXED, cb))
        {
            _S_Mem = buf;
            _S_cb = cb;

            do 
            {
                RtlInterlockedPushEntrySList(&_S_head, Entry);
                up += cbStruct;
            } while (--n);

            return TRUE;
        }

        return FALSE;
    }

    // Frees the cached region. No FsDirectory may be alive or be in the
    // middle of destruction when this is called.
    static void FreeLookaSide()
    {
        LocalFree(_S_Mem);

        // Forget the range so later operator delete calls cannot match it.
        _S_Mem = 0;
        _S_cb = 0;
    }

    // Always returns a cbStruct-byte block, whatever size is requested.
    // Declared noexcept because LocalAlloc reports failure by returning null:
    // only a non-throwing allocation function lets `new FsDirectory(...)`
    // yield null instead of running the constructor on a null pointer.
    void* operator new(size_t /*s*/) noexcept
    {
        if (PVOID pv = RtlInterlockedPopEntrySList(&_S_head))
        {
            return pv;
        }

        return LocalAlloc(LMEM_FIXED, cbStruct);
    }

    // Returns the block to the cache when it lies inside the cached region,
    // otherwise to LocalFree. The unsigned subtraction makes the single
    // comparison reject pointers below _S_Mem as well.
    void operator delete(void* p)
    {
        if ((ULONG_PTR)p - (ULONG_PTR)_S_Mem < _S_cb)
        {
            RtlInterlockedPushEntrySList(&_S_head, (PSLIST_ENTRY)p);
        }
        else
        {
            LocalFree(p);
        }
    }

    // Keeps the task alive for as long as this directory exists.
    FsDirectory(Task* pTask) : _M_pTask(pTask)
    {
        pTask->AddRef();
    }

    // NOTE(review): releasing the Task here may run ~Task, which wakes the
    // thread waiting in TestEnum, before this object's storage has gone
    // through ~uObject and operator delete. If that thread then calls
    // FreeLookaSide, the block may be freed while still in use - confirm the
    // teardown order.
    ~FsDirectory()
    {
        _M_pTask->Release();
    }

    // Returns a request block to use. If the caller already holds one (irp is
    // not null) it is returned as is. Otherwise a block is taken from the
    // task's free list; when that list is empty, this directory is put on the
    // task's waiting list (with an extra reference and the resume position
    // pfdi) and 0 is returned.
    uIRP* pop(Task* pTask, PFILE_DIRECTORY_INFORMATION pfdi, uIRP* irp)
    {
        if (irp)
        {
            return irp;
        }

        AcquireSRWLockExclusive(&pTask->_M_lock);

        if (irp = pTask->_M_irp)
        {
            // pop IRP
            pTask->_M_irp = irp->_M_pNext;
        }
        else
        {
            // no more IRPs, push pending Directory
            AddRef();
            _M_pfdi = pfdi;
            _M_pNext = pTask->_M_pObj;
            pTask->_M_pObj = this;
        }

        ReleaseSRWLockExclusive(&pTask->_M_lock);

        return irp;
    }

    // Walks the directory entries starting at pfdi. Files are added to the
    // totals; each subdirectory is opened and its own enumeration started with
    // a request block. irp is the block the caller currently owns (may be 0).
    // If no block is available for a subdirectory, the walk stops there and is
    // resumed later from the same entry. After the last entry the next query
    // for this directory is issued (or the directory waits for a block).
    void Process(Task* pTask, PFILE_DIRECTORY_INFORMATION pfdi, uIRP* irp)
    {
        FsDirectory* pObj;

        // Subdirectories are opened relative to this directory's handle.
        UNICODE_STRING ObjectName;
        OBJECT_ATTRIBUTES oa = { sizeof(oa), _M_hFile, &ObjectName, OBJ_CASE_INSENSITIVE };

        ULONG NextEntryOffset = 0;

        do 
        {
            (ULONG_PTR&)pfdi += NextEntryOffset;

            if (pfdi->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
            {
                // reparse points are counted but neither followed nor sized
                InterlockedIncrementNoFence(&pTask->_M_ReparseCount);
                continue;
            }

            if (pfdi->FileAttributes & FILE_ATTRIBUTE_DIRECTORY)
            {
                // Skip "." and "..". The first case deliberately falls through
                // into the second so that both characters get checked.
                switch (pfdi->FileNameLength)
                {
                case 2*sizeof(WCHAR): // ".."
                    if ('.' != pfdi->FileName[1])
                    {
                        break;
                    }
                    // fall through
                case sizeof(WCHAR): // "."
                    if ('.' != pfdi->FileName[0])
                    {
                        break;
                    }
                    continue;
                }

                if (irp = pop(pTask, pfdi, irp))
                {
                    InterlockedIncrementNoFence(&pTask->_M_FolderCount);

                    if (pObj = new FsDirectory(pTask))
                    {
                        ObjectName.Buffer = pfdi->FileName;
                        ObjectName.MaximumLength = ObjectName.Length = (USHORT)pfdi->FileNameLength;

                        if (0 <= pObj->Create(&oa))
                        {
                            // the child now owns the request block
                            pObj->Enum(irp);
                            irp = 0;
                        }
                        else
                        {
                            // keep the block for the next subdirectory
                            InterlockedIncrementNoFence(&pTask->_M_FailCount);
                        }

                        pObj->Release();
                    }
                }
                else
                {
                    // no block available: pop() queued this directory at pfdi
                    return ;
                }
            }
            else
            {
                InterlockedIncrementNoFence(&pTask->_M_FileCount);
                InterlockedExchangeAddNoFence(&pTask->_M_Size, pfdi->EndOfFile.QuadPart);
            }

        } while (NextEntryOffset = pfdi->NextEntryOffset);

        // buffer consumed: ask for the next batch of entries
        if (irp = pop(pTask, 0, irp))
        {
            Enum(irp);
        }
    }

    // Completion of one query. On success the returned entries are processed.
    // On any negative status this directory is finished; the request block is
    // then handed to a waiting directory if there is one, otherwise returned
    // to the task's free list.
    virtual void OnIoComplete(uIRP* Irp, NTSTATUS status, ULONG_PTR /*dwBytes*/)
    {
        Task* pTask = _M_pTask;

        if (0 > status)
        {
            // we stop process directory

            if (STATUS_NO_MORE_FILES != status)
            {
                DbgPrint("!! %x\r\n", status);
                InterlockedIncrementNoFence(&pTask->_M_IoFailCount);
            }

            FsDirectory* pObj;

            AcquireSRWLockExclusive(&pTask->_M_lock);

            if (pObj = pTask->_M_pObj)
            {
                // exist pending directory
                pTask->_M_pObj = pObj->_M_pNext;
            }
            else
            {
                pTask->push(Irp);
            }

            ReleaseSRWLockExclusive(&pTask->_M_lock);

            if (pObj)
            {
                if (PFILE_DIRECTORY_INFORMATION pfdi = pObj->_M_pfdi)
                {
                    // it stopped in the middle of its buffer: resume there
                    pObj->Process(pTask, pfdi, Irp);
                }
                else
                {
                    // it had consumed its buffer: issue its next query
                    pObj->Enum(Irp);
                }

                // drop the reference taken when it was put on the waiting list
                pObj->Release();
            }

            return;
        }

        Process(pTask, &_M_buf, Irp);
    }

    // Opens the directory described by poa and creates the thread-pool I/O
    // completion object for the handle on the task's pool. Returns the
    // NtOpenFile status on failure, otherwise the TpAllocIoCompletion status.
    NTSTATUS Create(POBJECT_ATTRIBUTES poa)
    {
        IO_STATUS_BLOCK iosb;
        NTSTATUS status = NtOpenFile(&_M_hFile, FILE_LIST_DIRECTORY, poa, &iosb, FILE_SHARE_VALID_FLAGS, 
            FILE_DIRECTORY_FILE|FILE_OPEN_FOR_BACKUP_INTENT);

        TP_CALLBACK_ENVIRON cbe;
        TpInitializeCallbackEnviron(&cbe);
        TpSetCallbackThreadpool(&cbe, _M_pTask->_M_Pool);
        return 0 > status ? status : TpAllocIoCompletion(&_M_Io, _M_hFile, uIRP::_S_OnIoComplete, this, &cbe);
    }

    // Issues one directory query into this object's buffer, using irp both as
    // the I/O status block and as the completion context. The reference taken
    // here belongs to the request and is dropped by uIRP::OnIoComplete.
    void Enum(uIRP* irp)
    {
        AddRef();
        TpStartAsyncIoOperation(_M_Io);

        NTSTATUS status = NtQueryDirectoryFile(_M_hFile, 0, 0, irp, irp, 
            &_M_buf, cbStruct - offsetof(FsDirectory, _M_buf), 
            FileDirectoryInformation, FALSE, 0, FALSE);

        InterlockedIncrementNoFence(&_M_pTask->_M_IoCount);

        if (STATUS_PENDING == status)
        {
            InterlockedIncrementNoFence(&_M_pTask->_M_PendingCount);
        }

        // The call failed outright: undo the start notification and run the
        // completion path by hand.
        // NOTE(review): `0 > status` is also true for warning-severity codes
        // (STATUS_NO_MORE_FILES is one); confirm that no completion packet is
        // queued in that case, otherwise NT_ERROR(status) may be the intended test.
        if (0 > status)
        {
            TpCancelAsyncIoOperation(_M_Io);
            irp->OnIoComplete(this, status, 0);
        }
    }
};

// Opens the root directory described by poa and issues its first query with
// the first free request block. Does nothing when no request block exists or
// the directory object cannot be allocated.
void Task::Start(POBJECT_ATTRIBUTES poa)
{
    uIRP* const firstIrp = _M_irp;

    if (!firstIrp)
    {
        return;
    }

    FsDirectory* const root = new FsDirectory(this);

    if (!root)
    {
        return;
    }

    if (0 <= root->Create(poa))
    {
        // take the block off the free list before handing it to the query
        _M_irp = firstIrp->_M_pNext;
        root->Enum(firstIrp);
    }

    // drop the creator's reference; an in-flight query holds its own
    root->Release();
}

// Scans the directory tree rooted at poa with up to IoRequests directory
// queries in flight, and blocks until the scan's Task has been destroyed.
// The summary is printed by the Task destructor.
void TestEnum(POBJECT_ATTRIBUTES poa, ULONG IoRequests)
{
    if (Task* pTask = new Task)
    {
        // Do not start the scan without the block cache instead of silently
        // ignoring the failure.
        const BOOL fCache = FsDirectory::InitLookaSide(0x100);

        if (fCache && pTask->Init(IoRequests))
        {
            pTask->Start(poa);
        }

        // Drop the creator's reference; directories still being enumerated
        // hold their own.
        pTask->Release();

        // The Task destructor alerts this thread, so this returns once the
        // task is gone - including when the scan was never started.
        NtWaitForAlertByThreadId(0, 0);

        if (fCache)
        {
            FsDirectory::FreeLookaSide();
        }
    }
}