Duplicate File Finder

From Baranoski.ca
Jump to navigation Jump to search

This script originally came from Reddit; I added a .bat wrapper to run it.

duplicate.ps1

# Finds duplicate files under $srcDir.
#
# Files are first grouped by size (Length); only groups containing more than
# one file are hashed, because files of different sizes cannot be identical.
# Each same-size group is hashed with MD5 in its own PowerShell instance running
# on a shared runspace pool, so several groups are processed in parallel.
#
# Output: for every set of files in a size group that share the same MD5 hash,
# the Group-Object result (Count, Name = hash, Group = file hash entries) is
# written to the output stream. Errors raised inside the workers (for example a
# file that could not be hashed) are relayed to the error stream.

$srcDir = 'C:\test' # Root folder to scan recursively
$maxThreads = 6 # Tweak this value for more or less threads

$rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads)

# Every PowerShell instance that gets created is recorded here so the finally
# block below can dispose all of them, including ones never waited on.
$tasks = New-Object System.Collections.ArrayList

try {
    $rs.Open()

    Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length |
        Where-Object Count -GT 1 | ForEach-Object {
            $ps = [powershell]::Create().AddScript({
                # $args[0] is the list of same-size files passed via AddArgument.
                $args[0] | Get-FileHash -Algorithm MD5 |
                    Group-Object Hash |
                    Where-Object Count -GT 1
            }).AddArgument($_.Group)

            $ps.RunspacePool = $rs

            # Record the instance before starting it so it is tracked for
            # cleanup even if BeginInvoke itself fails.
            $task = @{ ps = $ps; iasync = $null }
            [void]$tasks.Add($task)
            $task.iasync = $ps.BeginInvoke()
        }

    foreach ($task in $tasks) {
        try {
            # Waits for this worker to finish and emits its duplicate groups.
            $task.ps.EndInvoke($task.iasync)

            # Non-terminating errors inside the worker are collected on its
            # error stream; relay them so failures are not silently dropped.
            foreach ($workerError in $task.ps.Streams.Error) {
                Write-Error -ErrorRecord $workerError
            }
        }
        finally {
            $task.ps.Dispose()
        }
    }
}
finally {
    # Release anything still outstanding if the scan was interrupted part-way;
    # instances already disposed in the loop above are simply disposed again.
    foreach ($task in $tasks) {
        if ($task.ps) {
            $task.ps.Dispose()
        }
    }

    if ($rs) {
        $rs.Dispose()
    }
}

run.bat

rem Runs duplicate.ps1 located in the same folder as this batch file and saves
rem its output next to it as out.txt. %~dp0 expands to the drive and path of
rem this .bat file, so the wrapper does not depend on the current directory it
rem is launched from. -NoProfile skips PowerShell profile scripts so that
rem nothing they print ends up in out.txt.
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0duplicate.ps1" > "%~dp0out.txt"