Difference between revisions of "Duplicate File Finder"

From Baranoski.ca
Jump to navigation Jump to search
Line 1: Line 1:
 
Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow], but I added a .bat wrapper
 
Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow], but I added a .bat wrapper
 +
 +
==duplicate.ps1==
 +
<PRE>
 +
$srcDir = 'C:\test'
 +
$maxThreads = 6 # Tweak this value for more or less threads
 +
$rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads)
 +
$rs.Open()
 +
 +
$tasks = Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length |
 +
    Where-Object Count -GT 1 | ForEach-Object {
 +
        $ps = [powershell]::Create().AddScript({
 +
            $args[0] | Get-FileHash -Algorithm MD5 |
 +
                Group-Object Hash |
 +
                Where-Object Count -GT 1
 +
        }).AddArgument($_.Group)
 +
 +
        $ps.RunspacePool = $rs
 +
       
 +
        @{ ps = $ps; iasync = $ps.BeginInvoke() }
 +
    }
 +
 +
$tasks | ForEach-Object {
 +
    try {
 +
        $_.ps.EndInvoke($_.iasync)
 +
    }
 +
    finally {
 +
        if($_.ps) {
 +
            $_.ps.Dispose()
 +
        }
 +
    }
 +
}
 +
 +
if($rs) {
 +
    $rs.Dispose()
 +
}
 +
</PRE>
 +
 +
==run.bat==
 +
<PRE>
 +
powershell -ExecutionPolicy Bypass -File duplicate.ps1 > out.txt
 +
</PRE>

Revision as of 19:15, 7 March 2025

Originally from Stack Overflow, but I added a .bat wrapper

duplicate.ps1

# duplicate.ps1 - report duplicate files below a directory.
#
# Strategy:
#   1. Enumerate every file under $srcDir and group the files by Length.
#      Files with a unique size cannot have a duplicate, so only groups with
#      more than one member are processed further.
#   2. Hash each remaining size group (MD5) on a runspace pool, one task per
#      group, and keep only the hashes shared by more than one file.
#   3. Emit the resulting Group-Object records (Count, Name = hash, Group).
#
# Parameters (both optional, defaults match the original hard-coded values):
#   -srcDir      Root directory to scan.
#   -maxThreads  Upper bound on concurrently running runspaces.
param(
    [string]$srcDir = 'C:\test',
    [int]$maxThreads = 6 # Tweak this value for more or less threads
)

$rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads)
$rs.Open()

try {
    # Start one asynchronous hashing task per group of same-sized files.
    $tasks = Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length |
        Where-Object Count -GT 1 | ForEach-Object {
            $ps = [powershell]::Create().AddScript({
                # $args[0] is the collection of same-sized files for this task.
                $args[0] | Get-FileHash -Algorithm MD5 |
                    Group-Object Hash |
                    Where-Object Count -GT 1
            }).AddArgument($_.Group)

            $ps.RunspacePool = $rs

            # Keep the async handle next to its PowerShell instance so the
            # result can be collected later.
            @{ ps = $ps; iasync = $ps.BeginInvoke() }
        }

    # Collect the results in submission order; EndInvoke blocks until the
    # corresponding task has finished.
    # NOTE: errors raised inside a task (e.g. an unreadable file) are stored in
    # that instance's error stream ($_.ps.Streams.Error) and are not printed here.
    $tasks | ForEach-Object {
        try {
            $_.ps.EndInvoke($_.iasync)
        }
        finally {
            if($_.ps) {
                $_.ps.Dispose()
            }
        }
    }
}
finally {
    # Always release the pool, even when enumeration or a task failed.
    if($rs) {
        $rs.Dispose()
    }
}

run.bat

@echo off
REM Runs duplicate.ps1 and writes its output to out.txt in the current directory.
REM %~dp0 expands to the folder containing this .bat file, so the script is
REM found even when the wrapper is started from a different working directory.
powershell -ExecutionPolicy Bypass -File "%~dp0duplicate.ps1" > out.txt