Difference between revisions of "Duplicate File Finder"
Jump to navigation
Jump to search
Line 1: | Line 1: | ||
Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow], but I added a .bat wrapper. | Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow], but I added a .bat wrapper. | ||
+ | |||
+ | ==duplicate.ps1== | ||
+ | <PRE> | ||
+ | # Lists sets of duplicate files under $srcDir. | ||
+ | # Candidates are first grouped by file length; only groups with more than one file are | ||
+ | # hashed (MD5), each group on a runspace-pool thread. Every emitted object is a | ||
+ | # Group-Object result for files that share both length and hash. | ||
+ | param( | ||
+ |     [string] $srcDir = 'C:\test',  # Root directory, scanned recursively | ||
+ |     [int] $maxThreads = 6          # Tweak this value for more or less threads | ||
+ | ) | ||
+ | |||
+ | $rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads) | ||
+ | $tasks = New-Object 'System.Collections.Generic.List[hashtable]' | ||
+ | |||
+ | try { | ||
+ |     $rs.Open() | ||
+ | |||
+ |     # Queue one asynchronous hashing job per group of same-length files. | ||
+ |     Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length | | ||
+ |         Where-Object Count -GT 1 | ForEach-Object { | ||
+ |             $ps = [powershell]::Create().AddScript({ | ||
+ |                 $args[0] | Get-FileHash -Algorithm MD5 | | ||
+ |                     Group-Object Hash | | ||
+ |                     Where-Object Count -GT 1 | ||
+ |             }).AddArgument($_.Group) | ||
+ | |||
+ |             $ps.RunspacePool = $rs | ||
+ | |||
+ |             # Record the job before starting it so the finally block can always dispose it. | ||
+ |             $task = @{ ps = $ps; iasync = $null } | ||
+ |             $tasks.Add($task) | ||
+ |             $task.iasync = $ps.BeginInvoke() | ||
+ |         } | ||
+ | |||
+ |     foreach ($task in $tasks) { | ||
+ |         try { | ||
+ |             # Blocks until the job completes and emits its duplicate groups. | ||
+ |             $task.ps.EndInvoke($task.iasync) | ||
+ |         } | ||
+ |         catch { | ||
+ |             # One failed job must not hide the results of the others. | ||
+ |             Write-Error -ErrorRecord $_ | ||
+ |         } | ||
+ | |||
+ |         # Non-terminating errors (e.g. unreadable files) only land in the job's own | ||
+ |         # error stream; forward them so they are not silently lost. | ||
+ |         foreach ($err in $task.ps.Streams.Error) { | ||
+ |             Write-Error -ErrorRecord $err | ||
+ |         } | ||
+ |     } | ||
+ | } | ||
+ | finally { | ||
+ |     # Always release the jobs and the pool, even when the scan or a job failed. | ||
+ |     foreach ($task in $tasks) { | ||
+ |         if ($task.ps) { $task.ps.Dispose() } | ||
+ |     } | ||
+ |     if ($rs) { $rs.Dispose() } | ||
+ | } | ||
+ | </PRE> | ||
+ | |||
+ | ==run.bat== | ||
+ | <PRE> | ||
+ | @rem Run duplicate.ps1 from this .bat file's own folder (%~dp0) so the wrapper works from any | ||
+ | @rem working directory; -NoProfile keeps profile output out of out.txt. | ||
+ | powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0duplicate.ps1" > out.txt | ||
+ | </PRE> |
Revision as of 19:15, 7 March 2025
Originally from Stack Overflow, but I added a .bat wrapper.
duplicate.ps1
# Lists sets of duplicate files under $srcDir.
# Candidates are first grouped by file length; only groups with more than one file are
# hashed (MD5), each group on a runspace-pool thread. Every emitted object is a
# Group-Object result for files that share both length and hash.
param(
    [string] $srcDir = 'C:\test',  # Root directory, scanned recursively
    [int] $maxThreads = 6          # Tweak this value for more or less threads
)

$rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads)
$tasks = New-Object 'System.Collections.Generic.List[hashtable]'

try {
    $rs.Open()

    # Queue one asynchronous hashing job per group of same-length files.
    Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length |
        Where-Object Count -GT 1 | ForEach-Object {
            $ps = [powershell]::Create().AddScript({
                $args[0] | Get-FileHash -Algorithm MD5 |
                    Group-Object Hash |
                    Where-Object Count -GT 1
            }).AddArgument($_.Group)

            $ps.RunspacePool = $rs

            # Record the job before starting it so the finally block can always dispose it.
            $task = @{ ps = $ps; iasync = $null }
            $tasks.Add($task)
            $task.iasync = $ps.BeginInvoke()
        }

    foreach ($task in $tasks) {
        try {
            # Blocks until the job completes and emits its duplicate groups.
            $task.ps.EndInvoke($task.iasync)
        }
        catch {
            # One failed job must not hide the results of the others.
            Write-Error -ErrorRecord $_
        }

        # Non-terminating errors (e.g. unreadable files) only land in the job's own
        # error stream; forward them so they are not silently lost.
        foreach ($err in $task.ps.Streams.Error) {
            Write-Error -ErrorRecord $err
        }
    }
}
finally {
    # Always release the jobs and the pool, even when the scan or a job failed.
    foreach ($task in $tasks) {
        if ($task.ps) { $task.ps.Dispose() }
    }
    if ($rs) { $rs.Dispose() }
}
run.bat
@rem Run duplicate.ps1 from this .bat file's own folder (%~dp0) so the wrapper works from any
@rem working directory; -NoProfile keeps profile output out of out.txt.
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0duplicate.ps1" > out.txt