Difference between revisions of "Duplicate File Finder"
Jump to navigation
Jump to search
| Line 1: | Line 1: | ||
Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow] but I added a .bat wrapper | Originally from [https://stackoverflow.com/questions/76242708/find-duplicate-files-with-hash-and-length-but-use-other-algorithm Stack Overflow] but I added a .bat wrapper | ||
| + | |||
| + | ==duplicate.ps1== | ||
| + | <PRE> | ||
| + | $srcDir = 'C:\test' | ||
| + | $maxThreads = 6 # Tweak this value for more or less threads | ||
| + | $rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads) | ||
| + | $rs.Open() | ||
| + | |||
| + | $tasks = Get-ChildItem -Path $srcDir -File -Recurse | Group-Object Length | | ||
| + | Where-Object Count -GT 1 | ForEach-Object { | ||
| + | $ps = [powershell]::Create().AddScript({ | ||
| + | $args[0] | Get-FileHash -Algorithm MD5 | | ||
| + | Group-Object Hash | | ||
| + | Where-Object Count -GT 1 | ||
| + | }).AddArgument($_.Group) | ||
| + | |||
| + | $ps.RunspacePool = $rs | ||
| + | |||
| + | @{ ps = $ps; iasync = $ps.BeginInvoke() } | ||
| + | } | ||
| + | |||
| + | $tasks | ForEach-Object { | ||
| + | try { | ||
| + | $_.ps.EndInvoke($_.iasync) | ||
| + | } | ||
| + | finally { | ||
| + | if($_.ps) { | ||
| + | $_.ps.Dispose() | ||
| + | } | ||
| + | } | ||
| + | } | ||
| + | |||
| + | if($rs) { | ||
| + | $rs.Dispose() | ||
| + | } | ||
| + | </PRE> | ||
| + | |||
| + | ==run.bat== | ||
| + | <PRE> | ||
| + | powershell -ExecutionPolicy Bypass -File duplicate.ps1 > out.txt | ||
| + | </PRE> | ||
Revision as of 18:15, 7 March 2025
Originally from Stack Overflow but I added a .bat wrapper
duplicate.ps1
<#
.SYNOPSIS
    Lists groups of duplicate files found under a directory tree.

.DESCRIPTION
    Files are first grouped by length; only lengths shared by more than one
    file are hashed. Each same-length group is hashed with MD5 on a runspace
    pool, and every hash group that contains more than one file is written to
    the output stream.

.PARAMETER srcDir
    Root directory that is scanned recursively. Defaults to 'C:\test'.

.PARAMETER maxThreads
    Maximum number of runspaces in the pool. Defaults to 6.
#>
param(
    [string] $srcDir = 'C:\test',
    [int] $maxThreads = 6   # Tweak this value for more or less threads
)

$rs = [runspacefactory]::CreateRunspacePool(1, $maxThreads)

# Each PowerShell instance is recorded here before it is started, so the
# cleanup in the outer 'finally' can reach it even if a later step throws.
$tasks = New-Object 'System.Collections.Generic.List[hashtable]'

try {
    $rs.Open()

    # -LiteralPath keeps characters such as '[' and ']' in $srcDir from being
    # interpreted as wildcards.
    Get-ChildItem -LiteralPath $srcDir -File -Recurse |
        Group-Object Length |
        Where-Object Count -GT 1 |
        ForEach-Object {
            # One job per group of same-sized files: hash them and keep only
            # the hashes that occur more than once.
            $ps = [powershell]::Create().AddScript({
                $args[0] | Get-FileHash -Algorithm MD5 |
                    Group-Object Hash |
                    Where-Object Count -GT 1
            }).AddArgument($_.Group)

            $ps.RunspacePool = $rs

            $task = @{ ps = $ps; iasync = $null }
            $tasks.Add($task)
            $task.iasync = $ps.BeginInvoke()
        }

    # Collect the results in submission order; EndInvoke blocks until the
    # corresponding job has finished and its output is emitted from here.
    foreach ($task in $tasks) {
        try {
            $task.ps.EndInvoke($task.iasync)
        }
        finally {
            $task.ps.Dispose()
        }
    }
}
finally {
    # Release anything still outstanding (for example after an error part-way
    # through), then the pool itself. Disposing an instance a second time is
    # expected to be harmless.
    foreach ($task in $tasks) {
        if ($task.ps) {
            $task.ps.Dispose()
        }
    }

    if ($rs) {
        $rs.Dispose()
    }
}
run.bat
@echo off
rem Runs duplicate.ps1 and writes its output to out.txt in the current directory.
rem %~dp0 expands to the drive and folder of this batch file, so the script is
rem found regardless of the directory the batch file is started from.
rem -NoProfile keeps any user profile script from writing into out.txt.
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0duplicate.ps1" > out.txt