Find identical files

OUTF=rem-duplicates.sh; echo "#! /bin/sh" > $OUTF; find "$@" -type f -print0 | xargs -0 -n1 md5sum | sort
--key=1,32 | uniq -w 32 -d --all-repeated=separate | sed -r 's/^[0-9a-f]*( )*//;s/([^a-zA-Z0-9./_-])/\\\1
/g;s/(.+)/#rm \1/' >> $OUTF; chmod a+x $OUTF; ls -l $OUTF
OUTF=rem-duplicates.sh;
echo "#! /bin/sh" > $OUTF;
find "$@" -type f -print0 |
  xargs -0 -n1 md5sum |
    sort --key=1,32 | uniq -w 32 -d --all-repeated=separate |
    sed -r 's/^[0-9a-f]*( )*//;s/([^a-zA-Z0-9./_-])/\\\1/g;s/(.+)/#rm \1/' >> $OUTF;
chmod a+x $OUTF
ls -l $OUTF

Original article: http://elonen.iki.fi/code/misc-notes/remove-duplicate-files/

This entry was posted in Bash and tagged . Bookmark the permalink.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.