import java.io.*; import java.nio.*; import java.nio.file.*; import java.nio.file.attribute.*; import java.security.*; import java.util.*; public class DuplicateFiles { public static void main(String[] args) { if (args.length != 2) { System.err.println("Directory name and minimum file size are required."); System.exit(1); } try { findDuplicateFiles(args[0], Long.parseLong(args[1])); } catch (Exception e) { e.printStackTrace(); } } private static void findDuplicateFiles(String directory, long minimumSize) throws IOException, NoSuchAlgorithmException { System.out.println("Directory: '" + directory + "', minimum size: " + minimumSize + " bytes."); Path path = FileSystems.getDefault().getPath(directory); FileVisitor visitor = new FileVisitor(path, minimumSize); Files.walkFileTree(path, visitor); System.out.println("The following sets of files have the same size and checksum:"); for (Map.Entry>> e : visitor.fileMap_.entrySet()) { Map> map = e.getValue(); if (!containsDuplicates(map)) continue; List> fileSets = new ArrayList<>(map.values()); for (List files : fileSets) Collections.sort(files); Collections.sort(fileSets, new StringListComparator()); FileKey key = e.getKey(); System.out.println(); System.out.println("Size: " + key.size_ + " bytes"); for (List files : fileSets) { for (int i = 0, n = files.size(); i < n; ++i) { if (i > 0) System.out.print(" = "); System.out.print(files.get(i)); } System.out.println(); } } } private static class StringListComparator implements Comparator> { public int compare(List a, List b) { int len1 = a.size(), len2 = b.size(); for (int i = 0; i < len1 && i < len2; ++i) { int c = a.get(i).compareTo(b.get(i)); if (c != 0) return c; } return Integer.compare(len1, len2); } } private static boolean containsDuplicates(Map> map) { if (map.size() > 1) return true; for (List files : map.values()) { if (files.size() > 1) return true; } return false; } private static class FileVisitor extends SimpleFileVisitor { private MessageDigest digest_; private Path directory_; private long minimumSize_; private Map>> fileMap_ = new TreeMap<>(); private FileVisitor(Path directory, long minimumSize) throws NoSuchAlgorithmException { directory_ = directory; minimumSize_ = minimumSize; digest_ = MessageDigest.getInstance("MD5"); } public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { if (attrs.size() >= minimumSize_) { FileKey key = new FileKey(file, attrs, getMD5Sum(file)); Map> map = fileMap_.get(key); if (map == null) fileMap_.put(key, map = new HashMap<>()); List files = map.get(attrs.fileKey()); if (files == null) map.put(attrs.fileKey(), files = new ArrayList<>()); Path relative = directory_.relativize(file); files.add(relative.toString()); } return FileVisitResult.CONTINUE; } private byte[] getMD5Sum(Path file) throws IOException { digest_.reset(); try (InputStream in = new FileInputStream(file.toString())) { byte[] buffer = new byte[8192]; int bytes; while ((bytes = in.read(buffer)) != -1) { digest_.update(buffer, 0, bytes); } } return digest_.digest(); } } private static class FileKey implements Comparable { private byte[] hash_; private long size_; private FileKey(Path file, BasicFileAttributes attrs, byte[] hash) throws IOException { size_ = attrs.size(); hash_ = hash; } public int compareTo(FileKey other) { int c = Long.compare(other.size_, size_); if (c == 0) c = hashCompare(hash_, other.hash_); return c; } } private static int hashCompare(byte[] a, byte[] b) { int len1 = a.length, len2 = b.length; for (int i = 0; i < len1 && i < len2; ++i) { int c = Byte.compare(a[i], b[i]); if (c != 0) return c; } return Integer.compare(len1, len2); } }