Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

This is why Python is God

Name: Anonymous 2011-07-03 23:41

#!/usr/bin/env python

import hashlib
import os
import sys


def visit(files_by_size, dirname, names):
  """Record every regular file among `names` under its size in bytes.

  Each entry of `names` is joined onto `dirname`; entries that are not
  regular files are ignored.  `files_by_size` is updated in place, mapping
  a byte size to the list of paths that have that size, in the order they
  were encountered.
  """
  for entry in names:
    candidate = os.path.join(dirname, entry)
    if not os.path.isfile(candidate):
      continue
    # setdefault creates the bucket the first time a given size is seen.
    files_by_size.setdefault(os.path.getsize(candidate), []).append(candidate)


if __name__ == '__main__':
  # For each directory named on the command line, print every group of files
  # that share both a size and an MD5 digest, one "<digest>  <path>" line per
  # file.  Each directory is analysed independently of the others.
  for dirname in sys.argv[1:]:
    files_by_size = {}
    files_by_hash = {}
    # os.walk is available on both Python 2 and Python 3, whereas
    # os.path.walk exists only on Python 2.  visit() itself discards
    # anything that is not a regular file.
    for dirpath, _, filenames in os.walk(dirname):
      visit(files_by_size, dirpath, filenames)
    for paths in files_by_size.values():
      # A file whose size is unique cannot have a duplicate; skip hashing it.
      if len(paths) < 2:
        continue
      for path in paths:
        md5 = hashlib.md5()
        # Binary mode is required: text mode may translate or truncate the
        # bytes on some platforms, and on Python 3 it yields str, which
        # hashlib does not accept.  Reading in chunks keeps memory bounded
        # for large files.
        with open(path, 'rb') as f:
          for chunk in iter(lambda: f.read(1 << 20), b''):
            md5.update(chunk)
        files_by_hash.setdefault(md5.hexdigest(), []).append(path)
    for digest, paths in files_by_hash.items():
      if len(paths) > 1:
        for path in paths:
          # A single parenthesised argument prints identically under the
          # Python 2 print statement and the Python 3 print function.
          print('%s  %s' % (digest, path))

Name: Anonymous 2011-07-05 12:03

#lang racket
(require file/md5)

;; group-by-size : path -> hash
;; Walks everything under `dir` and returns an immutable hash that maps a
;; file size in bytes to the list of regular files having that size.
;; Entries whose type is not 'file leave the accumulator unchanged.
(define (group-by-size dir)
  (define (add-entry path type by-size)
    (cond
      [(eq? type 'file)
       (hash-update by-size
                    (file-size path)
                    (lambda (same-size) (cons path same-size))
                    '())]
      [else by-size]))
  (fold-files add-entry (hash) dir))

;; group-by-md5 : hash -> hash
;; Takes the values of `files` (lists of paths) and, for every list holding
;; more than one path, reads each file and files it under the MD5 digest of
;; its contents.  Lists with a single path are skipped without being read.
;; Returns an immutable hash from digest to list of paths.
(define (group-by-md5 files)
  (for*/fold ([by-digest (hash)])
             ([candidates (in-list (hash-values files))]
              #:when (> (length candidates) 1)
              [path (in-list candidates)])
    (hash-update by-digest
                 (md5 (file->bytes path))
                 (lambda (same-digest) (cons path same-digest))
                 '())))

;; print-duplicates : hash -> void
;; For every digest in `files` that is associated with more than one path,
;; prints one "<digest>: <path>" line per path.  Digests with a single path
;; produce no output.
(define (print-duplicates files)
  (for ([(digest paths) (in-hash files)])
    (when (> (length paths) 1)
      (for ([path (in-list paths)])
        (printf "~a: ~a\n" digest path)))))

;; Entry point: take the single directory argument from the command line,
;; bucket its files by size, refine the buckets by MD5 digest, and print
;; every group of duplicates.
(let* ([root (command-line #:args (dir) dir)]
       [by-size (group-by-size root)]
       [by-digest (group-by-md5 by-size)])
  (print-duplicates by-digest))

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List