// // pool-import // // Copyright (C) 2016 Christian Pointner // // This file is part of pool-import. // // pool-import is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // any later version. // // pool-import is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with pool-import. If not, see . // package main import ( "encoding/base64" "hash" "io" "log" "os" "path/filepath" "sync" "crypto/md5" "crypto/sha1" "crypto/sha256" "crypto/sha512" "golang.org/x/crypto/blake2b" ) type FileMap map[string]string type Hasher struct { numThreads uint stdlog *log.Logger newHash func() (hash.Hash, error) filesMtx sync.Mutex Files FileMap } func (h *Hasher) computeHash(path string) (string, error) { hash, err := h.newHash() if err != nil { return "", err } file, err := os.Open(path) if err != nil { return "", err } defer file.Close() if _, err := io.Copy(hash, file); err != nil { return "", err } return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil } func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) { wg = &sync.WaitGroup{} for i := uint(0); i < h.numThreads; i++ { wg.Add(1) go func(num uint) { defer wg.Done() for { file, ok := <-C if !ok { return } if hash, err := h.computeHash(file); err != nil { h.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) } else { h.stdlog.Printf(" - hashed: %s", filepath.Base(file)) h.filesMtx.Lock() h.Files[hash] = file h.filesMtx.Unlock() } } }(i) } return } func (h *Hasher) ComputeHashes(w Walker) (err error) { C := make(chan string, h.numThreads*2) wg := h.collectHashes(C) err = w.Walk(C) if err != nil { return } wg.Wait() return } func NewHasher(algo string, numThreads uint, stdlog *log.Logger) (h *Hasher) { h = &Hasher{numThreads: numThreads, stdlog: stdlog} switch algo { case "md5": h.newHash = func() (hash.Hash, error) { return md5.New(), nil } case "sha1": h.newHash = func() (hash.Hash, error) { return sha1.New(), nil } case "sha256": h.newHash = func() (hash.Hash, error) { return sha256.New(), nil } case "sha512": h.newHash = func() (hash.Hash, error) { return sha512.New(), nil } case "blake2b": fallthrough case "": algo = "blake2b" h.newHash = func() (hash.Hash, error) { return blake2b.New256(nil) } default: stdlog.Printf("*** invalid hashing algorithm: '%s'", algo) return nil } h.Files = make(map[string]string) if h.numThreads < 1 { h.numThreads = 4 } stdlog.Printf("*** Created hasher (%s, %d threads)", algo, h.numThreads) return }