diff options
author | Christian Pointner <equinox@helsinki.at> | 2017-01-27 19:46:41 (GMT) |
---|---|---|
committer | Christian Pointner <equinox@helsinki.at> | 2017-01-27 19:46:41 (GMT) |
commit | fbcf3776c59aae5e2e6c6e4c700af3da1489af1f (patch) | |
tree | 6a7c9caa459e84a8dd19e14dec77b044729636e6 /src/file-hasher/hasher.go | |
parent | 056b224e3241c6cf889ba1c7b0eba85a8f33cc4a (diff) |
separate walker from hasher
Diffstat (limited to 'src/file-hasher/hasher.go')
-rw-r--r-- | src/file-hasher/hasher.go | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/src/file-hasher/hasher.go b/src/file-hasher/hasher.go new file mode 100644 index 0000000..afc35fd --- /dev/null +++ b/src/file-hasher/hasher.go @@ -0,0 +1,110 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of pool-import. +// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "encoding/base64" + "io" + "log" + "os" + "path/filepath" + "sync" + + "golang.org/x/crypto/blake2b" +) + +type FileMap map[string]string + +func (m FileMap) Merge(m2 FileMap) { + for key, value := range m2 { + m[key] = value + } +} + +type Hasher struct { + numThreads uint + stdlog *log.Logger + filesMtx sync.Mutex + Files FileMap +} + +func computeHash(path string) (string, error) { + hash, err := blake2b.New256(nil) + if err != nil { + return "", err + } + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil +} + +func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + + for i := uint(0); i < h.numThreads; i++ { + wg.Add(1) + go func(num uint) { + defer wg.Done() + for { + file, ok := <-C + if !ok { + return + } + if hash, err := computeHash(file); err != nil { + h.stdlog.Printf(" - skipping (%v): 
%s", err, filepath.Base(file)) + } else { + h.stdlog.Printf(" - hashed: %s", filepath.Base(file)) + h.filesMtx.Lock() + h.Files[hash] = file + h.filesMtx.Unlock() + } + } + }(i) + } + return +} + +func (h *Hasher) ComputeHashes(w Walker) (err error) { + C := make(chan string, 10) + + wg := h.collectHashes(C) + + err = w.Walk(C) + if err != nil { + return + } + + wg.Wait() + return +} + +func NewHasher(numThreads uint, stdlog *log.Logger) (h *Hasher) { + h = &Hasher{numThreads: numThreads, stdlog: stdlog} + h.Files = make(map[string]string) + return +} |