summaryrefslogtreecommitdiff
path: root/src/file-hasher/hasher.go
diff options
context:
space:
mode:
authorChristian Pointner <equinox@helsinki.at>2017-01-27 19:46:41 (GMT)
committerChristian Pointner <equinox@helsinki.at>2017-01-27 19:46:41 (GMT)
commitfbcf3776c59aae5e2e6c6e4c700af3da1489af1f (patch)
tree6a7c9caa459e84a8dd19e14dec77b044729636e6 /src/file-hasher/hasher.go
parent056b224e3241c6cf889ba1c7b0eba85a8f33cc4a (diff)
separate walker from hasher
Diffstat (limited to 'src/file-hasher/hasher.go')
-rw-r--r--src/file-hasher/hasher.go110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/file-hasher/hasher.go b/src/file-hasher/hasher.go
new file mode 100644
index 0000000..afc35fd
--- /dev/null
+++ b/src/file-hasher/hasher.go
@@ -0,0 +1,110 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "encoding/base64"
+ "io"
+ "log"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "golang.org/x/crypto/blake2b"
+)
+
// FileMap is a string-to-string map. The Hasher in this file fills it with
// the encoded hash of a file's content as key and the path of that file as
// value.
type FileMap map[string]string

// Merge copies every entry of m2 into m. An entry of m whose key is also
// present in m2 is overwritten with the value from m2; m2 itself is left
// untouched.
func (m FileMap) Merge(m2 FileMap) {
	for hash := range m2 {
		m[hash] = m2[hash]
	}
}
+
+type Hasher struct {
+ numThreads uint
+ stdlog *log.Logger
+ filesMtx sync.Mutex
+ Files FileMap
+}
+
+func computeHash(path string) (string, error) {
+ hash, err := blake2b.New256(nil)
+ if err != nil {
+ return "", err
+ }
+ file, err := os.Open(path)
+ if err != nil {
+ return "", err
+ }
+ defer file.Close()
+ if _, err := io.Copy(hash, file); err != nil {
+ return "", err
+ }
+ return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil
+}
+
+func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) {
+ wg = &sync.WaitGroup{}
+
+ for i := uint(0); i < h.numThreads; i++ {
+ wg.Add(1)
+ go func(num uint) {
+ defer wg.Done()
+ for {
+ file, ok := <-C
+ if !ok {
+ return
+ }
+ if hash, err := computeHash(file); err != nil {
+ h.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file))
+ } else {
+ h.stdlog.Printf(" - hashed: %s", filepath.Base(file))
+ h.filesMtx.Lock()
+ h.Files[hash] = file
+ h.filesMtx.Unlock()
+ }
+ }
+ }(i)
+ }
+ return
+}
+
+func (h *Hasher) ComputeHashes(w Walker) (err error) {
+ C := make(chan string, 10)
+
+ wg := h.collectHashes(C)
+
+ err = w.Walk(C)
+ if err != nil {
+ return
+ }
+
+ wg.Wait()
+ return
+}
+
+func NewHasher(numThreads uint, stdlog *log.Logger) (h *Hasher) {
+ h = &Hasher{numThreads: numThreads, stdlog: stdlog}
+ h.Files = make(map[string]string)
+ return
+}