summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Pointner <equinox@helsinki.at>2017-01-27 19:46:41 (GMT)
committerChristian Pointner <equinox@helsinki.at>2017-01-27 19:46:41 (GMT)
commitfbcf3776c59aae5e2e6c6e4c700af3da1489af1f (patch)
tree6a7c9caa459e84a8dd19e14dec77b044729636e6
parent056b224e3241c6cf889ba1c7b0eba85a8f33cc4a (diff)
separate walker from hasher
-rw-r--r--src/file-hasher/dir-hasher.go152
-rw-r--r--src/file-hasher/dir-walker.go70
-rw-r--r--src/file-hasher/hasher.go110
-rw-r--r--src/file-hasher/main.go11
-rw-r--r--src/file-hasher/walker.go51
5 files changed, 236 insertions, 158 deletions
diff --git a/src/file-hasher/dir-hasher.go b/src/file-hasher/dir-hasher.go
deleted file mode 100644
index 7f5e45b..0000000
--- a/src/file-hasher/dir-hasher.go
+++ /dev/null
@@ -1,152 +0,0 @@
-//
-// pool-import
-//
-// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
-//
-// This file is part of pool-import.
-//
-// pool-import is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// any later version.
-//
-// pool-import is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
-//
-
-package main
-
-import (
- "encoding/base64"
- "io"
- "log"
- "os"
- "path/filepath"
- "strings"
- "sync"
-
- "golang.org/x/crypto/blake2b"
-)
-
-type FileMap map[string]string
-
-func (m FileMap) Merge(m2 FileMap) {
- for key, value := range m2 {
- m[key] = value
- }
-}
-
-type MusicDir struct {
- root string
- stdlog *log.Logger
- filesMtx sync.Mutex
- Files FileMap
-}
-
-func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error {
- if err != nil {
- return err
- }
- if info.IsDir() {
- stdlog.Printf("entering directory: %s", path)
- return nil
- }
- if !info.Mode().IsRegular() {
- stdlog.Printf(" - skipping (special file): %s", info.Name())
- return nil
- }
- switch strings.ToLower(filepath.Ext(path)) {
- case ".flac":
- fallthrough
- case ".ogg":
- fallthrough
- case ".wav":
- fallthrough
- case ".mp3":
- fallthrough
- case ".aac":
- fallthrough
- case ".mp4":
- fallthrough
- case ".m4a":
- if absPath, err := filepath.Abs(path); err != nil {
- stdlog.Printf(" - skipping (%v): %s", err, info.Name())
- } else {
- C <- absPath
- }
- default:
- stdlog.Printf(" - skipping (unknown extension): %s", info.Name())
- }
- return nil
-}
-
-func computeHash(path string) (string, error) {
- hash, err := blake2b.New256(nil)
- if err != nil {
- return "", err
- }
- file, err := os.Open(path)
- if err != nil {
- return "", err
- }
- defer file.Close()
- if _, err := io.Copy(hash, file); err != nil {
- return "", err
- }
- return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil
-}
-
-func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) {
- wg = &sync.WaitGroup{}
-
- for i := 0; i < numThreads; i++ {
- wg.Add(1)
- go func(num int) {
- defer wg.Done()
- for {
- file, ok := <-C
- if !ok {
- return
- }
- if hash, err := computeHash(file); err != nil {
- d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file))
- } else {
- d.stdlog.Printf(" - hashed: %s", filepath.Base(file))
- d.filesMtx.Lock()
- d.Files[hash] = file
- d.filesMtx.Unlock()
- }
- }
- }(i)
- }
- return
-}
-
-func (d *MusicDir) ComputeHashes() (err error) {
- C := make(chan string, 10)
-
- wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable
-
- err = filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error {
- return handleEntry(C, path, info, err, d.stdlog)
- })
- close(C)
-
- if err != nil {
- return
- }
-
- wg.Wait()
- return
-}
-
-func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) {
- dir = &MusicDir{root: root, stdlog: stdlog}
- dir.Files = make(map[string]string)
- return
-}
diff --git a/src/file-hasher/dir-walker.go b/src/file-hasher/dir-walker.go
new file mode 100644
index 0000000..0aa7ba5
--- /dev/null
+++ b/src/file-hasher/dir-walker.go
@@ -0,0 +1,70 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "log"
+ "os"
+ "path/filepath"
+)
+
+// Dir is a directory tree rooted at root. Its Walk method sends the files
+// below root that pass checkFileExt to a channel.
+type Dir struct {
+ root string // starting point handed to filepath.Walk
+ stdlog *log.Logger // logger for per-entry progress and skip messages
+}
+
+// handleEntry processes a single entry reported by filepath.Walk. An error
+// reported for the entry is returned unchanged. Directories are logged;
+// entries that are not regular files or that do not pass checkFileExt are
+// logged and skipped. For every remaining file the absolute path is sent on
+// C; if the absolute path cannot be determined the file is logged and
+// skipped instead.
+func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error {
+	switch {
+	case err != nil:
+		return err
+	case info.IsDir():
+		stdlog.Printf("entering directory: %s", path)
+		return nil
+	case !info.Mode().IsRegular():
+		stdlog.Printf(" - skipping (special file): %s", info.Name())
+		return nil
+	case !checkFileExt(path):
+		stdlog.Printf(" - skipping (unknown extension): %s", info.Name())
+		return nil
+	}
+
+	absPath, absErr := filepath.Abs(path)
+	if absErr != nil {
+		stdlog.Printf(" - skipping (%v): %s", absErr, info.Name())
+		return nil
+	}
+	C <- absPath
+	return nil
+}
+
+// Walk traverses the tree below d.root with filepath.Walk and hands every
+// entry to handleEntry, which sends the accepted files on C. C is closed
+// before Walk returns, whether or not the traversal failed. The returned
+// error is the one reported by filepath.Walk.
+func (d *Dir) Walk(C chan<- string) (err error) {
+	defer close(C)
+
+	visit := func(path string, info os.FileInfo, walkErr error) error {
+		return handleEntry(C, path, info, walkErr, d.stdlog)
+	}
+	err = filepath.Walk(d.root, visit)
+	return err
+}
+
+// NewDir returns a Dir for the tree rooted at root that logs to stdlog.
+func NewDir(root string, stdlog *log.Logger) (dir *Dir) {
+	return &Dir{root: root, stdlog: stdlog}
+}
diff --git a/src/file-hasher/hasher.go b/src/file-hasher/hasher.go
new file mode 100644
index 0000000..afc35fd
--- /dev/null
+++ b/src/file-hasher/hasher.go
@@ -0,0 +1,110 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "encoding/base64"
+ "io"
+ "log"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "golang.org/x/crypto/blake2b"
+)
+
+// FileMap maps strings to strings. The Hasher in this file uses it to store
+// file paths keyed by the encoded hash of the file.
+type FileMap map[string]string
+
+// Merge copies every entry of m2 into m. An entry of m whose key also occurs
+// in m2 is overwritten with the value from m2.
+func (m FileMap) Merge(m2 FileMap) {
+	for k := range m2 {
+		m[k] = m2[k]
+	}
+}
+
+// Hasher hashes files on numThreads worker goroutines and records the
+// results in Files.
+type Hasher struct {
+ numThreads uint // number of worker goroutines started by collectHashes
+ stdlog *log.Logger // logger for per-file results
+ filesMtx sync.Mutex // held by the workers while they write to Files
+ Files FileMap // encoded hash -> path of the hashed file
+}
+
+// computeHash returns the BLAKE2b-256 digest of the contents of the file at
+// path, encoded with the URL-safe base64 alphabet. An error is returned if
+// the hash cannot be created or the file cannot be opened or read.
+func computeHash(path string) (string, error) {
+	digest, err := blake2b.New256(nil)
+	if err != nil {
+		return "", err
+	}
+
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	// Stream the file through the hash rather than reading it into memory.
+	_, err = io.Copy(digest, f)
+	if err != nil {
+		return "", err
+	}
+	sum := digest.Sum(nil)
+	return base64.URLEncoding.EncodeToString(sum), nil
+}
+
+// collectHashes starts h.numThreads worker goroutines that receive file
+// paths from C until it is closed. Each path is hashed with computeHash; on
+// success the path is stored in h.Files under its hash, on failure the file
+// is logged and skipped. The returned WaitGroup is done once every worker
+// has returned.
+func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) {
+	wg = &sync.WaitGroup{}
+
+	worker := func() {
+		defer wg.Done()
+		for file := range C {
+			hash, err := computeHash(file)
+			if err != nil {
+				h.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file))
+				continue
+			}
+			h.stdlog.Printf(" - hashed: %s", filepath.Base(file))
+			h.filesMtx.Lock()
+			h.Files[hash] = file
+			h.filesMtx.Unlock()
+		}
+	}
+
+	for i := uint(0); i < h.numThreads; i++ {
+		wg.Add(1)
+		go worker()
+	}
+	return wg
+}
+
+// ComputeHashes hashes every file that w sends on the channel passed to its
+// Walk method and records the results in h.Files. It returns the error
+// reported by w.Walk, if any.
+//
+// The workers only stop once the channel is closed, and ComputeHashes never
+// closes it itself, so w.Walk must close the channel before it returns, as
+// Dir.Walk does.
+func (h *Hasher) ComputeHashes(w Walker) (err error) {
+	C := make(chan string, 10)
+
+	wg := h.collectHashes(C)
+
+	err = w.Walk(C)
+	// Wait even when the walk failed: files queued before the failure are
+	// still being hashed, and returning early would leave the workers
+	// running and writing to h.Files after this call has returned.
+	wg.Wait()
+	return err
+}
+
+// NewHasher returns a Hasher with an empty Files map that hashes on
+// numThreads worker goroutines and logs to stdlog.
+//
+// A numThreads of 0 is raised to 1: with no worker nothing receives from the
+// channel, so no file would ever be hashed and the walker would block as
+// soon as the channel buffer is full.
+func NewHasher(numThreads uint, stdlog *log.Logger) (h *Hasher) {
+	if numThreads == 0 {
+		numThreads = 1
+	}
+	return &Hasher{
+		numThreads: numThreads,
+		stdlog:     stdlog,
+		Files:      make(FileMap),
+	}
+}
diff --git a/src/file-hasher/main.go b/src/file-hasher/main.go
index 1bb0132..051bb33 100644
--- a/src/file-hasher/main.go
+++ b/src/file-hasher/main.go
@@ -56,25 +56,24 @@ func main() {
}()
stdlog.Println("********************************************")
- allFiles := make(FileMap)
+ h := NewHasher(4, stdlog) // TODO: make number of threads configurable
for _, root := range directories {
+ d := NewDir(root, stdlog)
stdlog.Printf("*** hashing all the files in '%s'", root)
stdlog.Println("")
- md := NewMusicDir(root, stdlog)
- if err := md.ComputeHashes(); err != nil {
+ if err := h.ComputeHashes(d); err != nil {
return
}
- allFiles.Merge(md.Files)
stdlog.Println("")
stdlog.Println("******************************")
}
stdlog.Println("")
stdlog.Println("***************************************************************")
- stdlog.Printf("*** hashed %d files in %v", len(allFiles), time.Since(starttime))
+ stdlog.Printf("*** hashed %d files in %v", len(h.Files), time.Since(starttime))
stdlog.Println("***************************************************************")
enc := json.NewEncoder(os.Stdout)
- if err := enc.Encode(allFiles); err != nil {
+ if err := enc.Encode(h.Files); err != nil {
stdlog.Printf("Error encoding JSON: %v", err)
}
}()
diff --git a/src/file-hasher/walker.go b/src/file-hasher/walker.go
new file mode 100644
index 0000000..cc543a2
--- /dev/null
+++ b/src/file-hasher/walker.go
@@ -0,0 +1,51 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "path/filepath"
+ "strings"
+)
+
+// checkFileExt reports whether the extension of path, compared without
+// regard to case, is one of .flac, .ogg, .wav, .mp3, .aac, .mp4 or .m4a.
+func checkFileExt(path string) bool {
+	switch strings.ToLower(filepath.Ext(path)) {
+	case ".flac", ".ogg", ".wav", ".mp3", ".aac", ".mp4", ".m4a":
+		return true
+	default:
+		return false
+	}
+}
+
+// Walker is the source of file paths for Hasher.ComputeHashes.
+type Walker interface {
+ // Walk sends file paths on C. Dir.Walk, the implementation in
+ // dir-walker.go, sends absolute paths and closes C before returning.
+ Walk(C chan<- string) error
+}