diff options
author | Christian Pointner <equinox@helsinki.at> | 2017-01-27 19:46:41 (GMT) |
---|---|---|
committer | Christian Pointner <equinox@helsinki.at> | 2017-01-27 19:46:41 (GMT) |
commit | fbcf3776c59aae5e2e6c6e4c700af3da1489af1f (patch) | |
tree | 6a7c9caa459e84a8dd19e14dec77b044729636e6 /src/file-hasher | |
parent | 056b224e3241c6cf889ba1c7b0eba85a8f33cc4a (diff) |
separate walker from hasher
Diffstat (limited to 'src/file-hasher')
-rw-r--r-- | src/file-hasher/dir-hasher.go | 152 | ||||
-rw-r--r-- | src/file-hasher/dir-walker.go | 70 | ||||
-rw-r--r-- | src/file-hasher/hasher.go | 110 | ||||
-rw-r--r-- | src/file-hasher/main.go | 11 | ||||
-rw-r--r-- | src/file-hasher/walker.go | 51 |
5 files changed, 236 insertions, 158 deletions
diff --git a/src/file-hasher/dir-hasher.go b/src/file-hasher/dir-hasher.go deleted file mode 100644 index 7f5e45b..0000000 --- a/src/file-hasher/dir-hasher.go +++ /dev/null @@ -1,152 +0,0 @@ -// -// pool-import -// -// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> -// -// This file is part of pool-import. -// -// pool-import is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// any later version. -// -// pool-import is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with pool-import. If not, see <http://www.gnu.org/licenses/>. -// - -package main - -import ( - "encoding/base64" - "io" - "log" - "os" - "path/filepath" - "strings" - "sync" - - "golang.org/x/crypto/blake2b" -) - -type FileMap map[string]string - -func (m FileMap) Merge(m2 FileMap) { - for key, value := range m2 { - m[key] = value - } -} - -type MusicDir struct { - root string - stdlog *log.Logger - filesMtx sync.Mutex - Files FileMap -} - -func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { - if err != nil { - return err - } - if info.IsDir() { - stdlog.Printf("entering directory: %s", path) - return nil - } - if !info.Mode().IsRegular() { - stdlog.Printf(" - skipping (special file): %s", info.Name()) - return nil - } - switch strings.ToLower(filepath.Ext(path)) { - case ".flac": - fallthrough - case ".ogg": - fallthrough - case ".wav": - fallthrough - case ".mp3": - fallthrough - case ".aac": - fallthrough - case ".mp4": - fallthrough - case ".m4a": - if absPath, err := filepath.Abs(path); err != nil { - stdlog.Printf(" - 
skipping (%v): %s", err, info.Name()) - } else { - C <- absPath - } - default: - stdlog.Printf(" - skipping (unknown extension): %s", info.Name()) - } - return nil -} - -func computeHash(path string) (string, error) { - hash, err := blake2b.New256(nil) - if err != nil { - return "", err - } - file, err := os.Open(path) - if err != nil { - return "", err - } - defer file.Close() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil -} - -func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) { - wg = &sync.WaitGroup{} - - for i := 0; i < numThreads; i++ { - wg.Add(1) - go func(num int) { - defer wg.Done() - for { - file, ok := <-C - if !ok { - return - } - if hash, err := computeHash(file); err != nil { - d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) - } else { - d.stdlog.Printf(" - hashed: %s", filepath.Base(file)) - d.filesMtx.Lock() - d.Files[hash] = file - d.filesMtx.Unlock() - } - } - }(i) - } - return -} - -func (d *MusicDir) ComputeHashes() (err error) { - C := make(chan string, 10) - - wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable - - err = filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error { - return handleEntry(C, path, info, err, d.stdlog) - }) - close(C) - - if err != nil { - return - } - - wg.Wait() - return -} - -func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) { - dir = &MusicDir{root: root, stdlog: stdlog} - dir.Files = make(map[string]string) - return -} diff --git a/src/file-hasher/dir-walker.go b/src/file-hasher/dir-walker.go new file mode 100644 index 0000000..0aa7ba5 --- /dev/null +++ b/src/file-hasher/dir-walker.go @@ -0,0 +1,70 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of pool-import. 
+// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "log" + "os" + "path/filepath" +) + +type Dir struct { + root string + stdlog *log.Logger +} + +func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { + if err != nil { + return err + } + if info.IsDir() { + stdlog.Printf("entering directory: %s", path) + return nil + } + if !info.Mode().IsRegular() { + stdlog.Printf(" - skipping (special file): %s", info.Name()) + return nil + } + if checkFileExt(path) { + if absPath, err := filepath.Abs(path); err != nil { + stdlog.Printf(" - skipping (%v): %s", err, info.Name()) + } else { + C <- absPath + } + } else { + stdlog.Printf(" - skipping (unknown extension): %s", info.Name()) + } + return nil +} + +func (d *Dir) Walk(C chan<- string) (err error) { + defer close(C) + + return filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error { + return handleEntry(C, path, info, err, d.stdlog) + }) +} + +func NewDir(root string, stdlog *log.Logger) (dir *Dir) { + dir = &Dir{root: root, stdlog: stdlog} + return +} diff --git a/src/file-hasher/hasher.go b/src/file-hasher/hasher.go new file mode 100644 index 0000000..afc35fd --- /dev/null +++ b/src/file-hasher/hasher.go @@ -0,0 +1,110 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of 
pool-import. +// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "encoding/base64" + "io" + "log" + "os" + "path/filepath" + "sync" + + "golang.org/x/crypto/blake2b" +) + +type FileMap map[string]string + +func (m FileMap) Merge(m2 FileMap) { + for key, value := range m2 { + m[key] = value + } +} + +type Hasher struct { + numThreads uint + stdlog *log.Logger + filesMtx sync.Mutex + Files FileMap +} + +func computeHash(path string) (string, error) { + hash, err := blake2b.New256(nil) + if err != nil { + return "", err + } + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil +} + +func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + + for i := uint(0); i < h.numThreads; i++ { + wg.Add(1) + go func(num uint) { + defer wg.Done() + for { + file, ok := <-C + if !ok { + return + } + if hash, err := computeHash(file); err != nil { + h.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) + } else { + h.stdlog.Printf(" - hashed: %s", filepath.Base(file)) + h.filesMtx.Lock() + h.Files[hash] = file + h.filesMtx.Unlock() + } + } + }(i) + } + return +} + +func (h *Hasher) ComputeHashes(w Walker) (err error) { + C := make(chan string, 10) + + wg := 
h.collectHashes(C) + + err = w.Walk(C) + if err != nil { + return + } + + wg.Wait() + return +} + +func NewHasher(numThreads uint, stdlog *log.Logger) (h *Hasher) { + h = &Hasher{numThreads: numThreads, stdlog: stdlog} + h.Files = make(map[string]string) + return +} diff --git a/src/file-hasher/main.go b/src/file-hasher/main.go index 1bb0132..051bb33 100644 --- a/src/file-hasher/main.go +++ b/src/file-hasher/main.go @@ -56,25 +56,24 @@ func main() { }() stdlog.Println("********************************************") - allFiles := make(FileMap) + h := NewHasher(4, stdlog) // TODO: make number of threads configurable for _, root := range directories { + d := NewDir(root, stdlog) stdlog.Printf("*** hashing all the files in '%s'", root) stdlog.Println("") - md := NewMusicDir(root, stdlog) - if err := md.ComputeHashes(); err != nil { + if err := h.ComputeHashes(d); err != nil { return } - allFiles.Merge(md.Files) stdlog.Println("") stdlog.Println("******************************") } stdlog.Println("") stdlog.Println("***************************************************************") - stdlog.Printf("*** hashed %d files in %v", len(allFiles), time.Since(starttime)) + stdlog.Printf("*** hashed %d files in %v", len(h.Files), time.Since(starttime)) stdlog.Println("***************************************************************") enc := json.NewEncoder(os.Stdout) - if err := enc.Encode(allFiles); err != nil { + if err := enc.Encode(h.Files); err != nil { stdlog.Printf("Error encoding JSON: %v", err) } }() diff --git a/src/file-hasher/walker.go b/src/file-hasher/walker.go new file mode 100644 index 0000000..cc543a2 --- /dev/null +++ b/src/file-hasher/walker.go @@ -0,0 +1,51 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of pool-import. 
+// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "path/filepath" + "strings" +) + +func checkFileExt(path string) bool { + switch strings.ToLower(filepath.Ext(path)) { + case ".flac": + return true + case ".ogg": + return true + case ".wav": + return true + case ".mp3": + return true + case ".aac": + return true + case ".mp4": + return true + case ".m4a": + return true + } + return false +} + +type Walker interface { + Walk(C chan<- string) error +} |