diff options
author | Christian Pointner <equinox@helsinki.at> | 2017-01-25 23:49:54 (GMT) |
---|---|---|
committer | Christian Pointner <equinox@helsinki.at> | 2017-01-25 23:49:54 (GMT) |
commit | 5624f3ac623ed2caa8c3c3933e66d63aafc09914 (patch) | |
tree | 7ee90a92df50e50a4d994001bb9d8751f1f88fa9 /src/file-hasher | |
parent | 9d1b81e5997ee9ec8cc5bfd16f6c62a40044006a (diff) |
file-hasher as separate binary
Diffstat (limited to 'src/file-hasher')
-rw-r--r-- | src/file-hasher/dir-hasher.go | 148 | ||||
-rw-r--r-- | src/file-hasher/main.go | 81 |
2 files changed, 229 insertions, 0 deletions
diff --git a/src/file-hasher/dir-hasher.go b/src/file-hasher/dir-hasher.go new file mode 100644 index 0000000..aa204a0 --- /dev/null +++ b/src/file-hasher/dir-hasher.go @@ -0,0 +1,148 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of pool-import. +// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "encoding/base64" + "io" + "log" + "os" + "path/filepath" + "strings" + "sync" + + "golang.org/x/crypto/blake2b" +) + +type FileMap map[string]string + +func (m FileMap) Merge(m2 FileMap) { + for key, value := range m2 { + m[key] = value + } +} + +type MusicDir struct { + root string + stdlog *log.Logger + filesMtx sync.Mutex + Files FileMap +} + +func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { + if err != nil { + return err + } + if info.IsDir() { + stdlog.Printf("entering directory: %s", path) + return nil + } + if !info.Mode().IsRegular() { + stdlog.Printf(" - skipping (special file): %s", info.Name()) + return nil + } + switch strings.ToLower(filepath.Ext(path)) { + case ".flac": + fallthrough + case ".ogg": + fallthrough + case ".wav": + fallthrough + case ".mp3": + fallthrough + case ".aac": + fallthrough + case ".mp4": + fallthrough + case ".m4a": + C <- path + default: + stdlog.Printf(" - skipping (unknown extension): %s", 
info.Name()) + } + return nil +} + +func computeHash(path string) (string, error) { + hash, err := blake2b.New256(nil) + if err != nil { + return "", err + } + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil +} + +func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + + for i := 0; i < numThreads; i++ { + wg.Add(1) + go func(num int) { + defer wg.Done() + for { + file, ok := <-C + if !ok { + return + } + if hash, err := computeHash(file); err != nil { + d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) + } else { + d.stdlog.Printf(" - hashed: %s", filepath.Base(file)) + d.filesMtx.Lock() + d.Files[hash] = file + d.filesMtx.Unlock() + } + } + }(i) + } + return +} + +func (d *MusicDir) ComputeHashes() (err error) { + C := make(chan string, 10) + + wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable + + err = filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error { + return handleEntry(C, path, info, err, d.stdlog) + }) + close(C) + + if err != nil { + return + } + + wg.Wait() + return +} + +func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) { + dir = &MusicDir{root: root, stdlog: stdlog} + dir.Files = make(map[string]string) + return +} diff --git a/src/file-hasher/main.go b/src/file-hasher/main.go new file mode 100644 index 0000000..c25f690 --- /dev/null +++ b/src/file-hasher/main.go @@ -0,0 +1,81 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at> +// +// This file is part of pool-import. 
+// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "log" + "os" + "os/signal" + "syscall" + "time" +) + +const ( + RD_CONF = "/etc/rd.conf" +) + +func main() { + if len(os.Args) < 2 { + log.Fatal("Usage: file-hasher <directory> [ <directory [ .. ] ]") + } + directories := os.Args[1:] + + stdlog := log.New(os.Stderr, "[std] ", log.LstdFlags) + + C := make(chan os.Signal, 1) + signal.Notify(C, os.Interrupt, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM) + + stdlog.Println("***************************************************************") + stdlog.Printf("*** will hash files' from %d directories", len(directories)) + stdlog.Println("***************************************************************") + + starttime := time.Now() + go func() { + defer func() { + C <- syscall.SIGTERM + }() + + stdlog.Println("********************************************") + allFiles := make(FileMap) + for _, root := range directories { + stdlog.Printf("*** hashing all the files in '%s'", root) + stdlog.Println("") + md := NewMusicDir(root, stdlog) + if err := md.ComputeHashes(); err != nil { + return + } + allFiles.Merge(md.Files) + stdlog.Println("") + stdlog.Println("******************************") + } + stdlog.Println("") + for hash, file := range allFiles { + stdlog.Printf("%s: %s", hash, file) + } + stdlog.Println("") + 
stdlog.Println("***************************************************************") + stdlog.Printf("*** hashed %d files in %v", len(allFiles), time.Since(starttime)) + stdlog.Println("***************************************************************") + }() + + <-C +} |