summaryrefslogtreecommitdiff
path: root/src/file-hasher
diff options
context:
space:
mode:
authorChristian Pointner <equinox@helsinki.at>2017-01-25 23:49:54 (GMT)
committerChristian Pointner <equinox@helsinki.at>2017-01-25 23:49:54 (GMT)
commit5624f3ac623ed2caa8c3c3933e66d63aafc09914 (patch)
tree7ee90a92df50e50a4d994001bb9d8751f1f88fa9 /src/file-hasher
parent9d1b81e5997ee9ec8cc5bfd16f6c62a40044006a (diff)
file-hasher as separate binary
Diffstat (limited to 'src/file-hasher')
-rw-r--r--src/file-hasher/dir-hasher.go148
-rw-r--r--src/file-hasher/main.go81
2 files changed, 229 insertions, 0 deletions
diff --git a/src/file-hasher/dir-hasher.go b/src/file-hasher/dir-hasher.go
new file mode 100644
index 0000000..aa204a0
--- /dev/null
+++ b/src/file-hasher/dir-hasher.go
@@ -0,0 +1,148 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "encoding/base64"
+ "io"
+ "log"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "golang.org/x/crypto/blake2b"
+)
+
+// FileMap is a string-to-string map; the hashing code in this file uses
+// the hash of a file as key and the path of that file as value.
+type FileMap map[string]string
+
+// Merge copies every entry of m2 into m. An entry already present in m
+// under the same key is overwritten with the value from m2.
+func (m FileMap) Merge(m2 FileMap) {
+	for k, v := range m2 {
+		m[k] = v
+	}
+}
+
+// MusicDir holds the state needed to hash the files found below one
+// root directory.
+type MusicDir struct {
+ root string // directory passed to filepath.Walk by ComputeHashes
+ stdlog *log.Logger // receives the progress and "skipping" messages
+ filesMtx sync.Mutex // held by the hashing goroutines while they write to Files
+ Files FileMap // filled by the hashing goroutines: hash of a file -> its path
+}
+
+// handleEntry processes a single entry reported by a directory walk.
+//
+// A non-nil err is handed back unchanged. Directories are logged, other
+// non-regular files are logged and skipped. A regular file is sent to C
+// when its extension (compared case-insensitively) is one of the known
+// audio extensions; any other extension is logged and skipped. In all
+// cases except the incoming error the function returns nil.
+func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error {
+	if err != nil {
+		return err
+	}
+
+	switch {
+	case info.IsDir():
+		stdlog.Printf("entering directory: %s", path)
+		return nil
+	case !info.Mode().IsRegular():
+		stdlog.Printf(" - skipping (special file): %s", info.Name())
+		return nil
+	}
+
+	switch strings.ToLower(filepath.Ext(path)) {
+	case ".flac", ".ogg", ".wav", ".mp3", ".aac", ".mp4", ".m4a":
+		// blocks until one of the hashing goroutines (or the channel
+		// buffer) has room for the path
+		C <- path
+	default:
+		stdlog.Printf(" - skipping (unknown extension): %s", info.Name())
+	}
+	return nil
+}
+
+// computeHash streams the file at path through an unkeyed BLAKE2b-256
+// hasher and returns the digest encoded with base64.URLEncoding.
+// An error from creating the hasher, opening the file or reading it is
+// returned together with an empty string.
+func computeHash(path string) (string, error) {
+	hasher, err := blake2b.New256(nil)
+	if err != nil {
+		return "", err
+	}
+
+	in, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer in.Close()
+
+	// io.Copy feeds the file into the hasher without loading the whole
+	// file into memory first.
+	_, err = io.Copy(hasher, in)
+	if err != nil {
+		return "", err
+	}
+
+	digest := hasher.Sum(nil)
+	return base64.URLEncoding.EncodeToString(digest), nil
+}
+
+// collectHashes starts numThreads goroutines that receive file paths
+// from C, hash each file with computeHash and store the result in
+// d.Files (hash -> path) while holding d.filesMtx. A file whose hash
+// cannot be computed is logged and skipped.
+//
+// The goroutines terminate once C is closed and drained. The returned
+// WaitGroup is done when all of them have terminated.
+func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) {
+	wg = new(sync.WaitGroup)
+
+	worker := func() {
+		defer wg.Done()
+		// the range loop ends as soon as C is closed and empty
+		for file := range C {
+			hash, err := computeHash(file)
+			if err != nil {
+				d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file))
+				continue
+			}
+			d.stdlog.Printf(" - hashed: %s", filepath.Base(file))
+
+			d.filesMtx.Lock()
+			d.Files[hash] = file
+			d.filesMtx.Unlock()
+		}
+	}
+
+	for n := 0; n < numThreads; n++ {
+		wg.Add(1)
+		go worker()
+	}
+	return wg
+}
+
+// ComputeHashes walks the directory tree below d.root and hashes every
+// file that handleEntry forwards. The results end up in d.Files.
+//
+// The returned error is the one reported by filepath.Walk, if any.
+// The method always waits for the hashing goroutines before it returns,
+// also when the walk failed. That way no goroutine is still writing to
+// d.Files or to the logger once the caller has the result.
+func (d *MusicDir) ComputeHashes() (err error) {
+	C := make(chan string, 10)
+
+	wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable
+
+	err = filepath.Walk(d.root, func(path string, info os.FileInfo, walkErr error) error {
+		return handleEntry(C, path, info, walkErr, d.stdlog)
+	})
+
+	// Closing C lets the hashing goroutines run out of work and exit.
+	// Wait for them unconditionally: after a failed walk they may still
+	// be busy with paths that were queued before the error occurred.
+	close(C)
+	wg.Wait()
+
+	return err
+}
+
+// NewMusicDir returns a MusicDir for the given root directory which logs
+// to stdlog. Its Files map is allocated and empty.
+func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) {
+	return &MusicDir{
+		root:   root,
+		stdlog: stdlog,
+		Files:  make(FileMap),
+	}
+}
diff --git a/src/file-hasher/main.go b/src/file-hasher/main.go
new file mode 100644
index 0000000..c25f690
--- /dev/null
+++ b/src/file-hasher/main.go
@@ -0,0 +1,81 @@
+//
+// pool-import
+//
+// Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
+//
+// This file is part of pool-import.
+//
+// pool-import is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// any later version.
+//
+// pool-import is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with pool-import. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package main
+
+import (
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+ "time"
+)
+
+const (
+ // RD_CONF is a fixed configuration file path.
+ // NOTE(review): nothing in the code visible in this commit references
+ // this constant - confirm whether file-hasher still needs it.
+ RD_CONF = "/etc/rd.conf"
+)
+
+// main hashes all supported files below the directories given on the
+// command line and prints the resulting hash -> path list to the log.
+//
+// The hashing runs in its own goroutine so that main can react to
+// SIGINT, SIGHUP, SIGQUIT and SIGTERM while it is in progress. The
+// process exits with status 1 when hashing one of the directories
+// fails.
+func main() {
+	if len(os.Args) < 2 {
+		log.Fatal("Usage: file-hasher <directory> [ <directory> [ .. ] ]")
+	}
+	directories := os.Args[1:]
+
+	stdlog := log.New(os.Stderr, "[std] ", log.LstdFlags)
+
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, os.Interrupt, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM)
+
+	stdlog.Println("***************************************************************")
+	stdlog.Printf("*** will hash files from %d directories", len(directories))
+	stdlog.Println("***************************************************************")
+
+	starttime := time.Now()
+
+	// hashAll does the actual work and reports the first error it hits.
+	hashAll := func() error {
+		stdlog.Println("********************************************")
+		allFiles := make(FileMap)
+		for _, root := range directories {
+			stdlog.Printf("*** hashing all the files in '%s'", root)
+			stdlog.Println("")
+			md := NewMusicDir(root, stdlog)
+			if err := md.ComputeHashes(); err != nil {
+				// report the failure instead of exiting silently
+				stdlog.Printf("*** hashing '%s' failed: %v", root, err)
+				return err
+			}
+			allFiles.Merge(md.Files)
+			stdlog.Println("")
+			stdlog.Println("******************************")
+		}
+		stdlog.Println("")
+		for hash, file := range allFiles {
+			stdlog.Printf("%s: %s", hash, file)
+		}
+		stdlog.Println("")
+		stdlog.Println("***************************************************************")
+		stdlog.Printf("*** hashed %d files in %v", len(allFiles), time.Since(starttime))
+		stdlog.Println("***************************************************************")
+		return nil
+	}
+
+	// The buffer of one lets the goroutine deliver its result even when
+	// main is already leaving because of a signal.
+	done := make(chan error, 1)
+	go func() {
+		done <- hashAll()
+	}()
+
+	select {
+	case <-sigs:
+		// interrupted by the user or the system: just exit
+	case err := <-done:
+		if err != nil {
+			os.Exit(1)
+		}
+	}
+}