From 5624f3ac623ed2caa8c3c3933e66d63aafc09914 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 26 Jan 2017 00:49:54 +0100 Subject: file-hasher as seperate binary diff --git a/Makefile b/Makefile index 48e88d4..a6ea748 100644 --- a/Makefile +++ b/Makefile @@ -20,9 +20,14 @@ ## curdir:= $(shell pwd) +ifndef GOROOT GOCMD := GOPATH=$(curdir) go +else +GOCMD := GOPATH=$(curdir) $(GOROOT)/bin/go +endif EXECUTEABLE := pool-import +EXECUTEABLE_HASHER := file-hasher LIBS := "code.helsinki.at/rhrd-go/rhimport" \ "code.helsinki.at/rhrd-go/rddb" \ @@ -43,19 +48,26 @@ updatelibs: vet: @echo "vetting: $(EXECUTEABLE)" @$(GOCMD) vet $(EXECUTEABLE) + @echo "vetting: $(EXECUTEABLE_HASHER)" + @$(GOCMD) vet $(EXECUTEABLE_HASHER) format: @echo "formating: $(EXECUTEABLE)" @$(GOCMD) fmt $(EXECUTEABLE) + @echo "formating: $(EXECUTEABLE_HASHER)" + @$(GOCMD) fmt $(EXECUTEABLE_HASHER) build: getlibs @echo "installing: $(EXECUTEABLE)" @$(GOCMD) install $(EXECUTEABLE) + @echo "installing: $(EXECUTEABLE_HASHER)" + @$(GOCMD) install $(EXECUTEABLE_HASHER) clean: rm -rf pkg/*/$(EXECUTEABLE) + rm -rf pkg/*/$(EXECUTEABLE_HASHER) rm -rf bin distclean: clean - @$(foreach dir,$(shell ls src/),$(if $(subst $(EXECUTEABLE),,$(dir)),$(shell rm -rf src/$(dir)))) + @$(foreach dir,$(shell ls src/),$(if $(and $(subst $(EXECUTEABLE),,$(dir)), $(subst $(EXECUTEABLE_HASHER),,$(dir))),$(shell rm -rf src/$(dir)))) rm -rf pkg diff --git a/src/file-hasher/dir-hasher.go b/src/file-hasher/dir-hasher.go new file mode 100644 index 0000000..aa204a0 --- /dev/null +++ b/src/file-hasher/dir-hasher.go @@ -0,0 +1,148 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner +// +// This file is part of pool-import. +// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. 
+// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "encoding/base64" + "io" + "log" + "os" + "path/filepath" + "strings" + "sync" + + "golang.org/x/crypto/blake2b" +) + +type FileMap map[string]string + +func (m FileMap) Merge(m2 FileMap) { + for key, value := range m2 { + m[key] = value + } +} + +type MusicDir struct { + root string + stdlog *log.Logger + filesMtx sync.Mutex + Files FileMap +} + +func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { + if err != nil { + return err + } + if info.IsDir() { + stdlog.Printf("entering directory: %s", path) + return nil + } + if !info.Mode().IsRegular() { + stdlog.Printf(" - skipping (special file): %s", info.Name()) + return nil + } + switch strings.ToLower(filepath.Ext(path)) { + case ".flac": + fallthrough + case ".ogg": + fallthrough + case ".wav": + fallthrough + case ".mp3": + fallthrough + case ".aac": + fallthrough + case ".mp4": + fallthrough + case ".m4a": + C <- path + default: + stdlog.Printf(" - skipping (unknown extension): %s", info.Name()) + } + return nil +} + +func computeHash(path string) (string, error) { + hash, err := blake2b.New256(nil) + if err != nil { + return "", err + } + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil +} + +func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + + for i := 0; i < numThreads; i++ { + wg.Add(1) + go func(num int) { + defer wg.Done() + for { 
+ file, ok := <-C + if !ok { + return + } + if hash, err := computeHash(file); err != nil { + d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) + } else { + d.stdlog.Printf(" - hashed: %s", filepath.Base(file)) + d.filesMtx.Lock() + d.Files[hash] = file + d.filesMtx.Unlock() + } + } + }(i) + } + return +} + +func (d *MusicDir) ComputeHashes() (err error) { + C := make(chan string, 10) + + wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable + + err = filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error { + return handleEntry(C, path, info, err, d.stdlog) + }) + close(C) + + // Always wait for the workers, even when the walk failed: they drain + // the closed channel and exit, so no goroutine is still writing to + // d.Files after this call returns. + wg.Wait() + + return +} + +func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) { + dir = &MusicDir{root: root, stdlog: stdlog} + dir.Files = make(map[string]string) + return +} diff --git a/src/file-hasher/main.go b/src/file-hasher/main.go new file mode 100644 index 0000000..c25f690 --- /dev/null +++ b/src/file-hasher/main.go @@ -0,0 +1,81 @@ +// +// pool-import +// +// Copyright (C) 2016 Christian Pointner +// +// This file is part of pool-import. +// +// pool-import is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// any later version. +// +// pool-import is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with pool-import. If not, see <http://www.gnu.org/licenses/>. +// + +package main + +import ( + "log" + "os" + "os/signal" + "syscall" + "time" +) + +const ( + RD_CONF = "/etc/rd.conf" +) + +func main() { + if len(os.Args) < 2 { + log.Fatal("Usage: file-hasher [ -// -// This file is part of pool-import. 
-// -// pool-import is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// any later version. -// -// pool-import is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with pool-import. If not, see <http://www.gnu.org/licenses/>. -// - -package main - -import ( - "encoding/base64" - "io" - "log" - "os" - "path/filepath" - "strings" - "sync" - - "golang.org/x/crypto/blake2b" -) - -type FileMap map[string]string - -func (m FileMap) Merge(m2 FileMap) { - for key, value := range m2 { - m[key] = value - } -} - -type MusicDir struct { - root string - stdlog *log.Logger - filesMtx sync.Mutex - Files FileMap -} - -func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { - if err != nil { - return err - } - if info.IsDir() { - stdlog.Printf("entering directory: %s", path) - return nil - } - if !info.Mode().IsRegular() { - stdlog.Printf(" - skipping (special file): %s", info.Name()) - return nil - } - switch strings.ToLower(filepath.Ext(path)) { - case ".flac": - fallthrough - case ".ogg": - fallthrough - case ".wav": - fallthrough - case ".mp3": - fallthrough - case ".aac": - fallthrough - case ".mp4": - fallthrough - case ".m4a": - C <- path - default: - stdlog.Printf(" - skipping (unknown extension): %s", info.Name()) - } - return nil -} - -func computeHash(path string) (string, error) { - hash, err := blake2b.New256(nil) - if err != nil { - return "", err - } - file, err := os.Open(path) - if err != nil { - return "", err - } - defer file.Close() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - return 
base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil -} - -func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) { - wg = &sync.WaitGroup{} - - for i := 0; i < numThreads; i++ { - wg.Add(1) - go func(num int) { - defer wg.Done() - for { - file, ok := <-C - if !ok { - return - } - if hash, err := computeHash(file); err != nil { - d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) - } else { - d.stdlog.Printf(" - hashed: %s", filepath.Base(file)) - d.filesMtx.Lock() - d.Files[hash] = file - d.filesMtx.Unlock() - } - } - }(i) - } - return -} - -func (d *MusicDir) ComputeHashes() (err error) { - C := make(chan string, 10) - - wg := d.collectHashes(C, 4) // TODO: make number of hashing threads configurable - - err = filepath.Walk(d.root, func(path string, info os.FileInfo, err error) error { - return handleEntry(C, path, info, err, d.stdlog) - }) - close(C) - - if err != nil { - return - } - - wg.Wait() - return -} - -func NewMusicDir(root string, stdlog *log.Logger) (dir *MusicDir) { - dir = &MusicDir{root: root, stdlog: stdlog} - dir.Files = make(map[string]string) - return -} diff --git a/src/pool-import/main.go b/src/pool-import/main.go index 10a59ec..0963ed0 100644 --- a/src/pool-import/main.go +++ b/src/pool-import/main.go @@ -96,25 +96,7 @@ func main() { C <- syscall.SIGTERM }() - stdlog.Println("********************************************") - allFiles := make(FileMap) - for _, root := range directories { - stdlog.Printf("*** hashing all the files in '%s'", root) - stdlog.Println("") - md := NewMusicDir(root, stdlog) - if err := md.ComputeHashes(); err != nil { - return - } - allFiles.Merge(md.Files) - stdlog.Println("") - stdlog.Println("******************************") - } - stdlog.Println("") - for hash, file := range allFiles { - stdlog.Printf("%s: %s", hash, file) - } - stdlog.Println("") - stdlog.Println("********************************************") + // TODO: use file-hasher to get file hashes }() <-C -- 
cgit v0.10.2