From 9d1b81e5997ee9ec8cc5bfd16f6c62a40044006a Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Wed, 25 Jan 2017 23:57:06 +0100 Subject: actually create file hasher diff --git a/.gitignore b/.gitignore index bc88ec7..ff7ecdf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ bin/ pkg/ src/github.com src/code.helsinki.at +src/golang.org diff --git a/Makefile b/Makefile index 27218b6..48e88d4 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,10 @@ GOCMD := GOPATH=$(curdir) go EXECUTEABLE := pool-import -LIBS := "code.helsinki.at/rhrd-go/rhimport" \ - "code.helsinki.at/rhrd-go/rddb" +LIBS := "code.helsinki.at/rhrd-go/rhimport" \ + "code.helsinki.at/rhrd-go/rddb" \ + "golang.org/x/crypto/blake2b" + .PHONY: getlibs updatelibs vet format build clean distclean diff --git a/src/pool-import/dir-hasher.go b/src/pool-import/dir-hasher.go index 6c1bf9b..aa204a0 100644 --- a/src/pool-import/dir-hasher.go +++ b/src/pool-import/dir-hasher.go @@ -22,18 +22,30 @@ package main import ( + "encoding/base64" + "io" "log" - "math/rand" "os" "path/filepath" + "strings" "sync" - "time" + + "golang.org/x/crypto/blake2b" ) +type FileMap map[string]string + +func (m FileMap) Merge(m2 FileMap) { + for key, value := range m2 { + m[key] = value + } +} + type MusicDir struct { - root string - stdlog *log.Logger - Files map[string]string + root string + stdlog *log.Logger + filesMtx sync.Mutex + Files FileMap } func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error { @@ -45,17 +57,44 @@ func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdl return nil } if !info.Mode().IsRegular() { - stdlog.Printf(" - skipping special file: %s", path) + stdlog.Printf(" - skipping (special file): %s", info.Name()) return nil } - // TODO: check file extensions - C <- path + switch strings.ToLower(filepath.Ext(path)) { + case ".flac": + fallthrough + case ".ogg": + fallthrough + case ".wav": + fallthrough + case ".mp3": + fallthrough + case ".aac": + fallthrough + case ".mp4": + fallthrough + case ".m4a": + C <- path + default: + stdlog.Printf(" - skipping (unknown extension): %s", info.Name()) + } return nil } -func computeHash(file string) string { - time.Sleep(time.Duration(rand.Float64() * float64(5*time.Second))) // TODO: faking it is nice but the real hash would be nicer - return "" +func computeHash(path string) (string, error) { + hash, err := blake2b.New256(nil) + if err != nil { + return "", err + } + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil } func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) { @@ -63,17 +102,21 @@ func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.Wait for i := 0; i < numThreads; i++ { wg.Add(1) - d.stdlog.Printf("Starting hashing thread #%d", i) go func(num int) { defer wg.Done() for { file, ok := <-C if !ok { - d.stdlog.Printf("Stopping hashing thread #%d", num) return } - d.stdlog.Printf(" - hashing: %s", file) - computeHash(file) + if hash, err := computeHash(file); err != nil { + d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file)) + } else { + d.stdlog.Printf(" - hashed: %s", filepath.Base(file)) + d.filesMtx.Lock() + d.Files[hash] = file + d.filesMtx.Unlock() + } } }(i) } diff --git a/src/pool-import/main.go b/src/pool-import/main.go index 0a2ce7a..10a59ec 100644 --- a/src/pool-import/main.go +++ b/src/pool-import/main.go @@ -96,14 +96,25 @@ func main() { C <- syscall.SIGTERM }() + stdlog.Println("********************************************") + allFiles := make(FileMap) for _, root := range directories { - stdlog.Printf("hashing all the files in '%s'", root) + stdlog.Printf("*** hashing all the files in '%s'", root) + stdlog.Println("") md := NewMusicDir(root, stdlog) if err := md.ComputeHashes(); err != nil { return } + allFiles.Merge(md.Files) + stdlog.Println("") + stdlog.Println("******************************") } - stdlog.Printf("all directires hashed") + stdlog.Println("") + for hash, file := range allFiles { + stdlog.Printf("%s: %s", hash, file) + } + stdlog.Println("") + stdlog.Println("********************************************") }() <-C -- cgit v0.10.2