summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Pointner <equinox@helsinki.at>2017-01-25 22:57:06 (GMT)
committerChristian Pointner <equinox@helsinki.at>2017-01-25 22:57:06 (GMT)
commit9d1b81e5997ee9ec8cc5bfd16f6c62a40044006a (patch)
tree0f37df2bcc354601943a12ff97fb1ca9c31b9234
parentbdff8e99eb4062fbdc175da075634f063a5b8f42 (diff)
actually create file hasher
-rw-r--r--.gitignore1
-rw-r--r--Makefile6
-rw-r--r--src/pool-import/dir-hasher.go73
-rw-r--r--src/pool-import/main.go15
4 files changed, 76 insertions, 19 deletions
diff --git a/.gitignore b/.gitignore
index bc88ec7..ff7ecdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ bin/
pkg/
src/github.com
src/code.helsinki.at
+src/golang.org
diff --git a/Makefile b/Makefile
index 27218b6..48e88d4 100644
--- a/Makefile
+++ b/Makefile
@@ -24,8 +24,10 @@ GOCMD := GOPATH=$(curdir) go
EXECUTEABLE := pool-import
-LIBS := "code.helsinki.at/rhrd-go/rhimport" \
- "code.helsinki.at/rhrd-go/rddb"
+LIBS := "code.helsinki.at/rhrd-go/rhimport" \
+ "code.helsinki.at/rhrd-go/rddb" \
+ "golang.org/x/crypto/blake2b"
+
.PHONY: getlibs updatelibs vet format build clean distclean
diff --git a/src/pool-import/dir-hasher.go b/src/pool-import/dir-hasher.go
index 6c1bf9b..aa204a0 100644
--- a/src/pool-import/dir-hasher.go
+++ b/src/pool-import/dir-hasher.go
@@ -22,18 +22,30 @@
package main
import (
+ "encoding/base64"
+ "io"
"log"
- "math/rand"
"os"
"path/filepath"
+ "strings"
"sync"
- "time"
+
+ "golang.org/x/crypto/blake2b"
)
+type FileMap map[string]string
+
+func (m FileMap) Merge(m2 FileMap) {
+ for key, value := range m2 {
+ m[key] = value
+ }
+}
+
type MusicDir struct {
- root string
- stdlog *log.Logger
- Files map[string]string
+ root string
+ stdlog *log.Logger
+ filesMtx sync.Mutex
+ Files FileMap
}
func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdlog *log.Logger) error {
@@ -45,17 +57,44 @@ func handleEntry(C chan<- string, path string, info os.FileInfo, err error, stdl
return nil
}
if !info.Mode().IsRegular() {
- stdlog.Printf(" - skipping special file: %s", path)
+ stdlog.Printf(" - skipping (special file): %s", info.Name())
return nil
}
- // TODO: check file extensions
- C <- path
+ switch strings.ToLower(filepath.Ext(path)) {
+ case ".flac":
+ fallthrough
+ case ".ogg":
+ fallthrough
+ case ".wav":
+ fallthrough
+ case ".mp3":
+ fallthrough
+ case ".aac":
+ fallthrough
+ case ".mp4":
+ fallthrough
+ case ".m4a":
+ C <- path
+ default:
+ stdlog.Printf(" - skipping (unknown extension): %s", info.Name())
+ }
return nil
}
-func computeHash(file string) string {
- time.Sleep(time.Duration(rand.Float64() * float64(5*time.Second))) // TODO: faking it is nice but the real hash would be nicer
- return ""
+func computeHash(path string) (string, error) {
+ hash, err := blake2b.New256(nil)
+ if err != nil {
+ return "", err
+ }
+ file, err := os.Open(path)
+ if err != nil {
+ return "", err
+ }
+ defer file.Close()
+ if _, err := io.Copy(hash, file); err != nil {
+ return "", err
+ }
+ return base64.URLEncoding.EncodeToString(hash.Sum(nil)), nil
}
func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.WaitGroup) {
@@ -63,17 +102,21 @@ func (d *MusicDir) collectHashes(C <-chan string, numThreads int) (wg *sync.Wait
for i := 0; i < numThreads; i++ {
wg.Add(1)
- d.stdlog.Printf("Starting hashing thread #%d", i)
go func(num int) {
defer wg.Done()
for {
file, ok := <-C
if !ok {
- d.stdlog.Printf("Stopping hashing thread #%d", num)
return
}
- d.stdlog.Printf(" - hashing: %s", file)
- computeHash(file)
+ if hash, err := computeHash(file); err != nil {
+ d.stdlog.Printf(" - skipping (%v): %s", err, filepath.Base(file))
+ } else {
+ d.stdlog.Printf(" - hashed: %s", filepath.Base(file))
+ d.filesMtx.Lock()
+ d.Files[hash] = file
+ d.filesMtx.Unlock()
+ }
}
}(i)
}
diff --git a/src/pool-import/main.go b/src/pool-import/main.go
index 0a2ce7a..10a59ec 100644
--- a/src/pool-import/main.go
+++ b/src/pool-import/main.go
@@ -96,14 +96,25 @@ func main() {
C <- syscall.SIGTERM
}()
+ stdlog.Println("********************************************")
+ allFiles := make(FileMap)
for _, root := range directories {
- stdlog.Printf("hashing all the files in '%s'", root)
+ stdlog.Printf("*** hashing all the files in '%s'", root)
+ stdlog.Println("")
md := NewMusicDir(root, stdlog)
if err := md.ComputeHashes(); err != nil {
return
}
+ allFiles.Merge(md.Files)
+ stdlog.Println("")
+ stdlog.Println("******************************")
}
- stdlog.Printf("all directires hashed")
+ stdlog.Println("")
+ for hash, file := range allFiles {
+ stdlog.Printf("%s: %s", hash, file)
+ }
+ stdlog.Println("")
+ stdlog.Println("********************************************")
}()
<-C