summaryrefslogtreecommitdiff
path: root/src/file-hasher/hasher.go
blob: afc35fd2b21b0588dc45246bad083cedb8f6b897 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
//
//  pool-import
//
//  Copyright (C) 2016 Christian Pointner <equinox@helsinki.at>
//
//  This file is part of pool-import.
//
//  pool-import is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  any later version.
//
//  pool-import is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with pool-import. If not, see <http://www.gnu.org/licenses/>.
//

package main

import (
	"encoding/base64"
	"io"
	"log"
	"os"
	"path/filepath"
	"sync"

	"golang.org/x/crypto/blake2b"
)

// FileMap is a string-keyed map of strings. Hasher uses it to map a file's
// hash to the path of the file that produced it.
type FileMap map[string]string

// Merge copies every entry of m2 into m. Keys already present in m are
// overwritten with the value from m2; m2 itself is left unchanged.
func (m FileMap) Merge(m2 FileMap) {
	for k := range m2 {
		m[k] = m2[k]
	}
}

// Hasher computes hashes for a set of files using a pool of worker
// goroutines and collects the results in Files.
type Hasher struct {
	// numThreads is the number of worker goroutines collectHashes starts.
	numThreads uint
	// stdlog receives one line per file, reporting whether it was hashed
	// or skipped because of an error.
	stdlog *log.Logger
	// filesMtx is held by the workers while they write to Files.
	filesMtx sync.Mutex
	// Files maps the hash string returned by computeHash to the path of
	// the file it was computed from.
	Files FileMap
}

// computeHash streams the file at path through a BLAKE2b-256 hash and
// returns the digest encoded with the URL-safe base64 alphabet.
//
// An error is returned if the hash cannot be created, the file cannot be
// opened, or reading it fails; the returned string is empty in that case.
func computeHash(path string) (string, error) {
	digest, err := blake2b.New256(nil)
	if err != nil {
		return "", err
	}

	src, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer src.Close()

	// Stream the contents so the whole file is never held in memory.
	_, err = io.Copy(digest, src)
	if err != nil {
		return "", err
	}

	sum := digest.Sum(nil)
	return base64.URLEncoding.EncodeToString(sum), nil
}

// collectHashes starts the worker goroutines that read file paths from C,
// hash each file with computeHash and record the result in h.Files, keyed
// by hash.
//
// h.numThreads workers are started; a value of 0 is treated as 1 so that
// paths sent on C are always consumed. Files that cannot be hashed are
// logged and skipped. Each worker runs until C is closed and drained.
//
// The returned WaitGroup is done once every worker has exited.
func (h *Hasher) collectHashes(C <-chan string) (wg *sync.WaitGroup) {
	wg = &sync.WaitGroup{}

	workers := h.numThreads
	if workers == 0 {
		// Without any worker nothing would ever read from C: the producer
		// would block once the channel buffer is full, and anything still
		// queued would be dropped silently.
		workers = 1
	}

	for i := uint(0); i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// The loop ends when C has been closed and fully drained.
			for file := range C {
				hash, err := computeHash(file)
				if err != nil {
					h.stdlog.Printf(" - skipping (%v):  %s", err, filepath.Base(file))
					continue
				}
				h.stdlog.Printf(" - hashed:  %s", filepath.Base(file))
				// Files is shared between all workers.
				h.filesMtx.Lock()
				h.Files[hash] = file
				h.filesMtx.Unlock()
			}
		}()
	}
	return wg
}

// ComputeHashes hashes the file paths that w produces and stores the
// results in h.Files.
//
// The workers are started via collectHashes before w.Walk is handed a
// channel buffered for 10 paths. If Walk reports an error it is returned
// immediately, without waiting for the workers; otherwise the call blocks
// until all workers have finished and returns nil.
//
// NOTE(review): the workers only exit once the channel is closed, and this
// method never closes it — presumably Walk does; confirm against the Walker
// implementation, in particular for the error path.
func (h *Hasher) ComputeHashes(w Walker) (err error) {
	paths := make(chan string, 10)
	workers := h.collectHashes(paths)

	if err = w.Walk(paths); err != nil {
		return err
	}

	workers.Wait()
	return nil
}

// NewHasher returns a Hasher configured with numThreads workers that logs
// to stdlog. Its Files map is initialised and empty.
func NewHasher(numThreads uint, stdlog *log.Logger) (h *Hasher) {
	return &Hasher{
		numThreads: numThreads,
		stdlog:     stdlog,
		Files:      make(FileMap),
	}
}