Skip to content

Commit e34e3b3

Browse files
authored
Merge pull request #19 from ZetaTwo/deb-importer
.deb archive importer
2 parents 5f93caf + cc897ee commit e34e3b3

19 files changed

+789
-1
lines changed

README.md

+8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
- [Setting up Cloud Spanner](#setting-up-cloud-spanner)
1717
- [Setting up importers](#setting-up-importers)
1818
- [TarGz](#targz)
19+
- [Deb](#deb)
1920
- [GCP](#gcp)
2021
- [Windows](#windows)
2122
- [WSUS](#wsus)
@@ -205,6 +206,13 @@ This is a simple importer that traverses repositories and looks for `.tar.gz` fi
205206

206207
1. `-targz_repo_path` which should point to the path on the local file system that contains `.tar.gz` files
207208

209+
#### Deb
210+
211+
This importer is very similar to the TarGz importer, except that it looks for `.deb` packages. Once a package is found, the importer hashes the first and the last 10MB of the file to check whether it has already been processed. This avoids hashing the whole file every time the repository is scanned for new sources. To use this importer you need to specify the following flag(s):
212+
213+
1. `-deb_repo_path` which should point to the path on the local file system that contains `.deb` files
214+
215+
208216
#### GCP
209217

210218
This importer can extract files from GCP disk [images](https://cloud.google.com/compute/docs/images). This is done in a few steps:

go.mod

+6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ require (
1818
require (
1919
cloud.google.com/go v0.100.2 // indirect
2020
cloud.google.com/go/compute v1.6.1 // indirect
21+
github.com/DataDog/zstd v1.4.8 // indirect
2122
github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect
2223
github.com/cespare/xxhash/v2 v2.1.1 // indirect
2324
github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4 // indirect
@@ -27,11 +28,16 @@ require (
2728
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
2829
github.com/golang/protobuf v1.5.2 // indirect
2930
github.com/googleapis/gax-go/v2 v2.3.0 // indirect
31+
github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect
32+
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
3033
go.opencensus.io v0.23.0 // indirect
34+
golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897 // indirect
3135
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 // indirect
3236
golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect
3337
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 // indirect
3438
golang.org/x/text v0.3.7 // indirect
3539
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
3640
google.golang.org/appengine v1.6.7 // indirect
41+
pault.ag/go/debian v0.12.0 // indirect
42+
pault.ag/go/topsort v0.0.0-20160530003732-f98d2ad46e1a // indirect
3743
)

go.sum

+12
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
5858
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
5959
github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
6060
github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
61+
github.com/DataDog/zstd v1.4.8 h1:Rpmta4xZ/MgZnriKNd24iZMhGpP5dvUcs/uqfBapKZY=
62+
github.com/DataDog/zstd v1.4.8/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
6163
github.com/Microsoft/go-winio v0.5.3-0.20220712145307-8fca75951feb h1:mYouFl1H94ZXkNVZ4/b4gQgaig4ch4G1f/oTWdTPhN8=
6264
github.com/Microsoft/go-winio v0.5.3-0.20220712145307-8fca75951feb/go.mod h1:9ZRWkpdsaDaHBql4MK5YereVcy6vkcO0xVhq5B1THlk=
6365
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
@@ -188,6 +190,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:
188190
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
189191
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
190192
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
193+
github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d h1:RnWZeH8N8KXfbwMTex/KKMYMj0FJRCF6tQubUuQ02GM=
194+
github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d/go.mod h1:phT/jsRPBAEqjAibu1BurrabCBNTYiVI+zbmyCZJY6Q=
191195
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
192196
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
193197
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@@ -203,6 +207,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
203207
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
204208
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
205209
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
210+
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
211+
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
206212
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
207213
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
208214
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -222,6 +228,8 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U
222228
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
223229
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
224230
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
231+
golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897 h1:pLI5jrR7OSLijeIDcmRxNmw2api+jEfxLoykJVice/E=
232+
golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
225233
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
226234
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
227235
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -637,6 +645,10 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
637645
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
638646
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
639647
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
648+
pault.ag/go/debian v0.12.0 h1:b8ctSdBSGJ98NE1VLn06aSx70EUpczlP2qqSHEiYYJA=
649+
pault.ag/go/debian v0.12.0/go.mod h1:UbnMr3z/KZepjq7VzbYgBEfz8j4+Pyrm2L5X1fzhy/k=
650+
pault.ag/go/topsort v0.0.0-20160530003732-f98d2ad46e1a h1:WwS7vlB5H2AtwKj1jsGwp2ZLud1x6WXRXh2fXsRqrcA=
651+
pault.ag/go/topsort v0.0.0-20160530003732-f98d2ad46e1a/go.mod h1:INqx0ClF7kmPAMk2zVTX8DRnhZ/yaA/Mg52g8KFKE7k=
640652
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
641653
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
642654
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

hashr.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/google/hashr/core/hashr"
2727
gcpExporter "github.com/google/hashr/exporters/gcp"
2828
postgresExporter "github.com/google/hashr/exporters/postgres"
29+
"github.com/google/hashr/importers/deb"
2930
"github.com/google/hashr/importers/gcp"
3031
"github.com/google/hashr/importers/targz"
3132
"github.com/google/hashr/importers/windows"
@@ -41,7 +42,7 @@ import (
4142

4243
var (
4344
processingWorkerCount = flag.Int("processing_worker_count", 2, "Number of processing workers.")
44-
importersToRun = flag.String("importers", strings.Join([]string{}, ","), fmt.Sprintf("Importers to be run: %s,%s,%s,%s", gcp.RepoName, targz.RepoName, windows.RepoName, wsus.RepoName))
45+
importersToRun = flag.String("importers", strings.Join([]string{}, ","), fmt.Sprintf("Importers to be run: %s,%s,%s,%s,%s", gcp.RepoName, targz.RepoName, windows.RepoName, wsus.RepoName, deb.RepoName))
4546
exportersToRun = flag.String("exporters", strings.Join([]string{}, ","), fmt.Sprintf("Exporters to be run: %s,%s", gcpExporter.Name, postgresExporter.Name))
4647
jobStorage = flag.String("storage", "", "Storage that should be used for storing data about processing jobs, can have one of the two values: postgres, cloudspanner")
4748
cacheDir = flag.String("cache_dir", "/tmp/", "Path to cache dir used to store local cache.")
@@ -69,6 +70,8 @@ var (
6970
windowsRepoPath = flag.String("windows_iso_repo_path", "", "Path to Windows ISO repository.")
7071
// tarGz importer flags
7172
tarGzRepoPath = flag.String("targz_repo_path", "", "Path to TarGz repository.")
73+
// deb importer flags
74+
debRepoPath = flag.String("deb_repo_path", "", "Path to Deb repository.")
7275
)
7376

7477
func main() {
@@ -123,6 +126,8 @@ func main() {
123126
}
124127
case targz.RepoName:
125128
importers = append(importers, targz.NewRepo(*tarGzRepoPath))
129+
case deb.RepoName:
130+
importers = append(importers, deb.NewRepo(*debRepoPath))
126131
}
127132
}
128133

0 commit comments

Comments
 (0)