From d58d65d7d731c48b3d68fa6f1408987f3ef0769b Mon Sep 17 00:00:00 2001 From: Gareth Jones Date: Fri, 4 Apr 2025 07:39:20 +1300 Subject: [PATCH] feat: support extracting `pylock.toml` files --- .../language/python/pylock/pylock.go | 111 +++++++++++++ .../language/python/pylock/pylock_test.go | 151 ++++++++++++++++++ .../python/pylock/testdata/example.toml | 41 +++++ .../python/pylock/testdata/not-toml.txt | 1 + 4 files changed, 304 insertions(+) create mode 100644 extractor/filesystem/language/python/pylock/pylock.go create mode 100644 extractor/filesystem/language/python/pylock/pylock_test.go create mode 100644 extractor/filesystem/language/python/pylock/testdata/example.toml create mode 100644 extractor/filesystem/language/python/pylock/testdata/not-toml.txt diff --git a/extractor/filesystem/language/python/pylock/pylock.go b/extractor/filesystem/language/python/pylock/pylock.go new file mode 100644 index 00000000..3114d52d --- /dev/null +++ b/extractor/filesystem/language/python/pylock/pylock.go @@ -0,0 +1,111 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// Package pylock extracts Python packages from pylock.toml lockfiles.
+package pylock
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"regexp"
+
+	"github.com/BurntSushi/toml"
+	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/filesystem"
+	"github.com/google/osv-scalibr/extractor/filesystem/language/python/internal/pypipurl"
+	"github.com/google/osv-scalibr/plugin"
+	"github.com/google/osv-scalibr/purl"
+)
+
+const (
+	// Name is the unique name of this extractor.
+	Name = "python/pylock"
+)
+
+// pylockPackage maps the "name" and "version" keys of a single entry in the
+// lockfile's "packages" array.
+type pylockPackage struct {
+	Name    string `toml:"name"`
+	Version string `toml:"version"`
+}
+
+// pylockLockfile maps the top-level "lock-version" key and the "packages"
+// array of a lockfile.
+type pylockLockfile struct {
+	Version  string          `toml:"lock-version"`
+	Packages []pylockPackage `toml:"packages"`
+}
+
+// Extractor extracts python packages from pylock.toml files.
+type Extractor struct{}
+
+// Compile-time check that Extractor satisfies filesystem.Extractor.
+var _ filesystem.Extractor = Extractor{}
+
+// New returns a new instance of the extractor.
+func New() filesystem.Extractor { return &Extractor{} }
+
+// Name returns the unique name of the extractor.
+func (e Extractor) Name() string { return Name }
+
+// Version returns the version of the extractor.
+func (e Extractor) Version() int { return 0 }
+
+// Requirements returns the capabilities the extractor needs; it declares none.
+func (e Extractor) Requirements() *plugin.Capabilities {
+	return &plugin.Capabilities{}
+}
+
+var (
+	// pylockFilePattern matches named lockfiles such as "pylock.spam.toml":
+	// the middle segment must be non-empty and must not contain a dot.
+	pylockFilePattern = regexp.MustCompile(`^pylock\.([^.]+)\.toml$`)
+)
+
+// FileRequired reports whether the base name of the file's path is exactly
+// "pylock.toml" or matches pylockFilePattern. The comparison is
+// case-sensitive and only looks at the last path element.
+func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
+	base := filepath.Base(api.Path())
+	if base == "pylock.toml" {
+		return true
+	}
+
+	// Reuse the base name computed above instead of deriving it again.
+	return pylockFilePattern.MatchString(base)
+}
+
+// Extract extracts packages from pylock.toml files passed through the scan input.
+//
+// The lockfile is read from input.Reader and decoded as TOML. One inventory
+// entry is produced per element of the "packages" array, carrying that
+// element's name and version with input.Path as its only location. A decoding
+// failure is returned wrapped together with the path of the file. ctx is
+// currently not consulted.
+func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
+	// Decode straight into a struct value; the decoder only needs a non-nil
+	// pointer to it, so no pointer-to-nil-pointer indirection is required.
+	var lockfile pylockLockfile
+	if _, err := toml.NewDecoder(input.Reader).Decode(&lockfile); err != nil {
+		return nil, fmt.Errorf("could not extract from %s: %w", input.Path, err)
+	}
+
+	packages := make([]*extractor.Inventory, 0, len(lockfile.Packages))
+	for _, pkg := range lockfile.Packages {
+		packages = append(packages, &extractor.Inventory{
+			Name:      pkg.Name,
+			Version:   pkg.Version,
+			Locations: []string{input.Path},
+		})
+	}
+
+	return packages, nil
+}
+
+// ToPURL converts an inventory created by this extractor into a PURL by
+// delegating to pypipurl.MakePackageURL.
+func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL {
+	return pypipurl.MakePackageURL(i)
+}
+
+// Ecosystem returns the OSV ecosystem ('PyPI') of the software extracted by
+// this extractor; the result does not depend on i.
+func (e Extractor) Ecosystem(i *extractor.Inventory) string {
+	return "PyPI"
+}
diff --git a/extractor/filesystem/language/python/pylock/pylock_test.go b/extractor/filesystem/language/python/pylock/pylock_test.go
new file mode 100644
index 00000000..eaf66b6a
--- /dev/null
+++ b/extractor/filesystem/language/python/pylock/pylock_test.go
@@ -0,0 +1,151 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pylock_test
+
+import (
+	"context"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/filesystem/language/python/pylock"
+	"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
+	"github.com/google/osv-scalibr/testing/extracttest"
+)
+
+// TestExtractor_FileRequired checks which paths FileRequired accepts: only
+// paths whose last element is "pylock.toml" or "pylock.<name>.toml" are
+// expected to be required.
+func TestExtractor_FileRequired(t *testing.T) {
+	// Every case carries a descriptive name so that a failing subtest can be
+	// identified in the output and selected with -run.
+	tests := []struct {
+		name      string
+		inputPath string
+		want      bool
+	}{
+		{
+			name:      "empty path",
+			inputPath: "",
+			want:      false,
+		},
+		{
+			name:      "default lockfile name",
+			inputPath: "pylock.toml",
+			want:      true,
+		},
+		{
+			name:      "named lockfile spam",
+			inputPath: "pylock.spam.toml",
+			want:      true,
+		},
+		{
+			name:      "named lockfile beans",
+			inputPath: "pylock.beans.toml",
+			want:      true,
+		},
+		{
+			name:      "uppercase prefix is rejected",
+			inputPath: "PYLOCK.spam.toml",
+			want:      false,
+		},
+		{
+			name:      "default lockfile in nested directory",
+			inputPath: "path/to/my/pylock.toml",
+			want:      true,
+		},
+		{
+			name:      "named lockfile in nested directory",
+			inputPath: "path/to/my/pylock.spam.toml",
+			want:      true,
+		},
+		{
+			name:      "pylock.toml as a directory component",
+			inputPath: "path/to/my/pylock.toml/file",
+			want:      false,
+		},
+		{
+			name:      "pylock.toml followed by another extension",
+			inputPath: "path/to/my/pylock.toml.file",
+			want:      false,
+		},
+		{
+			name:      "pylock.toml as suffix of a longer file name",
+			inputPath: "path.to.my.pylock.toml",
+			want:      false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			e := pylock.Extractor{}
+			got := e.FileRequired(simplefileapi.New(tt.inputPath, nil))
+			if got != tt.want {
+				t.Errorf("FileRequired(%q, FileInfo) got = %v, want %v", tt.inputPath, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestExtractor_Extract runs Extract against the fixtures under testdata and
+// compares the returned error and inventory with the expected values.
+func TestExtractor_Extract(t *testing.T) {
+	tests := []extracttest.TestTableEntry{
+		{
+			Name: "invalid toml",
+			InputConfig: extracttest.ScanInputMockConfig{
+				Path: "testdata/not-toml.txt",
+			},
+			WantErr:       extracttest.ContainsErrStr{Str: "could not extract from"},
+			WantInventory: nil,
+		},
+		{
+			Name: "example",
+			InputConfig: extracttest.ScanInputMockConfig{
+				Path: "testdata/example.toml",
+			},
+			WantInventory: []*extractor.Inventory{
+				{
+					Name:      "attrs",
+					Version:   "25.1.0",
+					Locations: []string{"testdata/example.toml"},
+				},
+				{
+					Name:      "cattrs",
+					Version:   "24.1.2",
+					Locations: []string{"testdata/example.toml"},
+				},
+				{
+					Name:      "numpy",
+					Version:   "2.2.3",
+					Locations: []string{"testdata/example.toml"},
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.Name, func(t *testing.T) {
+			extr := pylock.Extractor{}
+
+			scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig)
+			defer extracttest.CloseTestScanInput(t, scanInput)
+
+			got, err := extr.Extract(context.Background(), &scanInput)
+
+			// An unexpected error state makes the inventory comparison
+			// meaningless, so stop after reporting it.
+			if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" {
+				t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff)
+				return
+			}
+
+			// Order of the returned packages is not part of the contract.
+			if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); diff != "" {
+				t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff)
+			}
+		})
+	}
+}
diff --git a/extractor/filesystem/language/python/pylock/testdata/example.toml b/extractor/filesystem/language/python/pylock/testdata/example.toml
new file mode 100644
index 00000000..40f00574
--- /dev/null
+++ b/extractor/filesystem/language/python/pylock/testdata/example.toml
@@ -0,0 +1,41 @@
+lock-version = '1.0'
+environments = ["sys_platform == 'win32'", "sys_platform == 'linux'"]
+requires-python = '==3.12'
+created-by = 'mousebender'
+
+[[packages]]
+name = 'attrs'
+version = '25.1.0'
+requires-python = '>=3.8'
+wheels = [
+  {name = 'attrs-25.1.0-py3-none-any.whl', upload-time = 2025-01-25T11:30:10.164985+00:00, url = 'https://files.pythonhosted.org/packages/fc/30/d4986a882011f9df997a55e6becd864812ccfcd821d64aac8570ee39f719/attrs-25.1.0-py3-none-any.whl', size = 63152, hashes = {sha256 = 'c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a'}},
+]
+[[packages.attestation-identities]]
+environment = 'release-pypi'
+kind = 'GitHub'
+repository = 'python-attrs/attrs'
+workflow = 'pypi-package.yml'
+
+[[packages]]
+name
= 'cattrs' +version = '24.1.2' +requires-python = '>=3.8' +dependencies = [ + {name = 'attrs'}, +] +wheels = [ + {name = 'cattrs-24.1.2-py3-none-any.whl', upload-time = 2024-09-22T14:58:34.812643+00:00, url = 'https://files.pythonhosted.org/packages/c8/d5/867e75361fc45f6de75fe277dd085627a9db5ebb511a87f27dc1396b5351/cattrs-24.1.2-py3-none-any.whl', size = 66446, hashes = {sha256 = '67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0'}}, +] + +[[packages]] +name = 'numpy' +version = '2.2.3' +requires-python = '>=3.10' +wheels = [ + {name = 'numpy-2.2.3-cp312-cp312-win_amd64.whl', upload-time = 2025-02-13T16:51:21.821880+00:00, url = 'https://files.pythonhosted.org/packages/42/6e/55580a538116d16ae7c9aa17d4edd56e83f42126cb1dfe7a684da7925d2c/numpy-2.2.3-cp312-cp312-win_amd64.whl', size = 12626357, hashes = {sha256 = '83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d'}}, + {name = 'numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl', upload-time = 2025-02-13T16:50:00.079662+00:00, url = 'https://files.pythonhosted.org/packages/39/04/78d2e7402fb479d893953fb78fa7045f7deb635ec095b6b4f0260223091a/numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl', size = 16116679, hashes = {sha256 = '3b787adbf04b0db1967798dba8da1af07e387908ed1553a0d6e74c084d1ceafe'}}, +] + +[tool.mousebender] +command = ['.', 'lock', '--platform', 'cpython3.12-windows-x64', '--platform', 'cpython3.12-manylinux2014-x64', 'cattrs', 'numpy'] +run-on = 2025-03-06T12:28:57.760769 diff --git a/extractor/filesystem/language/python/pylock/testdata/not-toml.txt b/extractor/filesystem/language/python/pylock/testdata/not-toml.txt new file mode 100644 index 00000000..ddd3cb93 --- /dev/null +++ b/extractor/filesystem/language/python/pylock/testdata/not-toml.txt @@ -0,0 +1 @@ +this is not valid toml! (I think)