Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fall back to license from package if not present in versioned package #109

Merged
merged 8 commits into from
Feb 10, 2025
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ You can also return raw JSON information about a specific repository:
parlay ecosystems repo https://github.com/open-policy-agent/conftest
```

### License data

parlay enriches components and packages with their license information from ecosyste.ms on a best-effort basis. It prefers the license data of the package version at hand; however, it may not always be possible to retrieve the license for a specific version (see [this ecosyste.ms issue](https://github.com/ecosyste-ms/packages/issues/1027) for more info). In that case, parlay falls back to the license data of the package's latest release. In rare cases — where a package's licensing model has changed over time — this may result in inaccurate license data.


## Enriching with Snyk

Expand Down
16 changes: 13 additions & 3 deletions internal/utils/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,19 @@ func GetPurlFromSPDXPackage(pkg *spdx_2_3.Package) (*packageurl.PackageURL, erro
return &purl, nil
}

func GetSPDXLicenseExpressionFromEcosystemsLicense(data *packages.VersionWithDependencies) string {
if data == nil || data.Licenses == nil || *data.Licenses == "" {
// GetLicensesFromEcosystemsLicense returns the license identifiers to use for
// a package, preferring version-specific data over package-level data.
//
// The comma-separated Licenses string of pkgVersionData is used when it yields
// at least one non-blank identifier; otherwise the NormalizedLicenses of
// pkgData are used. In both cases surrounding whitespace is trimmed and blank
// entries are dropped, so callers never receive empty identifiers (which would
// otherwise produce malformed expressions such as "()" or "(MIT OR )").
//
// Either argument may be nil. The result is nil when neither source provides a
// usable license.
func GetLicensesFromEcosystemsLicense(pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) []string {
	if pkgVersionData != nil && pkgVersionData.Licenses != nil {
		if licenses := nonBlankLicenses(strings.Split(*pkgVersionData.Licenses, ",")); len(licenses) > 0 {
			return licenses
		}
	}
	if pkgData != nil {
		if licenses := nonBlankLicenses(pkgData.NormalizedLicenses); len(licenses) > 0 {
			return licenses
		}
	}
	return nil
}

// nonBlankLicenses returns the entries of raw with surrounding whitespace
// removed, omitting entries that are empty after trimming.
func nonBlankLicenses(raw []string) []string {
	var cleaned []string
	for _, license := range raw {
		if trimmed := strings.TrimSpace(license); trimmed != "" {
			cleaned = append(cleaned, trimmed)
		}
	}
	return cleaned
}

func GetLicenseExpressionFromEcosystemsLicense(pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) string {
licenses := GetLicensesFromEcosystemsLicense(pkgVersionData, pkgData)
if len(licenses) == 0 {
return ""
}
return fmt.Sprintf("(%s)", strings.Join(strings.Split(*data.Licenses, ","), " OR "))
return fmt.Sprintf("(%s)", strings.Join(licenses, " OR "))
}
41 changes: 32 additions & 9 deletions internal/utils/spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,52 @@ import (

func TestGetSPDXLicenseExpressionFromEcosystemsLicense(t *testing.T) {
assert := assert.New(t)
licenses := "GPLv2,MIT"
data := packages.VersionWithDependencies{Licenses: &licenses}
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
versionedLicenses := "GPLv2,MIT"
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
latestLicenses := []string{"Apache-2.0"}
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
assert.Equal("(GPLv2 OR MIT)", expression)
}

func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoData(t *testing.T) {
assert := assert.New(t)
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(nil)
expression := utils.GetLicenseExpressionFromEcosystemsLicense(nil, nil)
assert.Equal("", expression)
}

func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoVersionedData(t *testing.T) {
assert := assert.New(t)
pkgVersionData := packages.VersionWithDependencies{}
latestLicenses := []string{"Apache-2.0"}
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
assert.Equal("(Apache-2.0)", expression)
}

func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoLatestData(t *testing.T) {
assert := assert.New(t)
versionedLicenses := "GPLv2,MIT"
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
pkgData := packages.Package{}
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
assert.Equal("(GPLv2 OR MIT)", expression)
}

func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoLicenses(t *testing.T) {
assert := assert.New(t)
data := packages.VersionWithDependencies{}
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
pkgVersionData := packages.VersionWithDependencies{}
pkgData := packages.Package{}
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
assert.Equal("", expression)
}

func TestGetSPDXLicenseExpressionFromEcosystemsLicense_EmptyLicenses(t *testing.T) {
assert := assert.New(t)
licenses := ""
data := packages.VersionWithDependencies{Licenses: &licenses}
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
versionedLicenses := ""
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
latestLicenses := []string{}
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
assert.Equal("", expression)
}
8 changes: 4 additions & 4 deletions lib/ecosystems/enrich_cyclonedx.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
)

type cdxPackageEnricher = func(*cdx.Component, *packages.Package)
type cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies)
type cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies, *packages.Package)

var cdxPackageEnrichers = []cdxPackageEnricher{
enrichCDXDescription,
Expand All @@ -58,8 +58,8 @@ func enrichCDXDescription(comp *cdx.Component, data *packages.Package) {
}
}

func enrichCDXLicense(comp *cdx.Component, data *packages.VersionWithDependencies) {
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(data)
func enrichCDXLicense(comp *cdx.Component, pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) {
expression := utils.GetLicenseExpressionFromEcosystemsLicense(pkgVersionData, pkgData)
if expression != "" {
licenses := cdx.LicenseChoice{Expression: expression}
comp.Licenses = &cdx.Licenses{licenses}
Expand Down Expand Up @@ -248,7 +248,7 @@ func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
}

for _, enrichFunc := range cdxPackageVersionEnrichers {
enrichFunc(comp, packageVersionResp.JSON200)
enrichFunc(comp, packageVersionResp.JSON200, packageResp.JSON200)
}

}(comps[i])
Expand Down
47 changes: 43 additions & 4 deletions lib/ecosystems/enrich_cyclonedx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,54 @@ func TestEnrichLicense(t *testing.T) {
Name: "cyclonedx-go",
Version: "v0.3.0",
}
lic := "BSD-3-Clause"
pack := &packages.VersionWithDependencies{
Licenses: &lic,
versionedLicenses := "BSD-3-Clause"
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
latestLicenses := []string{"Apache-2.0"}
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}

enrichCDXLicense(component, pkgVersionData, pkgData)

licenses := *component.Licenses
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(BSD-3-Clause)"})
assert.Equal(t, 1, len(licenses))
assert.Equal(t, comp, licenses[0])
}

func TestEnrichLicenseNoVersionedLicense(t *testing.T) {
component := &cdx.Component{
Type: cdx.ComponentTypeLibrary,
Name: "cyclonedx-go",
Version: "v0.3.0",
}
versionedLicenses := ""
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
latestLicenses := []string{"Apache-2.0"}
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}

enrichCDXLicense(component, pkgVersionData, pkgData)

licenses := *component.Licenses
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(Apache-2.0)"})
assert.Equal(t, 1, len(licenses))
assert.Equal(t, comp, licenses[0])
}

func TestEnrichLicenseNoLatestLicense(t *testing.T) {
component := &cdx.Component{
Type: cdx.ComponentTypeLibrary,
Name: "cyclonedx-go",
Version: "v0.3.0",
}
versionedLicenses := "BSD-3-Clause"
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
latestLicenses := []string{""}
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}

enrichCDXLicense(component, pack)
enrichCDXLicense(component, pkgVersionData, pkgData)

licenses := *component.Licenses
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(BSD-3-Clause)"})
assert.Equal(t, 1, len(licenses))
assert.Equal(t, comp, licenses[0])
}

Expand Down
11 changes: 6 additions & 5 deletions lib/ecosystems/enrich_spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package ecosystems

import (
"errors"
"strings"

"github.com/package-url/packageurl-go"
"github.com/rs/zerolog"
Expand Down Expand Up @@ -64,7 +65,7 @@ func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) {
continue
}

enrichSPDXLicense(pkg, pkgVersionData)
enrichSPDXLicense(pkg, pkgVersionData, pkgData)
}
}

Expand Down Expand Up @@ -96,10 +97,10 @@ func enrichSPDXSupplier(pkg *v2_3.Package, data *packages.Package) {
}
}

func enrichSPDXLicense(pkg *v2_3.Package, data *packages.VersionWithDependencies) {
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(data)
if expression != "" {
pkg.PackageLicenseConcluded = *data.Licenses
func enrichSPDXLicense(pkg *v2_3.Package, pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) {
licenses := utils.GetLicensesFromEcosystemsLicense(pkgVersionData, pkgData)
if len(licenses) > 0 {
pkg.PackageLicenseConcluded = strings.Join(licenses, ",")
}
}

Expand Down
149 changes: 108 additions & 41 deletions lib/ecosystems/enrich_spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package ecosystems

import (
"bytes"
"encoding/json"
"net/http"
"testing"

Expand All @@ -31,34 +32,49 @@ import (
"github.com/snyk/parlay/lib/sbom"
)

func TestEnrichSBOM_SPDX(t *testing.T) {
// parseJson decodes jsonStr as a JSON object and returns it as a generic map.
// It is a test helper: the calling test fails immediately if jsonStr cannot be
// unmarshalled.
func parseJson(t *testing.T, jsonStr string) map[string]any {
	t.Helper()

	var decoded map[string]any
	err := json.Unmarshal([]byte(jsonStr), &decoded)
	require.NoError(t, err)

	return decoded
}

func setupHttpmock(t *testing.T, packageVersionsResponse, packageResponse *string) {
t.Helper()
httpmock.Activate()
defer httpmock.DeactivateAndReset()

httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries/.*/packages/.*/versions`,
func(r *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, map[string]interface{}{
// This is the license we expect to see for the specific package version
"licenses": "MIT",
})
},
)
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
func(req *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, map[string]interface{}{
"description": "description",
"normalized_licenses": []string{
// This license should be ignored as it corresponds to the latest version of the package
"BSD-3-Clause",
},
"homepage": "https://github.com/spdx/tools-golang",
"repo_metadata": map[string]interface{}{
"owner_record": map[string]interface{}{
"name": "Acme Corp",
},
},
if packageVersionsResponse != nil {
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries/.*/packages/.*/versions`,
func(r *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, parseJson(t, *packageVersionsResponse))
},
)
}

if packageResponse != nil {
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
func(req *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, parseJson(t, *packageResponse))
})
})
}
}

func TestEnrichSBOM_SPDX(t *testing.T) {
packageVersionResponse := `{
"licenses": "MIT"
}`
packageResponse := `{
"description": "description",
"normalized_licenses": ["BSD-3-Clause"],
"homepage": "https://github.com/spdx/tools-golang",
"repo_metadata": {
"owner_record": {
"name": "Acme Corp"
}
}
}`
setupHttpmock(t, &packageVersionResponse, &packageResponse)
defer httpmock.DeactivateAndReset()

doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
require.NoError(t, err)
Expand Down Expand Up @@ -100,25 +116,76 @@ func TestEnrichSBOM_SPDX(t *testing.T) {
require.NoError(t, doc.Encode(buf))
}

func TestEnrichSBOM_SPDX_NoSupplierName(t *testing.T) {
httpmock.Activate()
func TestEnrichSBOM_MissingVersionedLicense(t *testing.T) {
packageVersionResponse := `{
"licenses": ""
}`
packageResponse := `{
"description": "description",
"normalized_licenses": ["BSD-3-Clause", "Apache-2.0"],
"homepage": "https://github.com/spdx/tools-golang",
"repo_metadata": {
"owner_record": {
"name": "Acme Corp"
}
}
}`
setupHttpmock(t, &packageVersionResponse, &packageResponse)
defer httpmock.DeactivateAndReset()

httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
func(req *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, map[string]interface{}{
"description": "description",
"normalized_licenses": []string{
"BSD-3-Clause",
},
"homepage": "https://github.com/spdx/tools-golang",
"repo_metadata": map[string]interface{}{
"owner_record": map[string]interface{}{
"name": "",
},
doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
require.NoError(t, err)

bom, ok := doc.BOM.(*v2_3.Document)
require.True(t, ok)

bom.Packages = []*v2_3.Package{
{
PackageSPDXIdentifier: "pkg:golang/github.com/spdx/tools-golang@v0.5.2",
PackageName: "github.com/spdx/tools-golang",
PackageVersion: "v0.5.2",
PackageExternalReferences: []*v2_3.PackageExternalReference{
{
Category: common.CategoryPackageManager,
RefType: "purl",
Locator: "pkg:golang/github.com/spdx/tools-golang@v0.5.2",
},
})
})
},
},
}
logger := zerolog.Nop()

EnrichSBOM(doc, &logger)

pkgs := bom.Packages

assert.Equal(t, "description", pkgs[0].PackageDescription)
assert.Equal(t, "BSD-3-Clause,Apache-2.0", pkgs[0].PackageLicenseConcluded)
assert.Equal(t, "https://github.com/spdx/tools-golang", pkgs[0].PackageHomePage)
assert.Equal(t, "Organization", pkgs[0].PackageSupplier.SupplierType)
assert.Equal(t, "Acme Corp", pkgs[0].PackageSupplier.Supplier)

httpmock.GetTotalCallCount()
calls := httpmock.GetCallCountInfo()
assert.Equal(t, len(pkgs), calls[`GET =~^https://packages.ecosyste.ms/api/v1/registries`])

buf := bytes.NewBuffer(nil)
require.NoError(t, doc.Encode(buf))
}

func TestEnrichSBOM_SPDX_NoSupplierName(t *testing.T) {
packageResponse := `{
"description": "description",
"normalized_licenses": ["BSD-3-Clause"],
"homepage": "https://github.com/spdx/tools-golang",
"repo_metadata": {
"owner_record": {
"name": ""
}
}
}`
setupHttpmock(t, nil, &packageResponse)
defer httpmock.DeactivateAndReset()

doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
require.NoError(t, err)
Expand Down