From 3d0c2afb906cbb54a70848295b88e83ad73f78e7 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Fri, 8 Nov 2024 12:06:44 -0500 Subject: [PATCH 1/2] specifications: create living copy of PEP 740 Signed-off-by: William Woodruff --- source/conf.py | 1 + .../index-hosted-attestations.rst | 368 ++++++++++++++++++ .../section-package-indices.rst | 1 + .../specifications/simple-repository-api.rst | 17 +- 4 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 source/specifications/index-hosted-attestations.rst diff --git a/source/conf.py b/source/conf.py index c777550ce..cd0f41302 100644 --- a/source/conf.py +++ b/source/conf.py @@ -193,6 +193,7 @@ "tox": ("https://tox.wiki/en/latest/", None), "twine": ("https://twine.readthedocs.io/en/stable/", None), "virtualenv": ("https://virtualenv.pypa.io/en/stable/", None), + "warehouse": ("https://warehouse.pypa.io/", None), } # -- Options for todo extension -------------------------------------------------------- diff --git a/source/specifications/index-hosted-attestations.rst b/source/specifications/index-hosted-attestations.rst new file mode 100644 index 000000000..395ffc10c --- /dev/null +++ b/source/specifications/index-hosted-attestations.rst @@ -0,0 +1,368 @@ + +.. _index-hosted-attestations: + +========================= +Index hosted attestations +========================= + +.. note:: This specification was originally defined in :pep:`740`. + +.. note:: + + :pep:`740` includes changes to the HTML and JSON index APIs. + These changes are documented in the :ref:`simple-repository-api` + under :ref:`simple-repository-api-base` and :ref:`json-serialization`. + +Specification +============= + +.. _upload-endpoint: + +Upload endpoint changes +----------------------- + +.. important:: + + The "legacy" upload API is not standardized. + See :ref:`Warehouse's Upload API documentation + ` for how attestations are uploaded. + +.. 
_attestation-object: + +Attestation objects +------------------- + +An attestation object is a JSON object with several required keys; applications +or signers may include additional keys so long as all explicitly +listed keys are provided. The required layout of an attestation +object is provided as pseudocode below. + +.. code-block:: python + + @dataclass + class Attestation: + version: Literal[1] + """ + The attestation object's version, which is always 1. + """ + + verification_material: VerificationMaterial + """ + Cryptographic materials used to verify `envelope`. + """ + + envelope: Envelope + """ + The enveloped attestation statement and signature. + """ + + + @dataclass + class Envelope: + statement: bytes + """ + The attestation statement. + + This is represented as opaque bytes on the wire (encoded as base64), + but it MUST be a JSON in-toto v1 Statement. + """ + + signature: bytes + """ + A signature for the above statement, encoded as base64. + """ + + @dataclass + class VerificationMaterial: + certificate: str + """ + The signing certificate, as `base64(DER(cert))`. + """ + + transparency_entries: list[object] + """ + One or more transparency log entries for this attestation's signature + and certificate. + """ + +A full data model for each object in ``transparency_entries`` is provided in +:ref:`appendix`. Attestation objects **SHOULD** include one or more +transparency log entries, and **MAY** include additional keys for other +sources of signed time (such as an :rfc:`3161` Time Stamping Authority or a +`Roughtime `__ server). + +Attestation objects are versioned; this PEP specifies version 1. Each version +is tied to a single cryptographic suite to minimize unnecessary cryptographic +agility. In version 1, the suite is as follows: + +* Certificates are specified as X.509 certificates, and comply with the + profile in :rfc:`5280`. 
+* The message signature algorithm is ECDSA, with the P-256 curve for public keys + and SHA-256 as the cryptographic digest function. + +Future PEPs may change this suite (and the overall shape of the attestation +object) by selecting a new version number. + +.. _payload-and-signature-generation: + +Attestation statement and signature generation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The *attestation statement* is the actual claim that is cryptographically signed +over within the attestation object (i.e., the ``envelope.statement``). + +The attestation statement is encoded as a +`v1 in-toto Statement object `__, +in JSON form. When serialized the statement is treated as an opaque binary blob, +avoiding the need for canonicalization. + +In addition to being a v1 in-toto Statement, the attestation statement is constrained +in the following ways: + +* The in-toto ``subject`` **MUST** contain only a single subject. +* ``subject[0].name`` is the distribution's filename, which **MUST** be + a valid :ref:`source distribution ` or + :ref:`wheel distribution ` filename. +* ``subject[0].digest`` **MUST** contain a SHA-256 digest. Other digests + **MAY** be present. The digests **MUST** be represented as hexadecimal strings. +* The following ``predicateType`` values are supported: + + * `SLSA Provenance `__: ``https://slsa.dev/provenance/v1`` + * `PyPI Publish Attestation `__: ``https://docs.pypi.org/attestations/publish/v1`` + +The signature over this statement is constructed using the +`v1 DSSE signature protocol `__, +with a ``PAYLOAD_TYPE`` of ``application/vnd.in-toto+json`` and a ``PAYLOAD_BODY`` of the JSON-encoded +statement above. No other ``PAYLOAD_TYPE`` is permitted. + +.. _provenance-object: + +Provenance objects +------------------ + +The index will serve uploaded attestations along with metadata that can assist +in verifying them in the form of JSON serialized objects. 
+ +These *provenance objects* will be available via both the Simple Index +and JSON-based Simple API as described above, and will have the following layout: + +.. code-block:: json + + { + "version": 1, + "attestation_bundles": [ + { + "publisher": { + "kind": "important-ci-service", + "claims": {}, + "vendor-property": "foo", + "another-property": 123 + }, + "attestations": [ + { /* attestation 1 ... */ }, + { /* attestation 2 ... */ } + ] + } + ] + } + +or, as pseudocode: + +.. code-block:: python + + @dataclass + class Publisher: + kind: string + """ + The kind of Trusted Publisher. + """ + + claims: object | None + """ + Any context-specific claims retained by the index during Trusted Publisher + authentication. + """ + + _rest: object + """ + Each publisher object is open-ended, meaning that it MAY contain additional + fields beyond the ones specified explicitly above. This field signals that, + but is not itself present. + """ + + @dataclass + class AttestationBundle: + publisher: Publisher + """ + The publisher associated with this set of attestations. + """ + + attestations: list[Attestation] + """ + The set of attestations included in this bundle. + """ + + @dataclass + class Provenance: + version: Literal[1] + """ + The provenance object's version, which is always 1. + """ + + attestation_bundles: list[AttestationBundle] + """ + One or more attestation "bundles". + """ + +* ``version`` is ``1``. Like attestation objects, provenance objects are + versioned, and this PEP only defines version ``1``. +* ``attestation_bundles`` is a **required** JSON array, containing one + or more "bundles" of attestations. Each bundle corresponds to a + signing identity (such as a Trusted Publishing identity), and contains + one or more attestation objects. 
+ + As noted in the ``Publisher`` model, + each ``AttestationBundle.publisher`` object is specific to its Trusted Publisher + but must include at minimum: + + * A ``kind`` key, which **MUST** be a JSON string that uniquely identifies the + kind of Trusted Publisher. + * A ``claims`` key, which **MUST** be a JSON object containing any context-specific + claims retained by the index during Trusted Publisher authentication. + + All other keys in the publisher object are publisher-specific. + + Each array of attestation objects is a superset of the ``attestations`` + array supplied by the uploader through the ``attestations`` field at upload + time, as described in :ref:`upload-endpoint` and + :ref:`changes-to-provenance-objects`. + +.. _changes-to-provenance-objects: + +Changes to provenance objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Provenance objects are *not* immutable, and may change over time. Reasons +for changes to the provenance object include but are not limited to: + +* Addition of new attestations for a pre-existing signing identity: the index + **MAY** choose to allow additional attestations by pre-existing signing + identities, such as newer attestation versions for already uploaded + files. + +* Addition of new signing identities and associated attestations: the index + **MAY** choose to support attestations from sources other than the file's + uploader, such as third-party auditors or the index itself. These attestations + may be performed asynchronously, requiring the index to insert them into + the provenance object *post facto*. + +.. _attestation-verification: + +Attestation verification +------------------------ + +Verifying an attestation object against a distribution file requires verification of each of the +following: + +* ``version`` is ``1``. The verifier **MUST** reject any other version. 
+* ``verification_material.certificate`` is a valid signing certificate, as + issued by an *a priori* trusted authority (such as a root of trust already + present within the verifying client). +* ``verification_material.certificate`` identifies an appropriate signing + subject, such as the machine identity of the Trusted Publisher that published + the package. +* ``envelope.statement`` is a valid in-toto v1 Statement, with a subject + and digest that **MUST** match the distribution's filename and contents. + For the distribution's filename, matching **MUST** be performed by parsing + using the appropriate source distribution or wheel filename format, as + the statement's subject may be equivalent but normalized. +* ``envelope.signature`` is a valid signature for ``envelope.statement`` + corresponding to ``verification_material.certificate``, + as reconstituted via the + `v1 DSSE signature protocol `__. + +In addition to the above required steps, a verifier **MAY** additionally verify +``verification_material.transparency_entries`` on a policy basis, e.g. requiring +at least one transparency log entry or a threshold of entries. When verifying +transparency entries, the verifier **MUST** confirm that the inclusion time for +each entry lies within the signing certificate's validity period. + +.. _appendix: + +Appendix: Data models for Transparency Log Entries +==================================================== + +This appendix contains pseudocoded data models for transparency log entries +in attestation objects. Each transparency log entry serves as a source +of signed inclusion time, and can be verified either online or offline. + +.. code-block:: python + + @dataclass + class TransparencyLogEntry: + log_index: int + """ + The global index of the log entry, used when querying the log. + """ + + log_id: str + """ + An opaque, unique identifier for the log. + """ + + entry_kind: str + """ + The kind (type) of log entry. 
+ """ + + entry_version: str + """ + The version of the log entry's submitted format. + """ + + integrated_time: int + """ + The UNIX timestamp from the log from when the entry was persisted. + """ + + inclusion_proof: InclusionProof + """ + The actual inclusion proof of the log entry. + """ + + + @dataclass + class InclusionProof: + log_index: int + """ + The index of the entry in the tree it was written to. + """ + + root_hash: str + """ + The digest stored at the root of the Merkle tree at the time of proof + generation. + """ + + tree_size: int + """ + The size of the Merkle tree at the time of proof generation. + """ + + hashes: list[str] + """ + A list of hashes required to complete the inclusion proof, sorted + in order from leaf to root. The leaf and root hashes are not themselves + included in this list; the root is supplied via `root_hash` and the client + must calculate the leaf hash. + """ + + checkpoint: str + """ + The signed tree head's signature, at the time of proof generation. + """ + + cosigned_checkpoints: list[str] + """ + Cosigned checkpoints from zero or more log witnesses. + """ diff --git a/source/specifications/section-package-indices.rst b/source/specifications/section-package-indices.rst index 13ba98113..73004b4d3 100644 --- a/source/specifications/section-package-indices.rst +++ b/source/specifications/section-package-indices.rst @@ -7,3 +7,4 @@ Package Index Interfaces pypirc simple-repository-api + index-hosted-attestations diff --git a/source/specifications/simple-repository-api.rst b/source/specifications/simple-repository-api.rst index 0d65a58aa..9ec8e4bf2 100644 --- a/source/specifications/simple-repository-api.rst +++ b/source/specifications/simple-repository-api.rst @@ -96,6 +96,15 @@ In addition to the above, the following constraints are placed on the API: In the attribute value, < and > have to be HTML encoded as ``&lt;`` and ``&gt;``, respectively. +* A repository **MAY** include a ``data-provenance`` attribute on a file link. 
+ The value of this attribute **MUST** be a fully qualified URL, signaling that + the file's provenance can be found at that URL. This URL **MUST** represent + a `secure origin `_. + + .. note:: + + The format of the linked provenance is defined in :ref:`index-hosted-attestations`. + Normalized Names ---------------- @@ -495,6 +504,10 @@ Each individual file dictionary has the following keys: and is a truthy value, then it **SHOULD** be interpreted as indicating that the file pointed to by the ``url`` field has been "Yanked" as per :ref:`the API yank specification `. +- ``provenance``: An **optional** key which, if present, **MUST** be either a JSON + string or ``null``. If not ``null``, it **MUST** be a URL to the file's + associated provenance, with the same rules as ``data-provenance`` in the + :ref:`base HTML API specification `. As an example: @@ -518,7 +531,8 @@ As an example: "url": "https://example.com/files/holygrail-1.0-py3-none-any.whl", "hashes": {"sha256": "...", "blake2b": "..."}, "requires-python": ">=3.7", - "dist-info-metadata": true + "dist-info-metadata": true, + "provenance": "https://example.com/files/holygrail-1.0-py3-none-any.whl.provenance" } ] } @@ -986,3 +1000,4 @@ History format, in :pep:`700` * June 2023: renaming the field which provides package metadata independently from a package, in :pep:`714` +* November 2024: provenance metadata in the HTML and JSON formats, in :pep:`740` From dcd35fd41dd3d7c29ead52084d75167b715aaed5 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 25 Nov 2024 09:39:23 -0500 Subject: [PATCH 2/2] fix upload API URL Signed-off-by: William Woodruff --- source/specifications/index-hosted-attestations.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/specifications/index-hosted-attestations.rst b/source/specifications/index-hosted-attestations.rst index 395ffc10c..d078e87bd 100644 --- a/source/specifications/index-hosted-attestations.rst +++ 
b/source/specifications/index-hosted-attestations.rst @@ -24,8 +24,8 @@ Upload endpoint changes .. important:: The "legacy" upload API is not standardized. - See :ref:`Warehouse's Upload API documentation - ` for how attestations are uploaded. + See `PyPI's Upload API documentation `_ + for how attestations are uploaded. .. _attestation-object: