From f2802975bb71de7b75cfa4a41eea559d85b91768 Mon Sep 17 00:00:00 2001 From: Eliott C Date: Thu, 2 May 2024 16:45:07 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20List-commits=20&=20count-commits=20?= =?UTF-8?q?(#647)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix https://github.com/huggingface/huggingface.js/issues/646 --- packages/hub/src/error.ts | 10 +- packages/hub/src/lib/count-commits.spec.ts | 16 +++ packages/hub/src/lib/count-commits.ts | 34 ++++++ packages/hub/src/lib/index.ts | 2 + packages/hub/src/lib/list-commits.spec.ts | 117 +++++++++++++++++++++ packages/hub/src/lib/list-commits.ts | 69 ++++++++++++ packages/hub/src/types/api/api-commit.d.ts | 9 ++ 7 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 packages/hub/src/lib/count-commits.spec.ts create mode 100644 packages/hub/src/lib/count-commits.ts create mode 100644 packages/hub/src/lib/list-commits.spec.ts create mode 100644 packages/hub/src/lib/list-commits.ts diff --git a/packages/hub/src/error.ts b/packages/hub/src/error.ts index d34563a5a..db159b617 100644 --- a/packages/hub/src/error.ts +++ b/packages/hub/src/error.ts @@ -6,9 +6,11 @@ export async function createApiError( ): Promise { const error = new HubApiError(response.url, response.status, response.headers.get("X-Request-Id") ?? opts?.requestId); - error.message = `Api error with status ${error.statusCode}.${opts?.message ? ` ${opts.message}.` : ""} Request ID: ${ - error.requestId - }, url: ${error.url}`; + error.message = `Api error with status ${error.statusCode}${opts?.message ? `. ${opts.message}` : ""}`; + + const trailer = [`URL: ${error.url}`, error.requestId ? `Request ID: ${error.requestId}` : undefined] + .filter(Boolean) + .join(". "); if (response.headers.get("Content-Type")?.startsWith("application/json")) { const json = await response.json(); @@ -18,6 +20,8 @@ export async function createApiError( error.data = { message: await response.text() }; } + error.message += `. ${trailer}`; + throw error; } diff --git a/packages/hub/src/lib/count-commits.spec.ts b/packages/hub/src/lib/count-commits.spec.ts new file mode 100644 index 000000000..f60754789 --- /dev/null +++ b/packages/hub/src/lib/count-commits.spec.ts @@ -0,0 +1,16 @@ +import { assert, it, describe } from "vitest"; +import { countCommits } from "./count-commits"; + +describe("countCommits", () => { + it("should fetch paginated commits from the repo", async () => { + const count = await countCommits({ + repo: { + name: "openai-community/gpt2", + type: "model", + }, + revision: "607a30d783dfa663caf39e06633721c8d4cfcd7e", + }); + + assert.equal(count, 26); + }); +}); diff --git a/packages/hub/src/lib/count-commits.ts b/packages/hub/src/lib/count-commits.ts new file mode 100644 index 000000000..4489740f5 --- /dev/null +++ b/packages/hub/src/lib/count-commits.ts @@ -0,0 +1,34 @@ +import { HUB_URL } from "../consts"; +import { createApiError } from "../error"; +import type { Credentials, RepoDesignation } from "../types/public"; +import { checkCredentials } from "../utils/checkCredentials"; +import { toRepoId } from "../utils/toRepoId"; + +export async function countCommits(params: { + credentials?: Credentials; + repo: RepoDesignation; + /** + * Revision to list commits from. Defaults to the default branch. + */ + revision?: string; + hubUrl?: string; + fetch?: typeof fetch; +}): Promise { + checkCredentials(params.credentials); + const repoId = toRepoId(params.repo); + + // Could upgrade to 1000 commits per page + const url: string | undefined = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${ + params.revision ?? "main" + }?limit=1`; + + const res: Response = await (params.fetch ?? fetch)(url, { + headers: params.credentials ? { Authorization: `Bearer ${params.credentials.accessToken}` } : {}, + }); + + if (!res.ok) { + throw await createApiError(res); + } + + return parseInt(res.headers.get("x-total-count") ?? "0", 10); +} diff --git a/packages/hub/src/lib/index.ts b/packages/hub/src/lib/index.ts index c78b08aea..554977f02 100644 --- a/packages/hub/src/lib/index.ts +++ b/packages/hub/src/lib/index.ts @@ -1,4 +1,5 @@ export * from "./commit"; +export * from "./count-commits"; export * from "./create-repo"; export * from "./delete-file"; export * from "./delete-files"; @@ -6,6 +7,7 @@ export * from "./delete-repo"; export * from "./download-file"; export * from "./file-download-info"; export * from "./file-exists"; +export * from "./list-commits"; export * from "./list-datasets"; export * from "./list-files"; export * from "./list-models"; diff --git a/packages/hub/src/lib/list-commits.spec.ts b/packages/hub/src/lib/list-commits.spec.ts new file mode 100644 index 000000000..3903f071f --- /dev/null +++ b/packages/hub/src/lib/list-commits.spec.ts @@ -0,0 +1,117 @@ +import { assert, it, describe } from "vitest"; +import type { CommitData } from "./list-commits"; +import { listCommits } from "./list-commits"; + +describe("listCommits", () => { + it("should fetch paginated commits from the repo", async () => { + const commits: CommitData[] = []; + for await (const commit of listCommits({ + repo: { + name: "openai-community/gpt2", + type: "model", + }, + revision: "607a30d783dfa663caf39e06633721c8d4cfcd7e", + batchSize: 5, + })) { + commits.push(commit); + } + + assert.equal(commits.length, 26); + assert.deepEqual(commits.slice(0, 6), [ + { + oid: "607a30d783dfa663caf39e06633721c8d4cfcd7e", + title: "Adds the tokenizer configuration file (#80)", + message: "\n\n\n- Adds tokenizer_config.json file (db6d57930088fb63e52c010bd9ac77c955ac55e7)\n\n", + authors: [ + { + username: "lysandre", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1618450692745-5e3aec01f55e2b62848a5217.jpeg", + }, + ], + date: new Date("2024-02-19T10:57:45.000Z"), + }, + { + oid: "11c5a3d5811f50298f278a704980280950aedb10", + title: "Adding ONNX file of this model (#60)", + message: "\n\n\n- Adding ONNX file of this model (9411f419c589519e1a46c94ac7789ea20fd7c322)\n\n", + authors: [ + { + username: "fxmarty", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1651743336129-624c60cba8ec93a7ac188b56.png", + }, + ], + date: new Date("2023-06-30T02:19:43.000Z"), + }, + { + oid: "e7da7f221d5bf496a48136c0cd264e630fe9fcc8", + title: "Update generation_config.json", + message: "", + authors: [ + { + username: "joaogante", + avatarUrl: "https://cdn-avatars.huggingface.co/v1/production/uploads/1641203017724-noauth.png", + }, + ], + date: new Date("2022-12-16T15:44:21.000Z"), + }, + { + oid: "f27b190eeac4c2302d24068eabf5e9d6044389ae", + title: "Add note that this is the smallest version of the model (#18)", + message: + "\n\n\n- Add note that this is the smallest version of the model (611838ef095a5bb35bf2027d05e1194b7c9d37ac)\n\n\nCo-authored-by: helen \n", + authors: [ + { + username: "sgugger", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1593126474392-5ef50182b71947201082a4e5.jpeg", + }, + { + username: "mathemakitten", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1658248499901-6079afe2d2cd8c150e6ae05e.jpeg", + }, + ], + date: new Date("2022-11-23T12:55:26.000Z"), + }, + { + oid: "0dd7bcc7a64e4350d8859c9a2813132fbf6ae591", + title: "Our very first generation_config.json (#17)", + message: + "\n\n\n- Our very first generation_config.json (671851b7e9d56ef062890732065d7bd5f4628bd6)\n\n\nCo-authored-by: Joao Gante \n", + authors: [ + { + username: "sgugger", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1593126474392-5ef50182b71947201082a4e5.jpeg", + }, + { + username: "joaogante", + avatarUrl: "https://cdn-avatars.huggingface.co/v1/production/uploads/1641203017724-noauth.png", + }, + ], + date: new Date("2022-11-18T18:19:30.000Z"), + }, + { + oid: "75e09b43581151bd1d9ef6700faa605df408979f", + title: "Upload model.safetensors with huggingface_hub (#12)", + message: + "\n\n\n- Upload model.safetensors with huggingface_hub (ba2f794b2e4ea09ef932a6628fa0815dfaf09661)\n\n\nCo-authored-by: Nicolas Patry \n", + authors: [ + { + username: "julien-c", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/NQtzmrDdbG0H8qkZvRyGk.jpeg", + }, + { + username: "Narsil", + avatarUrl: + "https://cdn-avatars.huggingface.co/v1/production/uploads/1608285816082-5e2967b819407e3277369b95.png", + }, + ], + date: new Date("2022-10-20T09:34:54.000Z"), + }, + ]); + }); +}); diff --git a/packages/hub/src/lib/list-commits.ts b/packages/hub/src/lib/list-commits.ts new file mode 100644 index 000000000..42c9ab765 --- /dev/null +++ b/packages/hub/src/lib/list-commits.ts @@ -0,0 +1,69 @@ +import { HUB_URL } from "../consts"; +import { createApiError } from "../error"; +import type { ApiCommitData } from "../types/api/api-commit"; +import type { Credentials, RepoDesignation } from "../types/public"; +import { checkCredentials } from "../utils/checkCredentials"; +import { parseLinkHeader } from "../utils/parseLinkHeader"; +import { toRepoId } from "../utils/toRepoId"; + +export interface CommitData { + oid: string; + title: string; + message: string; + authors: Array<{ username: string; avatarUrl: string }>; + date: Date; +} + +export async function* listCommits(params: { + credentials?: Credentials; + repo: RepoDesignation; + /** + * Revision to list commits from. Defaults to the default branch. + */ + revision?: string; + hubUrl?: string; + /** + * Number of commits to fetch from the hub each http call. Defaults to 100. Can be set to 1000. + */ + batchSize?: number; + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. + */ + fetch?: typeof fetch; +}): AsyncGenerator { + checkCredentials(params.credentials); + const repoId = toRepoId(params.repo); + + // Could upgrade to 1000 commits per page + let url: string | undefined = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${ + params.revision ?? "main" + }?limit=${params.batchSize ?? 100}`; + + while (url) { + const res: Response = await (params.fetch ?? fetch)(url, { + headers: params.credentials ? { Authorization: `Bearer ${params.credentials.accessToken}` } : {}, + }); + + if (!res.ok) { + throw await createApiError(res); + } + + const resJson: ApiCommitData[] = await res.json(); + for (const commit of resJson) { + yield { + oid: commit.id, + title: commit.title, + message: commit.message, + authors: commit.authors.map((author) => ({ + username: author.user, + avatarUrl: author.avatar, + })), + date: new Date(commit.date), + }; + } + + const linkHeader = res.headers.get("Link"); + + url = linkHeader ? parseLinkHeader(linkHeader).next : undefined; + } +} diff --git a/packages/hub/src/types/api/api-commit.d.ts b/packages/hub/src/types/api/api-commit.d.ts index debb24413..7cc6ab45f 100644 --- a/packages/hub/src/types/api/api-commit.d.ts +++ b/packages/hub/src/types/api/api-commit.d.ts @@ -180,3 +180,12 @@ export type ApiCommitOperation = key: "deletedFile"; value: ApiCommitDeletedEntry; }; + +export interface ApiCommitData { + id: string; + title: string; + message: string; + authors: Array<{ user: string; avatar: string }>; + date: string; + formatted?: string; +}