|
| 1 | +import pathlib |
| 2 | +import re |
| 3 | +import requests |
| 4 | +import tempfile |
| 5 | +import typing as t |
| 6 | + |
| 7 | + |
def normalize_url(path: str) -> str:
    """Normalize a URL by converting backslashes and repairing the HTTPS separator.

    Backslashes are replaced with forward slashes, and whatever run of slashes
    follows ``https:`` is collapsed to exactly two. The operation is idempotent:
    an already well-formed ``https://`` URL is returned unchanged.

    :param path: The URL (or URL-like path) to normalize.
    :return: The normalized URL.
    """
    corrected_url = path.replace("\\", "/")
    # A plain str.replace("https:/", "https://") also matches the prefix of a
    # correct "https://" and turns it into "https:///"; matching the whole run
    # of slashes avoids that.
    return re.sub(r"https:/+", "https://", corrected_url)
| 12 | + |
| 13 | + |
def fetch_metadata_from_repo(repo_url: str, filename: str) -> t.Optional[pathlib.Path]:
    """
    Fetch a metadata file (e.g., CITATION.cff or codemeta.json) from a GitHub or GitLab repository.

    Only the repository's top-level directory listing is searched, as returned
    by the hosting service's REST API for the default branch. For GitLab, at
    most the first 100 entries of that listing are inspected. This function
    never raises: any failure is printed and reported as ``None``.

    :param repo_url: The repository URL. A trailing slash and a clone-style
        ``.git`` suffix are tolerated.
    :param filename: The name of the metadata file to fetch.
    :return: Path to the temporary file containing the downloaded metadata, or
        None if the host is unsupported, the file is not listed, or a request fails.
    """
    # requests waits indefinitely unless a timeout is given explicitly.
    request_timeout = 30  # seconds

    # The REST APIs address a repository without the ".git" suffix that clone
    # URLs carry, so strip it (and any trailing slash) before building API URLs.
    clean_url = repo_url.rstrip("/")
    if clean_url.endswith(".git"):
        clean_url = clean_url[: -len(".git")]

    try:
        if "github.com" in clean_url:
            # GitHub API: list the repository root and look for the file.
            api_url = clean_url.replace("github.com", "api.github.com/repos") + "/contents"
            response = requests.get(api_url, timeout=request_timeout)
            if response.status_code == 200:
                for file_info in response.json():
                    if file_info["name"] == filename:
                        return _download_to_tempfile(file_info["download_url"], filename)
        elif "gitlab.com" in clean_url:
            # GitLab API: projects are addressed by their URL-encoded "namespace/project" path.
            match = re.match(r"https://([^/]+)/([^/]+)/([^/]+)", clean_url)
            if match:
                base_domain, group_or_user, project_name = match.groups()
                project_id = requests.utils.quote(f"{group_or_user}/{project_name}", safe="")
                repository_api = f"https://{base_domain}/api/v4/projects/{project_id}/repository"

                # The tree endpoint is paginated; request the largest page so the
                # file is not missed in repositories with many root entries.
                response = requests.get(
                    f"{repository_api}/tree",
                    params={"per_page": 100},
                    timeout=request_timeout,
                )
                if response.status_code == 200:
                    for file_info in response.json():
                        if file_info["name"] == filename:
                            file_url = (
                                f"{repository_api}/files/"
                                f"{requests.utils.quote(filename, safe='')}/raw"
                            )
                            return _download_to_tempfile(file_url, filename)
        else:
            print(f"Unsupported repository URL: {repo_url}")
        # Reached when the host is unsupported, the listing request did not
        # succeed, or the file is not present in the listing.
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting None rather
        # than an exception for network, JSON, or schema problems.
        print(f"Error fetching metadata from repository: {e}")
        return None
| 57 | + |
| 58 | + |
def _download_to_tempfile(url: str, filename: str) -> t.Optional[pathlib.Path]:
    """
    Download a file from a URL and save it to a temporary file.

    The temporary file is created with ``delete=False``, so it persists after
    this function returns; removing it is left to the caller.

    :param url: The URL to download from.
    :param filename: The name of the file being downloaded; its last
        dot-separated component becomes the temporary file's suffix and the
        name is used in progress/error messages.
    :return: Path to the temporary file, or None if the download or write failed.
    """
    try:
        # requests waits indefinitely unless a timeout is given explicitly.
        response = requests.get(url, timeout=30)
        # Without this check the body of an HTTP error response (404, 403, ...)
        # would be saved and returned as if it were the metadata file.
        response.raise_for_status()

        suffix = f".{filename.split('.')[-1]}"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Write the bytes exactly as served instead of decoding with a
            # guessed charset and re-encoding, which can corrupt non-ASCII text.
            temp_file.write(response.content)
            print(f"Downloaded {filename} to {temp_file.name}")
            return pathlib.Path(temp_file.name)
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"Error downloading {filename}: {e}")
        return None
0 commit comments