|
| 1 | +""" |
| 2 | +A script to plot the number of packages registered over time. |
| 3 | +
|
| 4 | +You will need the following packages installed: |
| 5 | +- PyGithub |
| 6 | +- numpy |
| 7 | +- pandas |
| 8 | +- seaborn |
| 9 | +- matplotlib |
| 10 | +
|
| 11 | +You will need a GitHub token with access to the repository to run this script. |
| 12 | +The token should be stored in the environment variable `GH_TOKEN`. |
| 13 | +""" |
| 14 | +import argparse |
| 15 | +import datetime |
| 16 | +import re |
| 17 | +import os |
| 18 | + |
| 19 | +import numpy as np |
| 20 | +import pandas as pd |
| 21 | +from github import Github |
| 22 | + |
| 23 | +import seaborn as sns |
| 24 | +import matplotlib.dates as mdates |
| 25 | +from matplotlib import pyplot as plt |
| 26 | + |
# Repository whose merged pull requests are scanned, in "owner/name" form.
REPO_NAME = "mdanalysis/MDAKits"
# Looked up eagerly at import time: a missing GH_TOKEN raises KeyError here.
GITHUB_TOKEN = os.environ["GH_TOKEN"]

# Command-line interface; the module docstring doubles as the description.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--verbose", action="store_true", help="Print the DataFrame of results")
| 36 | + |
def find_prs_with_file(repo_name, github_token):
    """
    Collect the merged pull requests that added a `metadata.yaml` file.

    Parameters
    ----------
    repo_name : str
        Repository to query, passed to ``Github.get_repo``.
    github_token : str
        Token handed to the ``Github`` client.

    Returns
    -------
    list of dict
        One entry per matching pull request, with the keys ``pr_number``,
        ``pr_title``, ``merged_at`` and ``html_url``. Entries appear in the
        order the pull requests were yielded by ``get_pulls``.
    """
    metadata_re = re.compile(r"metadata\.yaml$")
    repository = Github(github_token).get_repo(repo_name)

    def adds_metadata(pull):
        # Only files with status 'added' count, so PRs that merely edit an
        # existing metadata file are ignored. `any` stops at the first hit.
        return any(
            changed.status == 'added' and metadata_re.search(changed.filename)
            for changed in pull.get_files()
        )

    matches = []
    for pull in repository.get_pulls(state='closed'):
        # Closed pull requests that were never merged have no merge date
        if pull.merged_at is None:
            continue

        if adds_metadata(pull):
            # These attributes are mostly kept for debugging
            matches.append({
                'pr_number': pull.number,
                'pr_title': pull.title,
                'merged_at': pull.merged_at,
                'html_url': pull.html_url
            })

    return matches
| 70 | + |
| 71 | + |
def main(
    verbose: bool = False,
):
    """
    Plot the cumulative number of packages registered over time.

    Finds the merged pull requests in ``REPO_NAME`` that added a
    `metadata.yaml` file, orders them by merge date, and saves a line plot
    of the running total to ``mdakits-registry-growth.png``.

    Parameters
    ----------
    verbose : bool
        If True, print the DataFrame of results before plotting.

    Raises
    ------
    SystemExit
        If no matching pull requests were found, since there is nothing
        to plot.
    """
    results = find_prs_with_file(REPO_NAME, GITHUB_TOKEN)
    if not results:
        # A DataFrame built from an empty list has no "merged_at" column, so
        # the sort below would fail with an opaque KeyError. Stop here with
        # an actionable message instead.
        raise SystemExit(
            f"No merged pull requests adding a metadata.yaml file were found in {REPO_NAME}"
        )

    df = pd.DataFrame(results)
    df_cumsum = df.sort_values("merged_at")
    # Rows are now in merge order, so a 1-based running index is the
    # cumulative number of registered packages.
    df_cumsum["Number of MDAKits"] = np.arange(1, len(df_cumsum) + 1)

    if verbose:
        print(df_cumsum)

    _, ax = plt.subplots(figsize=(4, 3))
    sns.lineplot(
        ax=ax,
        data=df_cumsum,
        x="merged_at",
        y="Number of MDAKits"
    )

    # NOTE: the x-axis window is fixed; pull requests merged outside this
    # range will not be visible in the saved figure.
    ax.set_xlim((
        datetime.date(2023, 1, 1),
        datetime.date(2024, 11, 30)
    ))
    # Labelled ticks every January and July, unlabelled ticks every month
    ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
    ax.set_xlabel("Added to registry")
    ax.set_ylabel("Number of MDAKits")
    ax.set_title("MDAKits Registry Growth")
    plt.tight_layout()

    output_path = "mdakits-registry-growth.png"
    plt.savefig(output_path, dpi=300)
    print(f"Saved plot to {output_path}")
| 107 | + |
| 108 | + |
if __name__ == "__main__":
    # Parse the command line and forward the --verbose flag to main()
    cli_options = parser.parse_args()
    main(verbose=cli_options.verbose)
0 commit comments