Skip to content

Commit 7342a20

Browse files
committed
add plotting script and plot
1 parent 8349841 commit 7342a20

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

devtools/mdakits-registry-growth.png

63.8 KB
Loading

devtools/plot-over-time.py

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
"""
A script to plot the number of packages registered over time.

You will need the following packages installed:
- PyGithub
- numpy
- pandas
- seaborn
- matplotlib

You will need a GitHub token with access to the repository to run this script.
The token should be stored in the environment variable `GH_TOKEN`.
"""
14+
import argparse
15+
import datetime
16+
import re
17+
import os
18+
19+
import numpy as np
20+
import pandas as pd
21+
from github import Github
22+
23+
import seaborn as sns
24+
import matplotlib.dates as mdates
25+
from matplotlib import pyplot as plt
26+
27+
# Repository (``owner/name``) whose merged pull requests are scanned.
REPO_NAME = "mdanalysis/MDAKits"
# Read eagerly at import time: a missing `GH_TOKEN` raises KeyError
# before any GitHub request is attempted.
GITHUB_TOKEN = os.environ["GH_TOKEN"]
29+
30+
# Command-line interface; the module docstring is reused as the --help text.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--verbose",
    action="store_true",
    help="Print the DataFrame of results",
)
36+
37+
def find_prs_with_file(repo_name, github_token):
    """
    Find all merged pull requests that added a `metadata.yaml` file.

    Parameters
    ----------
    repo_name
        Repository identifier handed to ``Github.get_repo``
        (e.g. ``"mdanalysis/MDAKits"``).
    github_token
        Token used to construct the ``Github`` API client.

    Returns
    -------
    list of dict
        One entry per matching pull request, with the keys ``pr_number``,
        ``pr_title``, ``merged_at`` and ``html_url``.
    """
    pattern = re.compile(r"metadata\.yaml$")

    g = Github(github_token)
    repo = g.get_repo(repo_name)

    prs_with_file = []

    # Iterate through all closed pull requests in the repository
    for pr in repo.get_pulls(state='closed'):
        # Closed-but-unmerged PRs have no merge timestamp; skip them
        if pr.merged_at is None:
            continue

        # Look for the *addition* of a `metadata.yaml` file so that
        # later updates and changes to existing metadata are ignored
        for file in pr.get_files():
            if file.status == 'added' and pattern.search(file.filename):
                # mostly these attributes are for debugging
                prs_with_file.append({
                    'pr_number': pr.number,
                    'pr_title': pr.title,
                    'merged_at': pr.merged_at,
                    'html_url': pr.html_url
                })
                # Record each PR at most once, even if it adds several files
                break

    return prs_with_file
70+
71+
72+
def main(
    verbose: bool = False,
):
    """
    Plot the cumulative number of packages registered over time.

    Collects the merged pull requests that added a `metadata.yaml` file,
    orders them by merge time, and saves a line plot of the running count
    to ``mdakits-registry-growth.png`` in the current working directory.

    Parameters
    ----------
    verbose
        If True, print the DataFrame of results before plotting.

    Raises
    ------
    RuntimeError
        If no matching pull requests were found, so there is nothing to plot.
    """
    results = find_prs_with_file(REPO_NAME, GITHUB_TOKEN)
    if not results:
        # An empty DataFrame has no "merged_at" column, so sorting below
        # would fail with an opaque KeyError; fail with a clear message.
        raise RuntimeError(
            "No merged pull requests adding a metadata.yaml file were found; "
            "nothing to plot."
        )

    df = pd.DataFrame(results)
    df_cumsum = df.sort_values("merged_at")
    # After sorting by merge time, the 1-based row position is the
    # cumulative number of registered packages.
    df_cumsum["Number of MDAKits"] = np.arange(len(df)) + 1

    if verbose:
        print(df_cumsum)

    _, ax = plt.subplots(figsize=(4, 3))
    ax = sns.lineplot(
        ax=ax,
        data=df_cumsum,
        x="merged_at",
        y="Number of MDAKits"
    )

    # NOTE: the x-axis window is fixed; merges outside this date range
    # will not be visible in the saved figure.
    ax.set_xlim((
        datetime.date(2023, 1, 1),
        datetime.date(2024, 11, 30)
    ))
    # Major ticks every January and July, minor ticks every month
    ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
    ax.set_xlabel("Added to registry")
    ax.set_ylabel("Number of MDAKits")
    ax.set_title("MDAKits Registry Growth")
    plt.tight_layout()
    plt.savefig("mdakits-registry-growth.png", dpi=300)
    print("Saved plot to mdakits-registry-growth.png")
107+
108+
109+
# Script entry point: parse the command line and run the plot.
if __name__ == "__main__":
    args = parser.parse_args()
    main(verbose=args.verbose)

0 commit comments

Comments
 (0)