Skip to content

Commit 41a8898

Browse files
authored
Merge pull request #102 from newgene/auto-snapshot-cleanup
Implement auto snapshot cleanup feature
2 parents d23f306 + 80f2383 commit 41a8898

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed

biothings/hub/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ class HubServer(object):
332332
"diff",
333333
"index",
334334
"snapshot",
335+
"auto_snapshot_cleaner",
335336
"release",
336337
"inspect",
337338
"sync",
@@ -706,6 +707,17 @@ def configure_snapshot_manager(self):
706707
snapshot_manager.poll("snapshot", snapshot_manager.snapshot_a_build)
707708
self.managers["snapshot_manager"] = snapshot_manager
708709

710+
def configure_auto_snapshot_cleaner_manager(self):
    """Create, configure and register the auto snapshot cleaner manager.

    The cleaner is built on top of the already-registered snapshot and job
    managers, configured from ``config.AUTO_SNAPSHOT_CLEANUP_CONFIG`` and
    stored under ``self.managers["auto_snapshot_cleaner_manager"]``.
    """
    assert "snapshot" in self.features, "'auto_snapshot_cleaner' feature requires 'snapshot'"
    # Imported lazily, at the point the feature is actually configured.
    from biothings.hub.dataindex.auto_snapshot_cleanup import AutoSnapshotCleanupManager

    managers = self.managers
    cleaner = AutoSnapshotCleanupManager(
        snapshot_manager=managers["snapshot_manager"],
        job_manager=managers["job_manager"],
    )
    cleaner.configure(config.AUTO_SNAPSHOT_CLEANUP_CONFIG)
    managers["auto_snapshot_cleaner_manager"] = cleaner
720+
709721
def configure_release_manager(self):
710722
assert "diff" in self.features, "'release' feature requires 'diff'"
711723
assert "snapshot" in self.features, "'release' feature requires 'snapshot'"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from functools import partial
2+
3+
from biothings import config as btconfig
4+
from biothings.utils.manager import BaseManager
5+
6+
logger = btconfig.logger
7+
8+
9+
class AutoSnapshotCleanupManager(BaseManager):
    """Schedule recurring cleanup of old snapshots, one job per environment.

    For every environment registered in the snapshot manager that has a dict
    entry in the cleanup configuration, ``configure`` submits a scheduled job
    to the job manager which calls ``snapshot_manager.cleanup`` for that
    environment.

    The configuration (the hub passes ``AUTO_SNAPSHOT_CLEANUP_CONFIG``) maps
    an environment name to its settings:

        AUTO_SNAPSHOT_CLEANUP_CONFIG = {
            "environment_name": {
                "schedule": "0 0 * * *",     # cron expression, optional (see DEFAULT_SCHEDULE)
                "keep": 3,                   # the number of most recent snapshots to keep in one group
                "group_by": "build_config",  # the attr of which its values form groups
                "extra_filters": {}          # optional criteria to limit which snapshots are to be cleaned
            },
            ...
        }
    """

    # Five-field cron expression: minute 0 of hour 0, i.e. once a day at
    # midnight. (A "*" in the minute field would fire on every minute of that
    # hour instead.) NOTE(review): the time zone is whatever the job
    # manager's scheduler uses -- the original comment said UTC; confirm.
    DEFAULT_SCHEDULE = "0 0 * * *"

    def __init__(self, snapshot_manager, job_manager, *args, **kwargs):
        """Keep a reference to the snapshot manager whose snapshots get cleaned.

        Args:
            snapshot_manager: manager exposing ``register`` (the known
                environments) and ``cleanup``.
            job_manager: forwarded to ``BaseManager``; used to submit the
                scheduled cleanup jobs.
            *args, **kwargs: forwarded unchanged to ``BaseManager``.
        """
        super().__init__(job_manager, *args, **kwargs)

        self.snapshot_manager = snapshot_manager

    def configure(self, conf=None):
        """Submit one scheduled cleanup job per configured snapshot environment.

        Args:
            conf: mapping of environment name to cleanup settings (see the
                class docstring). ``None`` or an empty mapping means no job
                is scheduled.

        Environments without a dict entry in ``conf`` are logged and skipped.
        ``keep`` and ``group_by`` are forwarded to ``snapshot_manager.cleanup``
        as-is (``None`` when not configured).
        """
        self.auto_snapshot_cleaner_config = conf or {}

        for env_name in self.snapshot_manager.register.keys():
            cleaner_config = self.auto_snapshot_cleaner_config.get(env_name)

            if not isinstance(cleaner_config, dict):
                logger.info(f"Snapshot environment: {env_name}: No cleaner config found!")
                continue

            schedule = cleaner_config.get("schedule") or self.DEFAULT_SCHEDULE
            keep = cleaner_config.get("keep")
            group_by = cleaner_config.get("group_by")
            # "extra_filters" is optional: fall back to an empty dict so the
            # ** expansion below does not fail on None.
            extra_filters = cleaner_config.get("extra_filters") or {}

            self.job_manager.submit(
                partial(
                    self.snapshot_manager.cleanup,
                    env=env_name,
                    keep=keep,
                    group_by=group_by,
                    dryrun=False,
                    **extra_filters,
                ),
                schedule=schedule,
            )

biothings/hub/default_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,20 @@
225225
# Snapshot environment configuration
226226
SNAPSHOT_CONFIG = {}
227227

228+
# Auto snapshot cleaner feature will use this configuration to get schedule config for corresponding environment.
229+
AUTO_SNAPSHOT_CLEANUP_CONFIG = None
230+
"""
231+
AUTO_SNAPSHOT_CLEANUP_CONFIG = {
232+
"environment_name": {
233+
"schedule": "0 0 * * *", # run daily at 0am UTC
234+
"keep": 3, # the number of most recent snapshots to keep in one group
235+
"group_by": "build_config", # the attr of which its values form groups
236+
"extra_filters": {} # a set of criteria to limit which snapshots are to be cleaned
237+
},
238+
...
239+
}
240+
"""
241+
228242
# reporting diff results, number of IDs to consider (to avoid too much mem usage)
229243
MAX_REPORTED_IDS = 1000
230244
# for diff updates, number of IDs randomly picked as examples when rendering the report

0 commit comments

Comments
 (0)