Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use key manager merger into DEV #2985

Merged
merged 10 commits into from
Feb 12, 2025
25 changes: 24 additions & 1 deletion augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
from augur.tasks.init.redis_connection import redis_connection
from augur.application.db.models import UserRepo
Expand All @@ -27,6 +29,7 @@
from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
import sqlalchemy as s

from keyman.KeyClient import KeyClient, KeyPublisher

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused KeyClient imported from keyman.KeyClient (unused-import)


logger = AugurLogger("augur", reset_logfiles=True).get_logger()

Expand Down Expand Up @@ -106,7 +109,7 @@
logger.info(f'Augur is running at: {"http" if development else "https"}://{host}:{port}')
logger.info(f"The API is available at '{api_response.json()['route']}'")

processes = start_celery_worker_processes(float(worker_vmem_cap), disable_collection)

Check warning on line 112 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'processes' from outer scope (line 466) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:112:4: W0621: Redefining name 'processes' from outer scope (line 466) (redefined-outer-name)

if os.path.exists("celerybeat-schedule.db"):
logger.info("Deleting old task schedule")
Expand All @@ -116,8 +119,27 @@
celery_beat_process = None
celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}"
celery_beat_process = subprocess.Popen(celery_command.split(" "))

keypub = KeyPublisher()

if not disable_collection:
orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split())

# Wait for orchestrator startup
if not keypub.wait(republish=True):
logger.critical("Key orchestrator did not respond in time")
return

# load keys
ghkeyman = GithubApiKeyHandler(logger)
glkeyman = GitlabApiKeyHandler(logger)

for key in ghkeyman.keys:
keypub.publish(key, "github_rest")
keypub.publish(key, "github_graphql")

for key in glkeyman.keys:
keypub.publish(key, "gitlab_rest")

with DatabaseSession(logger, engine=ctx.obj.engine) as session:

clean_collection_status(session)
Expand Down Expand Up @@ -157,6 +179,7 @@
if not disable_collection:

try:
keypub.shutdown()
cleanup_after_collection_halt(logger, ctx.obj.engine)
except RedisConnectionError:
pass
Expand Down Expand Up @@ -229,7 +252,7 @@
"""
Sends SIGTERM to all Augur server & worker processes
"""
logger = logging.getLogger("augur.cli")

Check warning on line 255 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:255:4: W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name)

augur_stop(signal.SIGTERM, logger, ctx.obj.engine)

Expand All @@ -242,7 +265,7 @@
"""
Stop collection tasks if they are running, block until complete
"""
processes = get_augur_processes()

Check warning on line 268 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'processes' from outer scope (line 466) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:268:4: W0621: Redefining name 'processes' from outer scope (line 466) (redefined-outer-name)

stopped = []

Expand All @@ -252,7 +275,7 @@
stopped.append(p)
p.terminate()

if not len(stopped):

Check warning on line 278 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/application/cli/backend.py:278:7: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
logger.info("No collection processes found")
return

Expand All @@ -261,7 +284,7 @@

killed = []
while True:
for i in range(len(alive)):

Check warning on line 287 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C0200: Consider using enumerate instead of iterating with range and len (consider-using-enumerate) Raw Output: augur/application/cli/backend.py:287:8: C0200: Consider using enumerate instead of iterating with range and len (consider-using-enumerate)
if alive[i].status() == psutil.STATUS_ZOMBIE:
logger.info(f"KILLING ZOMBIE: {alive[i].pid}")
alive[i].kill()
Expand All @@ -273,7 +296,7 @@
for i in reversed(killed):
alive.pop(i)

if not len(alive):

Check warning on line 299 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/application/cli/backend.py:299:11: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
break

logger.info(f"Waiting on [{', '.join(str(p.pid for p in alive))}]")
Expand All @@ -290,11 +313,11 @@
"""
Sends SIGKILL to all Augur server & worker processes
"""
logger = logging.getLogger("augur.cli")

Check warning on line 316 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:316:4: W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name)
augur_stop(signal.SIGKILL, logger, ctx.obj.engine)


def augur_stop(signal, logger, engine):

Check warning on line 320 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'signal' from outer scope (line 12) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:320:15: W0621: Redefining name 'signal' from outer scope (line 12) (redefined-outer-name)

Check warning on line 320 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:320:23: W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name)
"""
Stops augur with the given signal,
and cleans up collection if it was running
Expand All @@ -310,7 +333,7 @@
cleanup_after_collection_halt(logger, engine)


def cleanup_after_collection_halt(logger, engine):

Check warning on line 336 in augur/application/cli/backend.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name) Raw Output: augur/application/cli/backend.py:336:34: W0621: Redefining name 'logger' from outer scope (line 34) (redefined-outer-name)
clear_redis_caches()

connection_string = get_value("RabbitMQ", "connection_string")
Expand Down
26 changes: 26 additions & 0 deletions augur/application/cli/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
from augur.application.db.models import UserRepo
from augur.application.db.session import DatabaseSession
Expand All @@ -25,6 +27,8 @@
from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
from augur.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages

from keyman.KeyClient import KeyClient, KeyPublisher

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused KeyClient imported from keyman.KeyClient (unused-import)


logger = AugurLogger("augur", reset_logfiles=False).get_logger()

@click.group('server', short_help='Commands for controlling the backend API server & data collection workers')
Expand All @@ -51,6 +55,26 @@ def start(ctx, development):
logger.error("Failed to raise open file limit!")
raise e

keypub = KeyPublisher()

orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split())

# Wait for orchestrator startup
if not keypub.wait(republish=True):
logger.critical("Key orchestrator did not respond in time")
return

# load keys
ghkeyman = GithubApiKeyHandler(logger)
glkeyman = GitlabApiKeyHandler(logger)

for key in ghkeyman.keys:
keypub.publish(key, "github_rest")
keypub.publish(key, "github_graphql")

for key in glkeyman.keys:
keypub.publish(key, "gitlab_rest")

if development:
os.environ["AUGUR_DEV"] = "1"
logger.info("Starting in development mode")
Expand Down Expand Up @@ -94,6 +118,8 @@ def start(ctx, development):
if p:
p.terminate()

keypub.shutdown()

if celery_beat_process:
logger.info("Shutting down celery beat process")
celery_beat_process.terminate()
Expand Down
18 changes: 13 additions & 5 deletions augur/tasks/github/util/github_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import httpx
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception, RetryError
from urllib.parse import urlparse, parse_qs, urlencode
from keyman.KeyClient import KeyClient


class RatelimitException(Exception):
Expand All @@ -21,7 +22,8 @@ class GithubDataAccess:
def __init__(self, key_manager, logger: logging.Logger):

self.logger = logger
self.key_manager = key_manager
self.key_client = KeyClient("github_rest", logger)
self.key = None

def get_resource_count(self, url):

Expand Down Expand Up @@ -93,7 +95,12 @@ def make_request(self, url, method="GET", timeout=100):

with httpx.Client() as client:

response = client.request(method=method, url=url, auth=self.key_manager, timeout=timeout, follow_redirects=True)
if not self.key:
self.key = self.key_client.request()

headers = {"Authorization": f"token {self.key}"}

response = client.request(method=method, url=url, headers=headers, timeout=timeout, follow_redirects=True)

if response.status_code in [403, 429]:
raise RatelimitException(response)
Expand Down Expand Up @@ -121,7 +128,7 @@ def __make_request_with_retries(self, url, method="GET", timeout=100):
1. Retires 10 times
2. Waits 5 seconds between retires
3. Does not rety UrlNotFoundException
4. Catches RatelimitException and waits before raising exception
4. Catches RatelimitException and waits or expires key before raising exception
"""

try:
Expand Down Expand Up @@ -150,8 +157,9 @@ def __handle_github_ratelimit_response(self, response):
self.logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}")
key_reset_time = 0

self.logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)")
time.sleep(key_reset_time)
self.logger.info(f"\n\n\nAPI rate limit exceeded. Key resets in {key_reset_time} seconds. Informing key manager that key is expired")
self.key = self.key_client.expire(self.key, epoch_when_key_resets)

else:
time.sleep(60)

Expand Down
5 changes: 3 additions & 2 deletions augur/tasks/gitlab/gitlab_api_key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ def get_api_keys(self) -> List[str]:
# add all the keys to redis
self.redis_key_list.extend(valid_keys)

if not valid_keys:
raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")
# Removed because most people do not collect gitlab and this blows up on startup if they don't have any gitlab keys
# if not valid_keys:
# raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")


# shuffling the keys so not all processes get the same keys in the same order
Expand Down
Loading
Loading