Skip to content

Commit

Permalink
Merge pull request #2985 from chaoss:use-key-manager
Browse files Browse the repository at this point in the history
Use key manager merger into DEV
  • Loading branch information
Ulincsys authored Feb 12, 2025
2 parents 61efa9c + e13f66c commit 0e26d37
Show file tree
Hide file tree
Showing 8 changed files with 659 additions and 8 deletions.
25 changes: 24 additions & 1 deletion augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
from augur.tasks.init.redis_connection import redis_connection
from augur.application.db.models import UserRepo
Expand All @@ -27,6 +29,7 @@
from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
import sqlalchemy as s

from keyman.KeyClient import KeyClient, KeyPublisher

logger = AugurLogger("augur", reset_logfiles=True).get_logger()

Expand Down Expand Up @@ -116,8 +119,27 @@ def start(ctx, disable_collection, development, pidfile, port):
celery_beat_process = None
celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}"
celery_beat_process = subprocess.Popen(celery_command.split(" "))

keypub = KeyPublisher()

if not disable_collection:
orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split())

# Wait for orchestrator startup
if not keypub.wait(republish=True):
logger.critical("Key orchestrator did not respond in time")
return

# load keys
ghkeyman = GithubApiKeyHandler(logger)
glkeyman = GitlabApiKeyHandler(logger)

for key in ghkeyman.keys:
keypub.publish(key, "github_rest")
keypub.publish(key, "github_graphql")

for key in glkeyman.keys:
keypub.publish(key, "gitlab_rest")

with DatabaseSession(logger, engine=ctx.obj.engine) as session:

clean_collection_status(session)
Expand Down Expand Up @@ -157,6 +179,7 @@ def start(ctx, disable_collection, development, pidfile, port):
if not disable_collection:

try:
keypub.shutdown()
cleanup_after_collection_halt(logger, ctx.obj.engine)
except RedisConnectionError:
pass
Expand Down
26 changes: 26 additions & 0 deletions augur/application/cli/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
from augur.application.db.models import UserRepo
from augur.application.db.session import DatabaseSession
Expand All @@ -25,6 +27,8 @@
from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
from augur.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages

from keyman.KeyClient import KeyClient, KeyPublisher

logger = AugurLogger("augur", reset_logfiles=False).get_logger()

@click.group('server', short_help='Commands for controlling the backend API server & data collection workers')
Expand All @@ -51,6 +55,26 @@ def start(ctx, development):
logger.error("Failed to raise open file limit!")
raise e

keypub = KeyPublisher()

orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split())

# Wait for orchestrator startup
if not keypub.wait(republish=True):
logger.critical("Key orchestrator did not respond in time")
return

# load keys
ghkeyman = GithubApiKeyHandler(logger)
glkeyman = GitlabApiKeyHandler(logger)

for key in ghkeyman.keys:
keypub.publish(key, "github_rest")
keypub.publish(key, "github_graphql")

for key in glkeyman.keys:
keypub.publish(key, "gitlab_rest")

if development:
os.environ["AUGUR_DEV"] = "1"
logger.info("Starting in development mode")
Expand Down Expand Up @@ -94,6 +118,8 @@ def start(ctx, development):
if p:
p.terminate()

keypub.shutdown()

if celery_beat_process:
logger.info("Shutting down celery beat process")
celery_beat_process.terminate()
Expand Down
18 changes: 13 additions & 5 deletions augur/tasks/github/util/github_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import httpx
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception, RetryError
from urllib.parse import urlparse, parse_qs, urlencode
from keyman.KeyClient import KeyClient


class RatelimitException(Exception):
Expand All @@ -21,7 +22,8 @@ class GithubDataAccess:
def __init__(self, key_manager, logger: logging.Logger):

self.logger = logger
self.key_manager = key_manager
self.key_client = KeyClient("github_rest", logger)
self.key = None

def get_resource_count(self, url):

Expand Down Expand Up @@ -93,7 +95,12 @@ def make_request(self, url, method="GET", timeout=100):

with httpx.Client() as client:

response = client.request(method=method, url=url, auth=self.key_manager, timeout=timeout, follow_redirects=True)
if not self.key:
self.key = self.key_client.request()

headers = {"Authorization": f"token {self.key}"}

response = client.request(method=method, url=url, headers=headers, timeout=timeout, follow_redirects=True)

if response.status_code in [403, 429]:
raise RatelimitException(response)
Expand Down Expand Up @@ -121,7 +128,7 @@ def __make_request_with_retries(self, url, method="GET", timeout=100):
1. Retires 10 times
2. Waits 5 seconds between retires
3. Does not rety UrlNotFoundException
4. Catches RatelimitException and waits before raising exception
4. Catches RatelimitException and waits or expires key before raising exception
"""

try:
Expand Down Expand Up @@ -150,8 +157,9 @@ def __handle_github_ratelimit_response(self, response):
self.logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}")
key_reset_time = 0

self.logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)")
time.sleep(key_reset_time)
self.logger.info(f"\n\n\nAPI rate limit exceeded. Key resets in {key_reset_time} seconds. Informing key manager that key is expired")
self.key = self.key_client.expire(self.key, epoch_when_key_resets)

else:
time.sleep(60)

Expand Down
5 changes: 3 additions & 2 deletions augur/tasks/gitlab/gitlab_api_key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ def get_api_keys(self) -> List[str]:
# add all the keys to redis
self.redis_key_list.extend(valid_keys)

if not valid_keys:
raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")
# Removed because most people do not collect gitlab and this blows up on startup if they don't have any gitlab keys
# if not valid_keys:
# raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")


# shuffling the keys so not all processes get the same keys in the same order
Expand Down
Loading

0 comments on commit 0e26d37

Please sign in to comment.