From 431f58e4e47e5bea6da5c5755033805399cd9b2e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 30 Aug 2024 06:54:13 -0500 Subject: [PATCH 01/19] add alembic script to add repo_src_id --- .../alembic/versions/30_add_repo_src_id.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 augur/application/schema/alembic/versions/30_add_repo_src_id.py diff --git a/augur/application/schema/alembic/versions/30_add_repo_src_id.py b/augur/application/schema/alembic/versions/30_add_repo_src_id.py new file mode 100644 index 000000000..c43c409ee --- /dev/null +++ b/augur/application/schema/alembic/versions/30_add_repo_src_id.py @@ -0,0 +1,23 @@ +"""Add commit message table + +Revision ID: 30 +Revises: 29 +Create Date: 2024-08-30 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = '30' +down_revision = '29' +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column('repo', sa.Column('repo_src_id', sa.BigInteger(), nullable=True), schema='augur_data') + + +def downgrade(): + op.drop_column('repo', 'repo_src_id', schema='augur_data') From 8214dc5bd74103b0cca0e142c24d8ffbf5f72341 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 2 Sep 2024 15:40:49 -0500 Subject: [PATCH 02/19] add task to populate repo src id --- .../tasks/github/util/populate_repo_src_id.py | 57 +++++++++++++++++++ augur/tasks/init/celery_app.py | 3 +- augur/tasks/start_tasks.py | 2 + 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 augur/tasks/github/util/populate_repo_src_id.py diff --git a/augur/tasks/github/util/populate_repo_src_id.py b/augur/tasks/github/util/populate_repo_src_id.py new file mode 100644 index 000000000..8c5c088d3 --- /dev/null +++ b/augur/tasks/github/util/populate_repo_src_id.py @@ -0,0 +1,57 @@ +import logging +import sqlalchemy as s + +from augur.tasks.init.celery_app import celery_app as celery +from augur.application.db.lib import get_repo_by_repo_git, execute_sql +from augur.tasks.github.util.util import get_owner_repo +from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess +from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth + +@celery.task +def populate_repo_src_id_task(repo_git): + + logger = logging.getLogger(populate_repo_src_id_task.__name__) + + repo_obj = get_repo_by_repo_git(repo_git) + repo_id = repo_obj.repo_id + + owner, repo = get_owner_repo(repo_git) + + key_auth = GithubRandomKeyAuth(logger) + + repo_src_id = get_repo_src_id(owner, repo, logger, key_auth) + + update_repo_src_id(repo_id, repo_src_id) + + +def get_repo_src_id(owner, repo, logger, key_auth): + + + query = """query($repo: String!, $owner: String!) { + repository(name: $repo, owner: $owner) { + updatedAt + } + } + """ + + github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + + variables = { + "owner": owner, + "repo": repo + } + + result_keys = ["repository", "databaseId"] + + repo_src_id = github_graphql_data_access.get_resource(query, variables, result_keys) + + return repo_src_id + + + +def update_repo_src_id(repo_id, repo_src_id): + + query = s.sql.text("""UPDATE repo SET repo_src_id=:repo_src_id WHERE repo_id=:repo_id; + """).bindparams(repo_src_id=repo_src_id, repo_id=repo_id) + + execute_sql(query) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index da97751db..562ddcb90 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -37,7 +37,8 @@ 'augur.tasks.github.detect_move.tasks', 'augur.tasks.github.pull_requests.files_model.tasks', 'augur.tasks.github.pull_requests.commits_model.tasks', - 'augur.tasks.github.traffic'] + 'augur.tasks.github.traffic', + 'augur.tasks.github.util.populate_repo_src_id'] gitlab_tasks = ['augur.tasks.gitlab.merge_request_task', 'augur.tasks.gitlab.issues_task', diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 562069ce8..785470ce8 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -14,6 +14,7 @@ from augur.tasks.github.repo_info.tasks import collect_repo_info, collect_linux_badge_info from augur.tasks.github.pull_requests.files_model.tasks import process_pull_request_files from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits +from augur.tasks.github.util.populate_repo_src_id import populate_repo_src_id_task from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files, collect_merge_request_comments @@ -65,6 +66,7 @@ def primary_repo_collect_phase(repo_git, full_collection): #Define primary group of jobs for the primary collect phase: issues and pull requests. primary_repo_jobs = group( + populate_repo_src_id_task.si(repo_git) collect_issues.si(repo_git, full_collection), collect_pull_requests.si(repo_git, full_collection) ) From 9ad160c087d008307b0186f60ac375bd53f19065 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 7 Sep 2024 11:31:11 -0500 Subject: [PATCH 03/19] move get_repo_src_id to generic location --- .../tasks/github/util/populate_repo_src_id.py | 33 ++----------------- augur/tasks/github/util/util.py | 27 +++++++++++++++ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/augur/tasks/github/util/populate_repo_src_id.py b/augur/tasks/github/util/populate_repo_src_id.py index 8c5c088d3..e5460f9b1 100644 --- a/augur/tasks/github/util/populate_repo_src_id.py +++ b/augur/tasks/github/util/populate_repo_src_id.py @@ -4,8 +4,8 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.application.db.lib import get_repo_by_repo_git, execute_sql from augur.tasks.github.util.util import get_owner_repo -from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess -from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth +from augur.tasks.github.util import get_repo_src_id + @celery.task def populate_repo_src_id_task(repo_git): @@ -17,38 +17,11 @@ def populate_repo_src_id_task(repo_git): owner, repo = get_owner_repo(repo_git) - key_auth = GithubRandomKeyAuth(logger) - - repo_src_id = get_repo_src_id(owner, repo, logger, key_auth) + repo_src_id = get_repo_src_id(owner, repo, logger) update_repo_src_id(repo_id, repo_src_id) -def get_repo_src_id(owner, repo, logger, key_auth): - - - query = """query($repo: String!, $owner: String!) { - repository(name: $repo, owner: $owner) { - updatedAt - } - } - """ - - github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) - - variables = { - "owner": owner, - "repo": repo - } - - result_keys = ["repository", "databaseId"] - - repo_src_id = github_graphql_data_access.get_resource(query, variables, result_keys) - - return repo_src_id - - - def update_repo_src_id(repo_id, repo_src_id): query = s.sql.text("""UPDATE repo SET repo_src_id=:repo_src_id WHERE repo_id=:repo_id; diff --git a/augur/tasks/github/util/util.py b/augur/tasks/github/util/util.py index 8dd6e4d81..feb3c0e37 100644 --- a/augur/tasks/github/util/util.py +++ b/augur/tasks/github/util/util.py @@ -4,9 +4,36 @@ import json import httpx from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth +from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from augur.application.db.lib import get_repo_by_repo_git from augur.tasks.util.worker_util import calculate_date_weight_from_timestamps +def get_repo_src_id(owner, repo, logger): + + + query = """query($repo: String!, $owner: String!) { + repository(name: $repo, owner: $owner) { + databaseId + } + } + """ + + key_auth = GithubRandomKeyAuth(logger) + + github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + + variables = { + "owner": owner, + "repo": repo + } + + result_keys = ["repository", "databaseId"] + + repo_src_id = github_graphql_data_access.get_resource(query, variables, result_keys) + + return repo_src_id + + # This function adds a key value pair to a list of dicts and returns the modified list of dicts back def add_key_value_pair_to_dicts(data: List[dict], key: str, value: Any) -> List[dict]: From db7e9efdce33c16813b05d9428a4a797b72c8b4a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 7 Sep 2024 11:37:11 -0500 Subject: [PATCH 04/19] fix syntax issues --- augur/tasks/github/util/populate_repo_src_id.py | 4 +--- augur/tasks/start_tasks.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/augur/tasks/github/util/populate_repo_src_id.py b/augur/tasks/github/util/populate_repo_src_id.py index e5460f9b1..e4aad54ab 100644 --- a/augur/tasks/github/util/populate_repo_src_id.py +++ b/augur/tasks/github/util/populate_repo_src_id.py @@ -3,9 +3,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.application.db.lib import get_repo_by_repo_git, execute_sql -from augur.tasks.github.util.util import get_owner_repo -from augur.tasks.github.util import get_repo_src_id - +from augur.tasks.github.util.util import get_owner_repo, get_repo_src_id @celery.task def populate_repo_src_id_task(repo_git): diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 785470ce8..ab4cf217c 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -66,7 +66,7 @@ def primary_repo_collect_phase(repo_git, full_collection): #Define primary group of jobs for the primary collect phase: issues and pull requests. primary_repo_jobs = group( - populate_repo_src_id_task.si(repo_git) + populate_repo_src_id_task.si(repo_git), collect_issues.si(repo_git, full_collection), collect_pull_requests.si(repo_git, full_collection) ) From 7c6ea1195f6d8a739fb0727213cff99a30ceccc0 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 9 Sep 2024 18:31:04 -0500 Subject: [PATCH 05/19] first run at new algorithm --- augur/application/db/lib.py | 32 +++- augur/application/db/models/augur_data.py | 1 + augur/tasks/frontend.py | 154 ++++++++++++++++-- .../github/util/github_graphql_data_access.py | 27 +-- 4 files changed, 188 insertions(+), 26 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 8db495c76..e65f4665d 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -9,7 +9,7 @@ from psycopg2.errors import DeadlockDetected from typing import List, Any, Optional, Union -from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus +from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup from augur.tasks.util.collection_state import CollectionState from augur.application.db import get_session, get_engine from augur.application.db.util import execute_session_query @@ -144,6 +144,15 @@ def get_repo_by_repo_id(repo_id): return repo +def get_repo_by_src_id(src_id): + + with get_session() as session: + + query = session.query(Repo).filter(Repo.repo_src_id == src_id) + repo = execute_session_query(query, 'first') + + return repo + def remove_working_commits_by_repo_id_and_hashes(repo_id, commit_hashes): remove_working_commits = s.sql.text("""DELETE FROM working_commits @@ -537,3 +546,24 @@ def get_updated_issues(repo_id, since): with get_session() as session: return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all() + + +def get_group_by_name(user_id, group_name): + + + with get_session() as session: + + try: + user_group = session.query(UserGroup).filter(UserGroup.user_id == user_id, UserGroup.name == group_name).one() + except s.orm.exc.NoResultFound: + return None + + return user_group.group_id + +def get_repo_group_by_name(name): + + + with get_session() as session: + + return session.query(RepoGroup).filter(RepoGroup.rg_name == name).first() + \ No newline at end of file diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 934949138..afd4c685c 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -869,6 +869,7 @@ class Repo(Base): data_collection_date = Column( TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP") ) + repo_src_id = Column(BigInteger) repo_group = relationship("RepoGroup", back_populates="repo") user_repo = relationship("UserRepo", back_populates="repo") diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index fffd79d33..669d8c1cd 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -1,8 +1,14 @@ import logging import re +import sqlalchemy as s from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.github.util.github_task_session import GithubTaskSession +from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess +from augur.application.db.lib import get_group_by_name, get_repo_group_by_name, get_repo_by_repo_git, get_repo_by_src_id +from augur.tasks.github.util.util import get_owner_repo +from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME + from augur.application.db.models import UserRepo, Repo, User def parse_org_name(string): @@ -15,6 +21,91 @@ def parse_org_and_repo_name(string): match = re.match(r'^\/?([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_-]+)\/?$', string) return match +@celery.task +def add_orgs_and_repos(user_id, group_name, org_urls, repo_urls): + + logger = logging.getLogger(add_org_repo_list.__name__) + + with GithubTaskSession(logger) as session: + + user = User.get_by_id(session, user_id) + + group = get_group_by_name(session, user_id, group_name) + if not group: + return False, {"status": "Invalid group name"} + + group_id = group.group_id + + for url in org_urls: + org_repos, _ = retrieve_owner_repos(url) + if not org_repos: + continue + + repo_urls.extend(org_repos) + + + data = get_repos_data(repo_urls, session, logger) + + for url in repo_urls: + + repo_data = data[url] + if not repo_data: + # skip since the repo doesn't exists + continue + + repo_type = repo_data["databaseId"] + repo_src_id = repo_data["owner"]["__typename"] + + try: + repo = get_repo_by_repo_git(url) + except s.orm.exc.NoResultFound: + # log a warning + continue + + repo = get_repo_by_src_id(repo_src_id) + if repo: + # log a warning + continue + + frontend_repo_group = get_repo_group_by_name(FRONTEND_REPO_GROUP_NAME) + if not frontend_repo_group: + return False, {"status": "Could not find repo group with name 'Frontend Repos'", "repo_url": url} + + repo_group_id = frontend_repo_group.repo_group_id + + + # These two things really need to be done in one commit + repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) + if not repo_id: + # log a warning + continue + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + # log a warning + continue + + + # repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) + # if not repo_id: + # return False, {"status": "Repo insertion failed", "repo_url": url} + + # result = UserRepo.insert(session, repo_id, group_id) + # if not result: + # return False, {"status": "repo_user insertion failed", "repo_url": url} + + #collection_status records are now only added during collection -IM 5/1/23 + #status = CollectionStatus.insert(session, repo_id) + #if not status: + # return False, {"status": "Failed to create status for repo", "repo_url": url} + + return True, {"status": "Repo Added", "repo_url": url} + +@celery.task +def add_org(): + + pass + @celery.task def add_org_repo_list(user_id, group_name, urls): @@ -75,24 +166,61 @@ def add_org_repo_list(user_id, group_name, urls): # TODO: Change to github specific -@celery.task -def add_repo(user_id, group_name, repo_url): +# @celery.task +# def add_repo(user_id, group_name, repo_url): - logger = logging.getLogger(add_org.__name__) +# logger = logging.getLogger(add_org.__name__) - with GithubTaskSession(logger) as session: - result = UserRepo.add_github_repo(session, repo_url, user_id, group_name) +# with GithubTaskSession(logger) as session: +# result = UserRepo.add_github_repo(session, repo_url, user_id, group_name) - print(repo_url, result) +# print(repo_url, result) -# TODO: Change to github specific -@celery.task -def add_org(user_id, group_name, org_url): +# # TODO: Change to github specific +# @celery.task +# def add_org(user_id, group_name, org_url): - logger = logging.getLogger(add_org.__name__) +# logger = logging.getLogger(add_org.__name__) - with GithubTaskSession(logger) as session: - result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name) +# with GithubTaskSession(logger) as session: +# result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name) + +# print(org_url, result) + + + + + + + + + + + +def get_repos_data(repo_urls, session, logger): + + github_graphql_data_access = GithubGraphQlDataAccess(session.oauths, logger, ingore_not_found_error=True) + + query_parts = [] + repo_map = {} + for i, url in enumerate(repo_urls): + owner, repo = get_owner_repo(url) + query_parts.append(f"""{i}: repository(owner: "{owner}", name: "{repo}") {{ + databaseId, owner {{ __typename }} + }}""") + repo_map[url] = i + + query = f"query GetRepoIds {{ {' '.join(query_parts)}}}" + + data = github_graphql_data_access.get_resource(query, {}, []) + + result_data = {} + for url in repo_urls: + key =repo_map[url] + repo_data = data[key] + + result_data[url] = repo_data + + return result_data - print(org_url, result) diff --git a/augur/tasks/github/util/github_graphql_data_access.py b/augur/tasks/github/util/github_graphql_data_access.py index 7d8c6851e..96b0c6ab7 100644 --- a/augur/tasks/github/util/github_graphql_data_access.py +++ b/augur/tasks/github/util/github_graphql_data_access.py @@ -22,10 +22,11 @@ class InvalidDataException(Exception): class GithubGraphQlDataAccess: - def __init__(self, key_manager, logger: logging.Logger): + def __init__(self, key_manager, logger: logging.Logger, ingore_not_found_error=False): self.logger = logger self.key_manager = key_manager + self.ingore_not_found_error = ingore_not_found_error def get_resource(self, query, variables, result_keys): @@ -77,17 +78,19 @@ def make_request(self, query, variables, timeout=40): response.raise_for_status() - json_response = response.json() - if "errors" in json_response and len(json_response["errors"]) > 0: - errors = json_response["errors"] - - not_found_error = self.__find_first_error_of_type(errors, "NOT_FOUND") - - if not_found_error: - message = not_found_error.get("message", "Resource not found.") - raise NotFoundException(f"Could not find: {message}") - - raise Exception(f"Github Graphql Data Access Errors: {errors}") + if not self.ingore_not_found_error: + + json_response = response.json() + if "errors" in json_response and len(json_response["errors"]) > 0: + errors = json_response["errors"] + + not_found_error = self.__find_first_error_of_type(errors, "NOT_FOUND") + + if not_found_error: + message = not_found_error.get("message", "Resource not found.") + raise NotFoundException(f"Could not find: {message}") + + raise Exception(f"Github Graphql Data Access Errors: {errors}") return response From 652d7f394da11d39dd3fb1a35798b62f02360348 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 9 Sep 2024 19:22:37 -0500 Subject: [PATCH 06/19] insert src_id and make it unique --- augur/application/db/models/augur_data.py | 5 +++-- .../schema/alembic/versions/30_add_repo_src_id.py | 2 ++ augur/tasks/frontend.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index afd4c685c..ddc9a14c4 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1111,7 +1111,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): return result[0]["repo_id"] @staticmethod - def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type): + def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type, repo_src_id): """Add a repo to the repo table. Args: @@ -1146,7 +1146,8 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_ "repo_type": repo_type, "tool_source": tool_source, "tool_version": "1.0", - "data_source": "Git" + "data_source": "Git", + "repo_src_id": repo_src_id } repo_unique = ["repo_git"] diff --git a/augur/application/schema/alembic/versions/30_add_repo_src_id.py b/augur/application/schema/alembic/versions/30_add_repo_src_id.py index c43c409ee..d47eb568c 100644 --- a/augur/application/schema/alembic/versions/30_add_repo_src_id.py +++ b/augur/application/schema/alembic/versions/30_add_repo_src_id.py @@ -17,7 +17,9 @@ def upgrade(): op.add_column('repo', sa.Column('repo_src_id', sa.BigInteger(), nullable=True), schema='augur_data') + op.create_unique_constraint('repo_src_id_unique', 'repo', ['repo_src_id'], schema='augur_data') def downgrade(): + op.drop_constraint('repo_src_id_unique', 'repo', schema='augur_data', type_='unique') op.drop_column('repo', 'repo_src_id', schema='augur_data') diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index 669d8c1cd..c3bc80dd0 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -75,7 +75,7 @@ def add_orgs_and_repos(user_id, group_name, org_urls, repo_urls): # These two things really need to be done in one commit - repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) + repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type, repo_src_id) if not repo_id: # log a warning continue From 3d89a4f870385ab7d12055fec195020b91f5d0ea Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 10 Sep 2024 18:05:29 -0500 Subject: [PATCH 07/19] improve algorithm --- augur/api/view/api.py | 23 +++- augur/tasks/frontend.py | 272 ++++++++++++++++++++++------------------ 2 files changed, 171 insertions(+), 124 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index cbd7e4a0f..a9653b9f9 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -2,7 +2,7 @@ import re from flask_login import current_user, login_required from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo -from augur.tasks.frontend import add_org_repo_list, parse_org_and_repo_name, parse_org_name +from augur.tasks.frontend import add_orgs_and_repos, parse_org_and_repo_name, parse_org_name from .utils import * from ..server import app from augur.application.db.session import DatabaseSession @@ -68,11 +68,16 @@ def av_add_user_repo(): invalid_urls = [] + orgs = [] + repo_urls = [] with DatabaseSession(logger, current_app.engine) as session: for url in urls: # matches https://github.com/{org}/ or htts://github.com/{org} if (org_name := Repo.parse_github_org_url(url)): + + orgs.append(org_name) + rg_obj = RepoGroup.get_by_name(session, org_name) if rg_obj: # add the orgs repos to the group @@ -80,6 +85,9 @@ def av_add_user_repo(): # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} elif Repo.parse_github_repo_url(url)[0]: + + repo_urls.append(url) + org_name, repo_name = Repo.parse_github_repo_url(url) repo_git = f"https://github.com/{org_name}/{repo_name}" repo_obj = Repo.get_by_repo_git(session, repo_git) @@ -90,6 +98,9 @@ def av_add_user_repo(): elif (match := parse_org_and_repo_name(url)): org, repo = match.groups() repo_git = f"https://github.com/{org}/{repo}" + + repo_urls.append(repo_git) + repo_obj = Repo.get_by_repo_git(session, repo_git) if repo_obj: add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) @@ -97,6 +108,9 @@ def av_add_user_repo(): # matches /{org}/ or /{org} or {org}/ or {org} elif (match := parse_org_name(url)): org_name = match.group(1) + + orgs.append(org) + rg_obj = RepoGroup.get_by_name(session, org_name) logger.info(rg_obj) if rg_obj: @@ -117,9 +131,10 @@ def av_add_user_repo(): else: invalid_urls.append(url) - if urls: - urls = [url.lower() for url in urls] - add_org_repo_list.si(current_user.user_id, group, urls).apply_async() + if orgs or repo_urls: + repo_urls = [url.lower() for url in repo_urls] + orgs = [url.lower() for url in orgs] + add_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async() flash("Adding repos and orgs in the background") diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index c3bc80dd0..e726468be 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -7,7 +7,7 @@ from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from augur.application.db.lib import get_group_by_name, get_repo_group_by_name, get_repo_by_repo_git, get_repo_by_src_id from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME +from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup from augur.application.db.models import UserRepo, Repo, User @@ -22,144 +22,201 @@ def parse_org_and_repo_name(string): return match @celery.task -def add_orgs_and_repos(user_id, group_name, org_urls, repo_urls): +def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): - logger = logging.getLogger(add_org_repo_list.__name__) + logger = logging.getLogger(add_orgs_and_repos.__name__) with GithubTaskSession(logger) as session: - user = User.get_by_id(session, user_id) - + # determine group id from name group = get_group_by_name(session, user_id, group_name) if not group: - return False, {"status": "Invalid group name"} + logger.error(f"Error while adding repo. Invalid group name of {group_name}. Cannot insert repos") + return group_id = group.group_id - for url in org_urls: + # get frontend repo group + frontend_repo_group = RepoGroup.get_by_name(FRONTEND_REPO_GROUP_NAME) + if not frontend_repo_group: + logger.error("Error while adding repo: Could not find frontend repo group so repos cannot be inserted") + return + + repo_group_id = frontend_repo_group.repo_group_id + + + # define repo_data and assoicate repos with frontend repo group + repo_data = [tuple(url, repo_group_id) for url in repo_urls] + + for org in orgs: + + # create repo group for org if it doesn't exist + repo_group = RepoGroup.get_by_name(org) + if not repo_group: + repo_group = create_repo_group(session, org) + + # retrieve repo urls for org org_repos, _ = retrieve_owner_repos(url) if not org_repos: continue - repo_urls.extend(org_repos) + # define urls and repo_group_id of org and then add to repo_data + org_repo_data = [tuple(url, repo_group.repo_group_id) for url in org_repos] + repo_data.extend(org_repo_data) - data = get_repos_data(repo_urls, session, logger) + # get data for repos to determine type, src id, and if they exist + data = get_repos_data(repo_data, session, logger) - for url in repo_urls: + for url, repo_group_id in repo_data: repo_data = data[url] if not repo_data: - # skip since the repo doesn't exists + # skip since cause the repo is not valid (doesn't exist likely) continue - repo_type = repo_data["databaseId"] - repo_src_id = repo_data["owner"]["__typename"] + repo_src_id = repo_data["databaseId"] + repo_type = repo_data["owner"]["__typename"] - try: - repo = get_repo_by_repo_git(url) - except s.orm.exc.NoResultFound: - # log a warning + repo = get_repo_by_repo_git(session, url) + if repo: + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) + logger.warning(f"Error while adding repo: Repo already exists with {url}") continue repo = get_repo_by_src_id(repo_src_id) if repo: - # log a warning + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) + logger.warning(f"Error while adding repo: Repo found with same src id. Inserting url: {url}. Inserting src_id {repo_src_id}") continue - frontend_repo_group = get_repo_group_by_name(FRONTEND_REPO_GROUP_NAME) - if not frontend_repo_group: - return False, {"status": "Could not find repo group with name 'Frontend Repos'", "repo_url": url} + add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) - repo_group_id = frontend_repo_group.repo_group_id + return - # These two things really need to be done in one commit - repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type, repo_src_id) - if not repo_id: - # log a warning - continue +def get_repos_data(repo_data, session, logger): - result = UserRepo.insert(session, repo_id, group_id) - if not result: - # log a warning - continue + repo_urls = [x[0] for x in repo_data] + + github_graphql_data_access = GithubGraphQlDataAccess(session.oauths, logger, ingore_not_found_error=True) + + query_parts = [] + repo_map = {} + for i, url in enumerate(repo_urls): + owner, repo = get_owner_repo(url) + query_parts.append(f"""{i}: repository(owner: "{owner}", name: "{repo}") {{ + databaseId, owner {{ __typename }} + }}""") + repo_map[url] = i + + query = f"query GetRepoIds {{ {' '.join(query_parts)}}}" + data = github_graphql_data_access.get_resource(query, {}, []) - # repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) - # if not repo_id: - # return False, {"status": "Repo insertion failed", "repo_url": url} + result_data = {} + for url in repo_urls: + key =repo_map[url] + repo_data = data[key] - # result = UserRepo.insert(session, repo_id, group_id) - # if not result: - # return False, {"status": "repo_user insertion failed", "repo_url": url} + result_data[url] = repo_data + + return result_data - #collection_status records are now only added during collection -IM 5/1/23 - #status = CollectionStatus.insert(session, repo_id) - #if not status: - # return False, {"status": "Failed to create status for repo", "repo_url": url} +def get_repo_by_repo_git(session, url): - return True, {"status": "Repo Added", "repo_url": url} + return session.query(Repo).filter(Repo.repo_git == url).first() -@celery.task -def add_org(): - pass +def add_existing_repo_to_group(logger, session, user_id, group_name, repo_id): + logger.info("Adding existing repo to group") -@celery.task -def add_org_repo_list(user_id, group_name, urls): + group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name) + if group_id is None: + return False + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + return False + +def create_repo_group(session, owner): - logger = logging.getLogger(add_org_repo_list.__name__) + repo_group = RepoGroup(rg_name=owner.lower(), rg_description="", rg_website="", rg_recache=0, rg_type="Unknown", + tool_source="Loaded by user", tool_version="1.0", data_source="Git") + session.add(repo_group) + session.commit() - with GithubTaskSession(logger) as session: + return repo_group + +def add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id): + + # These two things really need to be done in one commit in the future to prevent one existing without the other + repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type, repo_src_id) + if not repo_id: + logger.error("Error while adding repo: Failed to insert github repo") + return + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + logger.error(f"Error while adding repo: Failed to insert user repo record. A record with a repo_id of {repo_id} and a group id of {group_id} needs to be added to the user repo table so that this repo shows up in the users group") + return + +# @celery.task +# def add_org_repo_list(user_id, group_name, urls): + +# logger = logging.getLogger(add_org_repo_list.__name__) + +# with GithubTaskSession(logger) as session: - user = User.get_by_id(session, user_id) - - invalid_urls = [] - valid_orgs = [] - valid_repos = [] - for url in urls: - - # matches https://github.com/{org}/ or http://github.com/{org} - if Repo.parse_github_org_url(url): - added = user.add_github_org(group_name, url)[0] - if added: - valid_orgs.append(url) - - # matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo} - elif Repo.parse_github_repo_url(url)[0]: - added = user.add_github_repo(group_name, url)[0] - if added: - valid_repos.append(url) - - # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} - elif (match := parse_org_and_repo_name(url)): - org, repo = match.groups() - repo_url = f"https://github.com/{org}/{repo}/" - added = user.add_github_repo(group_name, repo_url)[0] - if added: - valid_repos.append(url) - - # matches /{org}/ or /{org} or {org}/ or {org} - elif (match := parse_org_name(url)): - org = match.group(1) - org_url = f"https://github.com/{org}/" - added = user.add_github_org(group_name, org_url)[0] - if added: - valid_orgs.append(url) - - # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} - elif Repo.parse_gitlab_repo_url(url)[0]: - - added = user.add_gitlab_repo(group_name, url)[0] - if added: - valid_repos.append(url) - - else: - invalid_urls.append(url) - - return valid_orgs, valid_repos, invalid_urls +# user = User.get_by_id(session, user_id) + +# invalid_urls = [] +# valid_orgs = [] +# valid_repos = [] +# for url in urls: + +# # matches https://github.com/{org}/ or http://github.com/{org} +# if Repo.parse_github_org_url(url): +# added = user.add_github_org(group_name, url)[0] +# if added: +# valid_orgs.append(url) + +# # matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo} +# elif Repo.parse_github_repo_url(url)[0]: +# added = user.add_github_repo(group_name, url)[0] +# if added: +# valid_repos.append(url) + +# # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} +# elif (match := parse_org_and_repo_name(url)): +# org, repo = match.groups() +# repo_url = f"https://github.com/{org}/{repo}/" +# added = user.add_github_repo(group_name, repo_url)[0] +# if added: +# valid_repos.append(url) + +# # matches /{org}/ or /{org} or {org}/ or {org} +# elif (match := parse_org_name(url)): +# org = match.group(1) +# org_url = f"https://github.com/{org}/" +# added = user.add_github_org(group_name, org_url)[0] +# if added: +# valid_orgs.append(url) + +# # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} +# elif Repo.parse_gitlab_repo_url(url)[0]: + +# added = user.add_gitlab_repo(group_name, url)[0] +# if added: +# valid_repos.append(url) + +# else: +# invalid_urls.append(url) + +# return valid_orgs, valid_repos, invalid_urls @@ -198,29 +255,4 @@ def add_org_repo_list(user_id, group_name, urls): -def get_repos_data(repo_urls, session, logger): - - github_graphql_data_access = GithubGraphQlDataAccess(session.oauths, logger, ingore_not_found_error=True) - - query_parts = [] - repo_map = {} - for i, url in enumerate(repo_urls): - owner, repo = get_owner_repo(url) - query_parts.append(f"""{i}: repository(owner: "{owner}", name: "{repo}") {{ - databaseId, owner {{ __typename }} - }}""") - repo_map[url] = i - - query = f"query GetRepoIds {{ {' '.join(query_parts)}}}" - - data = github_graphql_data_access.get_resource(query, {}, []) - - result_data = {} - for url in repo_urls: - key =repo_map[url] - repo_data = data[key] - - result_data[url] = repo_data - - return result_data From 02d1a177dedc488610d6d6dae99a0b8d2e41f835 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 10 Sep 2024 18:11:00 -0500 Subject: [PATCH 08/19] remove old code --- augur/api/view/api.py | 40 ++++++++++++++++++++-------------------- augur/tasks/frontend.py | 1 + 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index a9653b9f9..bb18fbf4a 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -78,21 +78,21 @@ def av_add_user_repo(): orgs.append(org_name) - rg_obj = RepoGroup.get_by_name(session, org_name) - if rg_obj: - # add the orgs repos to the group - add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) + # rg_obj = RepoGroup.get_by_name(session, org_name) + # if rg_obj: + # # add the orgs repos to the group + # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} elif Repo.parse_github_repo_url(url)[0]: repo_urls.append(url) - org_name, repo_name = Repo.parse_github_repo_url(url) - repo_git = f"https://github.com/{org_name}/{repo_name}" - repo_obj = Repo.get_by_repo_git(session, repo_git) - if repo_obj: - add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + # org_name, repo_name = Repo.parse_github_repo_url(url) + # repo_git = f"https://github.com/{org_name}/{repo_name}" + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} elif (match := parse_org_and_repo_name(url)): @@ -101,9 +101,9 @@ def av_add_user_repo(): repo_urls.append(repo_git) - repo_obj = Repo.get_by_repo_git(session, repo_git) - if repo_obj: - add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) # matches /{org}/ or /{org} or {org}/ or {org} elif (match := parse_org_name(url)): @@ -111,11 +111,11 @@ def av_add_user_repo(): orgs.append(org) - rg_obj = RepoGroup.get_by_name(session, org_name) - logger.info(rg_obj) - if rg_obj: - # add the orgs repos to the group - add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) + # rg_obj = RepoGroup.get_by_name(session, org_name) + # logger.info(rg_obj) + # if rg_obj: + # # add the orgs repos to the group + # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} elif Repo.parse_gitlab_repo_url(url)[0]: @@ -124,9 +124,9 @@ def av_add_user_repo(): repo_git = f"https://gitlab.com/{org_name}/{repo_name}" # TODO: gitlab ensure the whole repo git is inserted so it can be found here - repo_obj = Repo.get_by_repo_git(session, repo_git) - if repo_obj: - add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) else: invalid_urls.append(url) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index e726468be..e20d539c2 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -21,6 +21,7 @@ def parse_org_and_repo_name(string): match = re.match(r'^\/?([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_-]+)\/?$', string) return match +# TODO: Add support for gitlab @celery.task def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): From 35505e62fce35a689a1e86f399529c51ce8323e7 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 10 Sep 2024 19:27:24 -0500 Subject: [PATCH 09/19] fix syntax errors --- augur/api/view/api.py | 97 ++++++++++--------- augur/application/db/lib.py | 2 +- .../alembic/versions/30_add_repo_src_id.py | 2 +- augur/tasks/frontend.py | 22 ++--- 4 files changed, 64 insertions(+), 59 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index bb18fbf4a..9f5c4a235 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -48,6 +48,8 @@ def add_existing_org_to_group(session, user_id, group_name, rg_id): @login_required def av_add_user_repo(): + print("Adding user repos") + urls = request.form.get('urls') group = request.form.get("group_name") @@ -70,70 +72,73 @@ def av_add_user_repo(): orgs = [] repo_urls = [] - with DatabaseSession(logger, current_app.engine) as session: - for url in urls: + for url in urls: - # matches https://github.com/{org}/ or htts://github.com/{org} - if (org_name := Repo.parse_github_org_url(url)): + # matches https://github.com/{org}/ or htts://github.com/{org} + if (org_name := Repo.parse_github_org_url(url)): - orgs.append(org_name) + orgs.append(org_name) - # rg_obj = RepoGroup.get_by_name(session, org_name) - # if rg_obj: - # # add the orgs repos to the group - # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) + # rg_obj = RepoGroup.get_by_name(session, org_name) + # if rg_obj: + # # add the orgs repos to the group + # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) - # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} - elif Repo.parse_github_repo_url(url)[0]: + # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} + elif Repo.parse_github_repo_url(url)[0]: - repo_urls.append(url) + repo_urls.append(url) - # org_name, repo_name = Repo.parse_github_repo_url(url) - # repo_git = f"https://github.com/{org_name}/{repo_name}" - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + # org_name, repo_name = Repo.parse_github_repo_url(url) + # repo_git = f"https://github.com/{org_name}/{repo_name}" + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) - # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} - elif (match := parse_org_and_repo_name(url)): - org, repo = match.groups() - repo_git = f"https://github.com/{org}/{repo}" + # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} + elif (match := parse_org_and_repo_name(url)): + org, repo = match.groups() + repo_git = f"https://github.com/{org}/{repo}" - repo_urls.append(repo_git) + repo_urls.append(repo_git) - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) - - # matches /{org}/ or /{org} or {org}/ or {org} - elif (match := parse_org_name(url)): - org_name = match.group(1) + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + + # matches /{org}/ or /{org} or {org}/ or {org} + elif (match := parse_org_name(url)): + org_name = match.group(1) - orgs.append(org) + orgs.append(org_name) - # rg_obj = RepoGroup.get_by_name(session, org_name) - # logger.info(rg_obj) - # if rg_obj: - # # add the orgs repos to the group - # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) + # rg_obj = RepoGroup.get_by_name(session, org_name) + # logger.info(rg_obj) + # if rg_obj: + # # add the orgs repos to the group + # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) - # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} - elif Repo.parse_gitlab_repo_url(url)[0]: + # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} + elif Repo.parse_gitlab_repo_url(url)[0]: - org_name, repo_name = Repo.parse_github_repo_url(url) - repo_git = f"https://gitlab.com/{org_name}/{repo_name}" + org_name, repo_name = Repo.parse_github_repo_url(url) + repo_git = f"https://gitlab.com/{org_name}/{repo_name}" - # TODO: gitlab ensure the whole repo git is inserted so it can be found here - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) - - else: - invalid_urls.append(url) + # TODO: gitlab ensure the whole repo git is inserted so it can be found here + # repo_obj = Repo.get_by_repo_git(session, repo_git) + # if repo_obj: + # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + + else: + invalid_urls.append(url) + + if orgs or repo_urls: repo_urls = [url.lower() for url in repo_urls] orgs = [url.lower() for url in orgs] + flash(f"Adding repos: {repo_urls}") + flash(f"Adding orgs: {orgs}") add_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async() flash("Adding repos and orgs in the background") diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index e65f4665d..5b463ba78 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -558,7 +558,7 @@ def get_group_by_name(user_id, group_name): except s.orm.exc.NoResultFound: return None - return user_group.group_id + return user_group def get_repo_group_by_name(name): diff --git a/augur/application/schema/alembic/versions/30_add_repo_src_id.py b/augur/application/schema/alembic/versions/30_add_repo_src_id.py index d47eb568c..013890697 100644 --- a/augur/application/schema/alembic/versions/30_add_repo_src_id.py +++ b/augur/application/schema/alembic/versions/30_add_repo_src_id.py @@ -1,4 +1,4 @@ -"""Add commit message table +"""Add repo src id Revision ID: 30 Revises: 29 diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index e20d539c2..eae4b2682 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -7,7 +7,7 @@ from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from augur.application.db.lib import get_group_by_name, get_repo_group_by_name, get_repo_by_repo_git, get_repo_by_src_id from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup +from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup, UserGroup from augur.application.db.models import UserRepo, Repo, User @@ -30,7 +30,7 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): with GithubTaskSession(logger) as session: # determine group id from name - group = get_group_by_name(session, user_id, group_name) + group = get_group_by_name(user_id, group_name) if not group: logger.error(f"Error while adding repo. Invalid group name of {group_name}. Cannot insert repos") return @@ -38,7 +38,7 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): group_id = group.group_id # get frontend repo group - frontend_repo_group = RepoGroup.get_by_name(FRONTEND_REPO_GROUP_NAME) + frontend_repo_group = RepoGroup.get_by_name(session, FRONTEND_REPO_GROUP_NAME) if not frontend_repo_group: logger.error("Error while adding repo: Could not find frontend repo group so repos cannot be inserted") return @@ -47,27 +47,28 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): # define repo_data and assoicate repos with frontend repo group - repo_data = [tuple(url, repo_group_id) for url in repo_urls] + repo_data = [(url, repo_group_id) for url in repo_urls] for org in orgs: # create repo group for org if it doesn't exist - repo_group = RepoGroup.get_by_name(org) + repo_group = RepoGroup.get_by_name(session, org) if not repo_group: repo_group = create_repo_group(session, org) # retrieve repo urls for org - org_repos, _ = retrieve_owner_repos(url) + org_repos, _ = retrieve_owner_repos(session, org) if not org_repos: continue # define urls and repo_group_id of org and then add to repo_data - org_repo_data = [tuple(url, repo_group.repo_group_id) for url in org_repos] + org_repo_data = [(url, repo_group.repo_group_id) for url in org_repos] repo_data.extend(org_repo_data) # get data for repos to determine type, src id, and if they exist data = get_repos_data(repo_data, session, logger) + print(f"Repo data: {data}") for url, repo_group_id in repo_data: @@ -83,14 +84,12 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - logger.warning(f"Error while adding repo: Repo already exists with {url}") continue repo = get_repo_by_src_id(repo_src_id) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - logger.warning(f"Error while adding repo: Repo found with same src id. Inserting url: {url}. Inserting src_id {repo_src_id}") continue add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) @@ -98,6 +97,7 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): return +# TODO: Make it only get like 100 at a time def get_repos_data(repo_data, session, logger): repo_urls = [x[0] for x in repo_data] @@ -108,10 +108,10 @@ def get_repos_data(repo_data, session, logger): repo_map = {} for i, url in enumerate(repo_urls): owner, repo = get_owner_repo(url) - query_parts.append(f"""{i}: repository(owner: "{owner}", name: "{repo}") {{ + query_parts.append(f"""repo_{i}: repository(owner: "{owner}", name: "{repo}") {{ databaseId, owner {{ __typename }} }}""") - repo_map[url] = i + repo_map[url] = f"repo_{i}" query = f"query GetRepoIds {{ {' '.join(query_parts)}}}" From f7baea0ccf3f8084b3ce6dd323b6e7899cf76475 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 16 Sep 2024 18:34:47 -0500 Subject: [PATCH 10/19] consider src id first --- augur/tasks/frontend.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index eae4b2682..9d811d04e 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -68,7 +68,6 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): # get data for repos to determine type, src id, and if they exist data = get_repos_data(repo_data, session, logger) - print(f"Repo data: {data}") for url, repo_group_id in repo_data: @@ -80,17 +79,17 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): repo_src_id = repo_data["databaseId"] repo_type = repo_data["owner"]["__typename"] - repo = get_repo_by_repo_git(session, url) + repo = get_repo_by_src_id(repo_src_id) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - continue + continue - repo = get_repo_by_src_id(repo_src_id) + repo = get_repo_by_repo_git(session, url) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - continue + continue add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) From 1966ad61dd2ab4a2ca00ef78f22942a9f65bb297 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 16 Sep 2024 18:49:55 -0500 Subject: [PATCH 11/19] process 100 repos at a time --- augur/api/view/api.py | 12 ------- augur/tasks/frontend.py | 77 +++++++++++++++++++++++------------------ 2 files changed, 43 insertions(+), 46 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index 9f5c4a235..ae5a8c250 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -14,18 +14,6 @@ def cache(file=None): return redirect(url_for('static', filename="cache")) return redirect(url_for('static', filename="cache/" + toCacheFilename(file, False))) - -def add_existing_repo_to_group(session, user_id, group_name, repo_id): - - logger.info("Adding existing repo to group") - - group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name) - if group_id is None: - return False - - result = UserRepo.insert(session, repo_id, group_id) - if not result: - return False def add_existing_org_to_group(session, user_id, group_name, rg_id): diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index 9d811d04e..e48e24d62 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -65,35 +65,57 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): org_repo_data = [(url, repo_group.repo_group_id) for url in org_repos] repo_data.extend(org_repo_data) + # break list of repos into lists of 100 so that graphql query isn't overwhelmed + for chunk in divide_list_into_chunks(repo_data, 100): - # get data for repos to determine type, src id, and if they exist - data = get_repos_data(repo_data, session, logger) + add_new_repos(chunk, group_id, session, logger) - for url, repo_group_id in repo_data: + return + - repo_data = data[url] - if not repo_data: - # skip since cause the repo is not valid (doesn't exist likely) - continue +def add_new_repos(repo_data, group_id, session, logger): - repo_src_id = repo_data["databaseId"] - repo_type = repo_data["owner"]["__typename"] + # get data for repos to determine type, src id, and if they exist + data = get_repos_data(repo_data, session, logger) - repo = get_repo_by_src_id(repo_src_id) - if repo: - # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record - add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - continue + for url, repo_group_id in repo_data: - repo = get_repo_by_repo_git(session, url) - if repo: - # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record - add_existing_repo_to_group(logger, session, user_id, group_name, repo.repo_id) - continue + repo_data = data[url] + if not repo_data: + # skip since cause the repo is not valid (doesn't exist likely) + continue - add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) + repo_src_id = repo_data["databaseId"] + repo_type = repo_data["owner"]["__typename"] - return + repo = get_repo_by_src_id(repo_src_id) + if repo: + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, group_id, repo.repo_id) + continue + + repo = get_repo_by_repo_git(session, url) + if repo: + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, group_id, repo.repo_id) + continue + + add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) + + +def add_existing_repo_to_group(logger, session, group_id, repo_id): + + logger.info("Adding existing repo to group") + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + return False + + +def divide_list_into_chunks(data, size): + + for i in range(0, len(data), size): + yield data[i:i + size] # TODO: Make it only get like 100 at a time @@ -128,19 +150,6 @@ def get_repos_data(repo_data, session, logger): def get_repo_by_repo_git(session, url): return session.query(Repo).filter(Repo.repo_git == url).first() - - -def add_existing_repo_to_group(logger, session, user_id, group_name, repo_id): - - logger.info("Adding existing repo to group") - - group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name) - if group_id is None: - return False - - result = UserRepo.insert(session, repo_id, group_id) - if not result: - return False def create_repo_group(session, owner): From cfed800b37016a493834f7b634f8d37f08881b3e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 16 Sep 2024 18:55:20 -0500 Subject: [PATCH 12/19] improve organization and remove commented code --- augur/api/view/api.py | 30 -------------------------- augur/tasks/frontend.py | 47 ++++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 52 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index ae5a8c250..c260b65fa 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -64,58 +64,28 @@ def av_add_user_repo(): # matches https://github.com/{org}/ or htts://github.com/{org} if (org_name := Repo.parse_github_org_url(url)): - orgs.append(org_name) - # rg_obj = RepoGroup.get_by_name(session, org_name) - # if rg_obj: - # # add the orgs repos to the group - # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) - # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} elif Repo.parse_github_repo_url(url)[0]: - repo_urls.append(url) - # org_name, repo_name = Repo.parse_github_repo_url(url) - # repo_git = f"https://github.com/{org_name}/{repo_name}" - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) - # matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo} elif (match := parse_org_and_repo_name(url)): org, repo = match.groups() repo_git = f"https://github.com/{org}/{repo}" - repo_urls.append(repo_git) - - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) # matches /{org}/ or /{org} or {org}/ or {org} elif (match := parse_org_name(url)): org_name = match.group(1) - orgs.append(org_name) - # rg_obj = RepoGroup.get_by_name(session, org_name) - # logger.info(rg_obj) - # if rg_obj: - # # add the orgs repos to the group - # add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) - # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} elif Repo.parse_gitlab_repo_url(url)[0]: org_name, repo_name = Repo.parse_github_repo_url(url) repo_git = f"https://gitlab.com/{org_name}/{repo_name}" - - # TODO: gitlab ensure the whole repo git is inserted so it can be found here - # repo_obj = Repo.get_by_repo_git(session, repo_git) - # if repo_obj: - # add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) else: invalid_urls.append(url) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index e48e24d62..cac4d9fe7 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -45,33 +45,40 @@ def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): repo_group_id = frontend_repo_group.repo_group_id - # define repo_data and assoicate repos with frontend repo group repo_data = [(url, repo_group_id) for url in repo_urls] - for org in orgs: - - # create repo group for org if it doesn't exist - repo_group = RepoGroup.get_by_name(session, org) - if not repo_group: - repo_group = create_repo_group(session, org) - - # retrieve repo urls for org - org_repos, _ = retrieve_owner_repos(session, org) - if not org_repos: - continue - - # define urls and repo_group_id of org and then add to repo_data - org_repo_data = [(url, repo_group.repo_group_id) for url in org_repos] - repo_data.extend(org_repo_data) + # get org repos and associate them with their org repo group + org_repo_data = get_org_repo_data(orgs, session) + repo_data.extend(org_repo_data) # break list of repos into lists of 100 so that graphql query isn't overwhelmed for chunk in divide_list_into_chunks(repo_data, 100): add_new_repos(chunk, group_id, session, logger) - return +def get_org_repo_data(orgs, session): + + repo_data = [] + for org in orgs: + + # create repo group for org if it doesn't exist + repo_group = RepoGroup.get_by_name(session, org) + if not repo_group: + repo_group = create_repo_group(session, org) + + # retrieve repo urls for org + org_repos, _ = retrieve_owner_repos(session, org) + if not org_repos: + continue + + # define urls and repo_group_id of org and then add to repo_data + org_repo_data = [(url, repo_group.repo_group_id) for url in org_repos] + repo_data.extend(org_repo_data) + + return repo_data + def add_new_repos(repo_data, group_id, session, logger): @@ -104,12 +111,8 @@ def add_new_repos(repo_data, group_id, session, logger): def add_existing_repo_to_group(logger, session, group_id, repo_id): - - logger.info("Adding existing repo to group") - result = UserRepo.insert(session, repo_id, group_id) - if not result: - return False + UserRepo.insert(session, repo_id, group_id) def divide_list_into_chunks(data, size): From 18a84d1f102688ba9f42647af8ba93142750ca0a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 16 Sep 2024 18:56:25 -0500 Subject: [PATCH 13/19] rename task to be github specific --- augur/api/view/api.py | 4 ++-- augur/tasks/frontend.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index c260b65fa..d1f4ebb80 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -2,7 +2,7 @@ import re from flask_login import current_user, login_required from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo -from augur.tasks.frontend import add_orgs_and_repos, parse_org_and_repo_name, parse_org_name +from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name from .utils import * from ..server import app from augur.application.db.session import DatabaseSession @@ -97,7 +97,7 @@ def av_add_user_repo(): orgs = [url.lower() for url in orgs] flash(f"Adding repos: {repo_urls}") flash(f"Adding orgs: {orgs}") - add_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async() + add_github_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async() flash("Adding repos and orgs in the background") diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index cac4d9fe7..614f4013e 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -23,9 +23,9 @@ def parse_org_and_repo_name(string): # TODO: Add support for gitlab @celery.task -def add_orgs_and_repos(user_id, group_name, orgs, repo_urls): +def add_github_orgs_and_repos(user_id, group_name, orgs, repo_urls): - logger = logging.getLogger(add_orgs_and_repos.__name__) + logger = logging.getLogger(add_github_orgs_and_repos.__name__) with GithubTaskSession(logger) as session: From aebe6abf46c2c7dd4c4c3d7933410364f75e68e3 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 16 Sep 2024 18:58:42 -0500 Subject: [PATCH 14/19] rename to be github specific and add template for gitlab task --- augur/tasks/frontend.py | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index 614f4013e..db04ecb8c 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -55,7 +55,40 @@ def add_github_orgs_and_repos(user_id, group_name, orgs, repo_urls): # break list of repos into lists of 100 so that graphql query isn't overwhelmed for chunk in divide_list_into_chunks(repo_data, 100): - add_new_repos(chunk, group_id, session, logger) + add_new_github_repos(chunk, group_id, session, logger) + + +# TODO: Add support for gitlab +@celery.task +def add_gitlab_repos(user_id, group_name, repo_urls): + + logger = logging.getLogger(add_github_orgs_and_repos.__name__) + + with GithubTaskSession(logger) as session: + + # determine group id from name + group = get_group_by_name(user_id, group_name) + if not group: + logger.error(f"Error while adding repo. Invalid group name of {group_name}. Cannot insert repos") + return + + group_id = group.group_id + + # get frontend repo group + frontend_repo_group = RepoGroup.get_by_name(session, FRONTEND_REPO_GROUP_NAME) + if not frontend_repo_group: + logger.error("Error while adding repo: Could not find frontend repo group so repos cannot be inserted") + return + + repo_group_id = frontend_repo_group.repo_group_id + + # define repo_data and assoicate repos with frontend repo group + repo_data = [(url, repo_group_id) for url in repo_urls] + + # break list of repos into lists of 100 so that graphql query isn't overwhelmed + for chunk in divide_list_into_chunks(repo_data, 100): + + add_new_github_repos(chunk, group_id, session, logger) def get_org_repo_data(orgs, session): @@ -80,10 +113,10 @@ def get_org_repo_data(orgs, session): return repo_data -def add_new_repos(repo_data, group_id, session, logger): +def add_new_github_repos(repo_data, group_id, session, logger): # get data for repos to determine type, src id, and if they exist - data = get_repos_data(repo_data, session, logger) + data = get_github_repos_data(repo_data, session, logger) for url, repo_group_id in repo_data: @@ -122,7 +155,7 @@ def divide_list_into_chunks(data, size): # TODO: Make it only get like 100 at a time -def get_repos_data(repo_data, session, logger): +def get_github_repos_data(repo_data, session, logger): repo_urls = [x[0] for x in repo_data] From d39cca5a92dbed9c35af89cf66a52d2d3f1971b3 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 17 Sep 2024 17:38:15 -0500 Subject: [PATCH 15/19] add support for gitlab repo --- augur/api/view/api.py | 7 ++++++- augur/tasks/frontend.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index d1f4ebb80..5b0a7ebdc 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -2,7 +2,7 @@ import re from flask_login import current_user, login_required from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo -from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name +from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos from .utils import * from ..server import app from augur.application.db.session import DatabaseSession @@ -60,6 +60,7 @@ def av_add_user_repo(): orgs = [] repo_urls = [] + gitlab_repo_urls = [] for url in urls: # matches https://github.com/{org}/ or htts://github.com/{org} @@ -87,6 +88,7 @@ def av_add_user_repo(): org_name, repo_name = Repo.parse_github_repo_url(url) repo_git = f"https://gitlab.com/{org_name}/{repo_name}" + gitlab_repo_urls.append(repo_git) else: invalid_urls.append(url) @@ -99,6 +101,9 @@ def av_add_user_repo(): flash(f"Adding orgs: {orgs}") add_github_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async() + if gitlab_repo_urls: + add_gitlab_repos(current_user.user_id, group, gitlab_repo_urls) + flash("Adding repos and orgs in the background") return redirect(url_for("user_settings") + "?section=tracker") diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index db04ecb8c..e1543c2fb 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -82,15 +82,33 @@ def add_gitlab_repos(user_id, group_name, repo_urls): repo_group_id = frontend_repo_group.repo_group_id - # define repo_data and assoicate repos with frontend repo group - repo_data = [(url, repo_group_id) for url in repo_urls] + for url in repo_urls: - # break list of repos into lists of 100 so that graphql query isn't overwhelmed - for chunk in divide_list_into_chunks(repo_data, 100): + result = Repo.is_valid_gitlab_repo(session, url) + if not result[0]: + continue + + # TODO: Add logic to get gitlab src id + + repo = get_repo_by_repo_git(session, url) + if repo: + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, group_id, repo.repo_id) + + add_gitlab_repo(session, url, repo_group_id, group_id) + + +def add_gitlab_repo(session, url, repo_group_id, group_id): + + repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend") + if not repo_id: + return False, {"status": "Repo insertion failed", "repo_url": url} + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + return False, {"status": "repo_user insertion failed", "repo_url": url} - add_new_github_repos(chunk, group_id, session, logger) - def get_org_repo_data(orgs, session): repo_data = [] @@ -140,7 +158,7 @@ def add_new_github_repos(repo_data, group_id, session, logger): add_existing_repo_to_group(logger, session, group_id, repo.repo_id) continue - add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) + add_github_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id) def add_existing_repo_to_group(logger, session, group_id, repo_id): @@ -196,7 +214,7 @@ def create_repo_group(session, owner): return repo_group -def add_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id): +def add_github_repo(logger, session, url, repo_group_id, group_id, repo_type, repo_src_id): # These two things really need to be done in one commit in the future to prevent one existing without the other repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type, repo_src_id) From 449917f22110e7d19d82affba88705828defc6d2 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 28 Sep 2024 09:28:10 -0500 Subject: [PATCH 16/19] improve logic --- augur/tasks/frontend.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index e1543c2fb..df7808285 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -130,7 +130,7 @@ def get_org_repo_data(orgs, session): return repo_data - +# TODO: Do we need to check if the repo already exists in the user group? def add_new_github_repos(repo_data, group_id, session, logger): # get data for repos to determine type, src id, and if they exist @@ -186,18 +186,14 @@ def get_github_repos_data(repo_data, session, logger): query_parts.append(f"""repo_{i}: repository(owner: "{owner}", name: "{repo}") {{ databaseId, owner {{ __typename }} }}""") - repo_map[url] = f"repo_{i}" query = f"query GetRepoIds {{ {' '.join(query_parts)}}}" data = github_graphql_data_access.get_resource(query, {}, []) result_data = {} - for url in repo_urls: - key =repo_map[url] - repo_data = data[key] - - result_data[url] = repo_data + for i, url in enumerate(repo_urls): + result_data[url] = data[f"repo_{i}"] return result_data From 08a92a164f8095586fbb42b02eaa2d7edbaa6863 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 8 Oct 2024 12:42:09 -0500 Subject: [PATCH 17/19] add logic to support gitlab repos --- augur/application/db/models/augur_data.py | 5 +- augur/tasks/frontend.py | 75 ++++++++++++++++++++--- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index baf01b5ac..a645ea1ba 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1065,7 +1065,7 @@ def parse_github_org_url(url): return result.groups()[0] @staticmethod - def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): + def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source, repo_src_id): """Add a repo to the repo table. Args: @@ -1099,7 +1099,8 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): "repo_type": None, "tool_source": tool_source, "tool_version": "1.0", - "data_source": "Git" + "data_source": "Git", + "repo_src_id": repo_src_id } repo_unique = ["repo_git"] diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index df7808285..cd38d0acc 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -1,15 +1,18 @@ import logging import re import sqlalchemy as s +import urllib.parse +from time import sleep from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess -from augur.application.db.lib import get_group_by_name, get_repo_group_by_name, get_repo_by_repo_git, get_repo_by_src_id +from augur.application.db.lib import get_group_by_name, get_repo_by_repo_git, get_repo_by_src_id from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup, UserGroup +from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup +from augur.tasks.github.util.github_paginator import hit_api -from augur.application.db.models import UserRepo, Repo, User +from augur.application.db.models import UserRepo, Repo def parse_org_name(string): @@ -84,18 +87,29 @@ def add_gitlab_repos(user_id, group_name, repo_urls): for url in repo_urls: - result = Repo.is_valid_gitlab_repo(session, url) - if not result[0]: + result = get_gitlab_repo_data(session, url, logger) + if not result: continue - # TODO: Add logic to get gitlab src id + if "id" not in result: + logger.error(f"Gitlab repo data returned without id. Url: {url}. Data: {result}") + continue + + repo_src_id = result["id"] + + repo = get_repo_by_src_id(repo_src_id) + if repo: + # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record + add_existing_repo_to_group(logger, session, group_id, repo.repo_id) + continue repo = get_repo_by_repo_git(session, url) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, group_id, repo.repo_id) + continue - add_gitlab_repo(session, url, repo_group_id, group_id) + add_github_repo(logger, session, url, repo_group_id, group_id, repo_src_id) def add_gitlab_repo(session, url, repo_group_id, group_id): @@ -222,6 +236,53 @@ def add_github_repo(logger, session, url, repo_group_id, group_id, repo_type, re if not result: logger.error(f"Error while adding repo: Failed to insert user repo record. A record with a repo_id of {repo_id} and a group id of {group_id} needs to be added to the user repo table so that this repo shows up in the users group") return + + +def get_gitlab_repo_data(gl_session, url: str, logger) -> bool: + + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/" + + owner, repo = Repo.parse_gitlab_repo_url(url) + if not owner or not repo: + logger.error(f"Tried to get gitlab repo data for invalid url: {url}") + return None + + # Encode namespace and project name for the API request + project_identifier = urllib.parse.quote(f"{owner}/{repo}", safe='') + url = REPO_ENDPOINT.format(project_identifier) + + attempts = 0 + while attempts < 10: + response = hit_api(gl_session.oauths, url, logger) + + if wait_in_seconds := response.headers.get("Retry-After") is not None: + sleep(int(wait_in_seconds)) + + if response.status_code == 404: + return None + + if response.status_code == 200: + return response.json() + + attempts += 1 + sleep(attempts*3) + + logger.error(f"Failed to get gitlab repo data after multiple attemps. Url: {url}") + + return None + +def add_gitlab_repo(logger, session, url, repo_group_id, group_id, repo_src_id): + + # These two things really need to be done in one commit in the future to prevent one existing without the other + repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend", repo_src_id) + if not repo_id: + logger.error("Error while adding repo: Failed to insert github repo") + return + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + logger.error(f"Error while adding repo: Failed to insert user repo record. A record with a repo_id of {repo_id} and a group id of {group_id} needs to be added to the user repo table so that this repo shows up in the users group") + return # @celery.task # def add_org_repo_list(user_id, group_name, urls): From 6940a262ef5d2a168b05ea337b0ea5a68fb0214e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 8 Oct 2024 12:49:33 -0500 Subject: [PATCH 18/19] update src id search to only match on relevant repos --- augur/application/db/lib.py | 14 ++++++++++++-- augur/tasks/frontend.py | 6 +++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 9de1243e6..4d10b9011 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -144,15 +144,25 @@ def get_repo_by_repo_id(repo_id): return repo -def get_repo_by_src_id(src_id): +def get_github_repo_by_src_id(src_id): with get_session() as session: - query = session.query(Repo).filter(Repo.repo_src_id == src_id) + query = session.query(Repo).filter(Repo.repo_src_id == src_id, Repo.repo_git.ilike(f'%https://github.com%')) repo = execute_session_query(query, 'first') return repo +def get_gitlab_repo_by_src_id(src_id): + + with get_session() as session: + + query = session.query(Repo).filter(Repo.repo_src_id == src_id, Repo.repo_git.ilike(f'%https://gitlab.com%')) + repo = execute_session_query(query, 'first') + + return repo + + def remove_working_commits_by_repo_id_and_hashes(repo_id, commit_hashes): remove_working_commits = s.sql.text("""DELETE FROM working_commits diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index cd38d0acc..da4c1cd9d 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -7,7 +7,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess -from augur.application.db.lib import get_group_by_name, get_repo_by_repo_git, get_repo_by_src_id +from augur.application.db.lib import get_group_by_name, get_repo_by_repo_git, get_github_repo_by_src_id, get_gitlab_repo_by_src_id from augur.tasks.github.util.util import get_owner_repo from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup from augur.tasks.github.util.github_paginator import hit_api @@ -97,7 +97,7 @@ def add_gitlab_repos(user_id, group_name, repo_urls): repo_src_id = result["id"] - repo = get_repo_by_src_id(repo_src_id) + repo = get_gitlab_repo_by_src_id(repo_src_id) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, group_id, repo.repo_id) @@ -160,7 +160,7 @@ def add_new_github_repos(repo_data, group_id, session, logger): repo_src_id = repo_data["databaseId"] repo_type = repo_data["owner"]["__typename"] - repo = get_repo_by_src_id(repo_src_id) + repo = get_github_repo_by_src_id(repo_src_id) if repo: # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, group_id, repo.repo_id) From 8786f31efec978d88752e6aeba774f8614d4942f Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 15 Oct 2024 17:53:46 -0500 Subject: [PATCH 19/19] remove TODOs --- augur/tasks/frontend.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index da4c1cd9d..f526d9041 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -24,7 +24,6 @@ def parse_org_and_repo_name(string): match = re.match(r'^\/?([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_-]+)\/?$', string) return match -# TODO: Add support for gitlab @celery.task def add_github_orgs_and_repos(user_id, group_name, orgs, repo_urls): @@ -61,7 +60,6 @@ def add_github_orgs_and_repos(user_id, group_name, orgs, repo_urls): add_new_github_repos(chunk, group_id, session, logger) -# TODO: Add support for gitlab @celery.task def add_gitlab_repos(user_id, group_name, repo_urls): @@ -109,7 +107,7 @@ def add_gitlab_repos(user_id, group_name, repo_urls): add_existing_repo_to_group(logger, session, group_id, repo.repo_id) continue - add_github_repo(logger, session, url, repo_group_id, group_id, repo_src_id) + add_gitlab_repo(logger, session, url, repo_group_id, group_id, repo_src_id) def add_gitlab_repo(session, url, repo_group_id, group_id):