Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize repo insertion and add repo src id #2929

Merged
merged 20 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 47 additions & 64 deletions augur/api/view/api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from flask import request, jsonify, redirect, url_for, flash, current_app

Check warning on line 1 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0611: Unused current_app imported from flask (unused-import) Raw Output: augur/api/view/api.py:1:0: W0611: Unused current_app imported from flask (unused-import)
import re
from flask_login import current_user, login_required
from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused RepoGroup imported from augur.application.db.models (unused-import)

from augur.tasks.frontend import add_org_repo_list, parse_org_and_repo_name, parse_org_name
from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos
from .utils import *
from ..server import app
from augur.application.db.session import DatabaseSession

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused DatabaseSession imported from augur.application.db.session (unused-import)

Expand All @@ -14,33 +14,21 @@
return redirect(url_for('static', filename="cache"))
return redirect(url_for('static', filename="cache/" + toCacheFilename(file, False)))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
E0602: Undefined variable 'toCacheFilename' (undefined-variable)



def add_existing_repo_to_group(session, user_id, group_name, repo_id):
    """Attach an already-known repo to one of a user's groups.

    The group name is resolved to an id with
    ``UserGroup.convert_group_name_to_id`` and the link is then created
    with ``UserRepo.insert``.

    Args:
        session: database session handed through to the model helpers.
        user_id: id of the user that owns the group.
        group_name: name of the target group.
        repo_id: id of the repo to attach.

    Returns:
        True when the insert reported success; False when the group could
        not be resolved or the insert reported failure.
    """
    logger.info("Adding existing repo to group")

    group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
    if group_id is None:
        return False

    # Previously the success path fell off the end and returned None, which
    # is falsy just like the failure value; report success explicitly.
    return bool(UserRepo.insert(session, repo_id, group_id))

def add_existing_org_to_group(session, user_id, group_name, rg_id):

logger.info("Adding existing org to group")

Check warning on line 20 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'logger' (undefined-variable) Raw Output: augur/api/view/api.py:20:4: E0602: Undefined variable 'logger' (undefined-variable)

group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
if group_id is None:
return False

repos = session.query(Repo).filter(Repo.repo_group_id == rg_id).all()
logger.info("Length of repos in org: " + str(len(repos)))

Check warning on line 27 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'logger' (undefined-variable) Raw Output: augur/api/view/api.py:27:4: E0602: Undefined variable 'logger' (undefined-variable)
for repo in repos:
result = UserRepo.insert(session, repo.repo_id, group_id)
if not result:
logger.info("Failed to add repo to group")

Check warning on line 31 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'logger' (undefined-variable) Raw Output: augur/api/view/api.py:31:12: E0602: Undefined variable 'logger' (undefined-variable)



Expand All @@ -48,6 +36,8 @@
@login_required
def av_add_user_repo():

print("Adding user repos")

urls = request.form.get('urls')
group = request.form.get("group_name")

Expand All @@ -68,58 +58,51 @@

invalid_urls = []

with DatabaseSession(logger, current_app.engine) as session:
for url in urls:

# matches https://github.com/{org}/ or http://github.com/{org}
if (org_name := Repo.parse_github_org_url(url)):
rg_obj = RepoGroup.get_by_name(session, org_name)
if rg_obj:
# add the orgs repos to the group
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)

# matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo}
elif Repo.parse_github_repo_url(url)[0]:
org_name, repo_name = Repo.parse_github_repo_url(url)
repo_git = f"https://github.com/{org_name}/{repo_name}"
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)

# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
elif (match := parse_org_and_repo_name(url)):
org, repo = match.groups()
repo_git = f"https://github.com/{org}/{repo}"
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
orgs = []
repo_urls = []
gitlab_repo_urls = []
for url in urls:

# matches https://github.com/{org}/ or http://github.com/{org}
if (org_name := Repo.parse_github_org_url(url)):
orgs.append(org_name)

# matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo}
elif Repo.parse_github_repo_url(url)[0]:
repo_urls.append(url)

# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
elif (match := parse_org_and_repo_name(url)):
org, repo = match.groups()
repo_git = f"https://github.com/{org}/{repo}"
repo_urls.append(repo_git)

# matches /{org}/ or /{org} or {org}/ or {org}
elif (match := parse_org_name(url)):
org_name = match.group(1)
orgs.append(org_name)

# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
elif Repo.parse_gitlab_repo_url(url)[0]:

org_name, repo_name = Repo.parse_gitlab_repo_url(url)
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"

# matches /{org}/ or /{org} or {org}/ or {org}
elif (match := parse_org_name(url)):
org_name = match.group(1)
rg_obj = RepoGroup.get_by_name(session, org_name)
logger.info(rg_obj)
if rg_obj:
# add the orgs repos to the group
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)

# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
elif Repo.parse_gitlab_repo_url(url)[0]:

org_name, repo_name = Repo.parse_gitlab_repo_url(url)
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"

# TODO: gitlab ensure the whole repo git is inserted so it can be found here
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)

else:
invalid_urls.append(url)

if urls:
urls = [url.lower() for url in urls]
add_org_repo_list.si(current_user.user_id, group, urls).apply_async()
gitlab_repo_urls.append(repo_git)
else:
invalid_urls.append(url)



if orgs or repo_urls:
repo_urls = [url.lower() for url in repo_urls]
orgs = [url.lower() for url in orgs]
flash(f"Adding repos: {repo_urls}")
flash(f"Adding orgs: {orgs}")
add_github_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async()

if gitlab_repo_urls:
add_gitlab_repos(current_user.user_id, group, gitlab_repo_urls)

flash("Adding repos and orgs in the background")

Expand Down Expand Up @@ -226,5 +209,5 @@
"""
@app.route('/requests/report/wait/<id>')
def wait_for_report_request(id):
requestReports(id)

Check warning on line 212 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'requestReports' (undefined-variable) Raw Output: augur/api/view/api.py:212:4: E0602: Undefined variable 'requestReports' (undefined-variable)
return jsonify(report_requests[id])

Check warning on line 213 in augur/api/view/api.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'report_requests' (undefined-variable) Raw Output: augur/api/view/api.py:213:19: E0602: Undefined variable 'report_requests' (undefined-variable)
42 changes: 41 additions & 1 deletion augur/application/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from psycopg2.errors import DeadlockDetected
from typing import List, Any, Optional, Union

from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus
from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup
from augur.tasks.util.collection_state import CollectionState
from augur.application.db import get_session, get_engine
from augur.application.db.util import execute_session_query
Expand All @@ -17,7 +17,7 @@

logger = logging.getLogger("db_lib")

def convert_type_of_value(config_dict, logger=None):

Check warning on line 20 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name) Raw Output: augur/application/db/lib.py:20:39: W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)


data_type = config_dict["type"]
Expand Down Expand Up @@ -144,6 +144,25 @@

return repo

def get_github_repo_by_src_id(src_id):
    """Fetch the GitHub repo whose ``repo_src_id`` equals ``src_id``.

    The query filters ``Repo`` on ``repo_src_id == src_id`` and on
    ``repo_git`` matching, case-insensitively, any value that contains
    ``https://github.com``.

    Args:
        src_id: value compared against ``Repo.repo_src_id``.

    Returns:
        Whatever ``execute_session_query(query, 'first')`` yields --
        presumably the first matching ``Repo`` or ``None``; confirm against
        that helper.
    """
    with get_session() as session:
        # Plain literal: the pattern has no placeholders, so no f-string.
        query = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike('%https://github.com%'),
        )
        return execute_session_query(query, 'first')

def get_gitlab_repo_by_src_id(src_id):
    """Fetch the GitLab repo whose ``repo_src_id`` equals ``src_id``.

    The query filters ``Repo`` on ``repo_src_id == src_id`` and on
    ``repo_git`` matching, case-insensitively, any value that contains
    ``https://gitlab.com``.

    Args:
        src_id: value compared against ``Repo.repo_src_id``.

    Returns:
        Whatever ``execute_session_query(query, 'first')`` yields --
        presumably the first matching ``Repo`` or ``None``; confirm against
        that helper.
    """
    with get_session() as session:
        # Plain literal: the pattern has no placeholders, so no f-string.
        query = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike('%https://gitlab.com%'),
        )
        return execute_session_query(query, 'first')


def remove_working_commits_by_repo_id_and_hashes(repo_id, commit_hashes):

remove_working_commits = s.sql.text("""DELETE FROM working_commits
Expand Down Expand Up @@ -177,7 +196,7 @@

try:
working_commits = fetchall_data_from_sql_text(query)
except:

Check warning on line 199 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0702: No exception type(s) specified (bare-except) Raw Output: augur/application/db/lib.py:199:4: W0702: No exception type(s) specified (bare-except)
working_commits = []

return working_commits
Expand All @@ -193,7 +212,7 @@

try:
missing_commit_hashes = fetchall_data_from_sql_text(fetch_missing_hashes_sql)
except:

Check warning on line 215 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0702: No exception type(s) specified (bare-except) Raw Output: augur/application/db/lib.py:215:4: W0702: No exception type(s) specified (bare-except)
missing_commit_hashes = []

return missing_commit_hashes
Expand All @@ -213,7 +232,7 @@
return session.query(CollectionStatus).filter(getattr(CollectionStatus,f"{collection_type}_status" ) == CollectionState.COLLECTING.value).count()


def facade_bulk_insert_commits(logger, records):

Check warning on line 235 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name) Raw Output: augur/application/db/lib.py:235:31: W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)

with get_session() as session:

Expand Down Expand Up @@ -553,3 +572,24 @@
with get_session() as session:
return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all()



def get_group_by_name(user_id, group_name):
    """Return the ``UserGroup`` owned by ``user_id`` that is named ``group_name``.

    Args:
        user_id: value matched against ``UserGroup.user_id``.
        group_name: value matched against ``UserGroup.name``.

    Returns:
        The single matching ``UserGroup``, or ``None`` when no row matches.
        More than one match is not handled here and propagates as the
        query's multiple-results error.
    """
    with get_session() as session:
        matching_groups = session.query(UserGroup).filter(
            UserGroup.user_id == user_id,
            UserGroup.name == group_name,
        )
        # one_or_none() gives None for zero rows and still raises when
        # several rows match, mirroring one() guarded by NoResultFound.
        return matching_groups.one_or_none()

def get_repo_group_by_name(name):
    """Return the first ``RepoGroup`` whose ``rg_name`` equals ``name``.

    Args:
        name: value matched against ``RepoGroup.rg_name``.

    Returns:
        The first matching ``RepoGroup``, or ``None`` when there is none.
    """
    with get_session() as session:
        groups_with_name = session.query(RepoGroup).filter(RepoGroup.rg_name == name)
        first_group = groups_with_name.first()
        return first_group

11 changes: 7 additions & 4 deletions augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,7 @@ class Repo(Base):
data_collection_date = Column(
TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")
)
repo_src_id = Column(BigInteger)

repo_group = relationship("RepoGroup", back_populates="repo")
user_repo = relationship("UserRepo", back_populates="repo")
Expand Down Expand Up @@ -1064,7 +1065,7 @@ def parse_github_org_url(url):
return result.groups()[0]

@staticmethod
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source, repo_src_id):
"""Add a repo to the repo table.

Args:
Expand Down Expand Up @@ -1098,7 +1099,8 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
"repo_type": None,
"tool_source": tool_source,
"tool_version": "1.0",
"data_source": "Git"
"data_source": "Git",
"repo_src_id": repo_src_id
}

repo_unique = ["repo_git"]
Expand All @@ -1111,7 +1113,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
return result[0]["repo_id"]

@staticmethod
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type, repo_src_id):
"""Add a repo to the repo table.

Args:
Expand Down Expand Up @@ -1146,7 +1148,8 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
"repo_type": repo_type,
"tool_source": tool_source,
"tool_version": "1.0",
"data_source": "Git"
"data_source": "Git",
"repo_src_id": repo_src_id
}

repo_unique = ["repo_git"]
Expand Down
25 changes: 25 additions & 0 deletions augur/application/schema/alembic/versions/30_add_repo_src_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Add repo src id

Revision ID: 30
Revises: 29
Create Date: 2024-08-30

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '30'
down_revision = '29'
branch_labels = None
depends_on = None


def upgrade():
    """Add ``repo_src_id`` to ``augur_data.repo`` and make it unique.

    The column is a nullable BIGINT; the unique constraint is named
    ``repo_src_id_unique``.
    """
    # NOTE(review): the constraint covers repo_src_id alone. If ids coming
    # from different forges can coincide, the second insert would be
    # rejected -- confirm this is intended.
    src_id_column = sa.Column('repo_src_id', sa.BigInteger(), nullable=True)
    op.add_column('repo', src_id_column, schema='augur_data')

    constrained_columns = ['repo_src_id']
    op.create_unique_constraint(
        'repo_src_id_unique',
        'repo',
        constrained_columns,
        schema='augur_data',
    )


def downgrade():
    """Reverse the upgrade: drop the unique constraint, then the column."""
    table_name = 'repo'
    schema_name = 'augur_data'

    # The constraint goes first, before the column it is defined on.
    op.drop_constraint(
        'repo_src_id_unique',
        table_name,
        schema=schema_name,
        type_='unique',
    )
    op.drop_column(table_name, 'repo_src_id', schema=schema_name)
Loading
Loading