diff --git a/.docker-setup.sh b/.docker-setup.sh
index d9c9e7f8c..dfdc797a3 100755
--- a/.docker-setup.sh
+++ b/.docker-setup.sh
@@ -24,7 +24,6 @@ missingModules=""
#Check everything that needs to be in the $PATH is in there.
#Bash doesn't let this work inside an if statement for some reason, so the checks have to be chained
type -P "docker" &>/dev/null && echo "docker found..." || missingModules="${missingModules} docker"
-type -P "docker-compose" &>/dev/null && echo "docker-compose found..." || missingModules="${missingModules} docker-compose"
type -P "ifconfig" &>/dev/null && echo "ifconfig found..." || missingModules="${missingModules} ifconfig (part of net-tools)"
type -P "psql" &>/dev/null && echo "psql found..." || missingModules="${missingModules} psql"
type -P "watch" &>/dev/null && echo "watch found..." || missingModules="${missingModules} watch"
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
new file mode 100644
index 000000000..c23bfd7bb
--- /dev/null
+++ b/.github/workflows/checks.yml
@@ -0,0 +1,31 @@
+name: "run-linting-checks"
+on:
+ pull_request:
+ branches: [main, dev]
+
+jobs:
+ run-pylint:
+ name: runner / pylint
+ permissions: write-all
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: dciborow/action-pylint@0.1.0
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ reporter: github-pr-review
+ level: warning
+ glob_pattern: "**/*.py"
+ filter_mode: "file"
+
+ misspell:
+ name: runner / misspell
+ runs-on: ubuntu-latest
+ steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Highlight any misspellings in changes
+ uses: reviewdog/action-misspell@v1
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ locale: "US"
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
index 0b1b7d204..0056af873 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -12,7 +12,7 @@
#refactoring checker
#enable=R
-disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311
+disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311,E0401
# Analyse import fallback blocks. This can be used to support both Python 2 and
diff --git a/Makefile b/Makefile
index 26cac178b..22364ac16 100644
--- a/Makefile
+++ b/Makefile
@@ -139,16 +139,16 @@ docs-view: docs
compose-run:
- @ docker-compose -f docker-compose.yml up --build
+ @ docker compose -f docker-compose.yml up --build
compose-run-database:
@ echo "**************************************************************************"
@ echo "Make sure there are no database credentials in docker_env.txt!"
@ echo "**************************************************************************"
@ echo
- @ docker-compose -f docker-compose.yml -f database-compose.yml up --build
+ @ docker compose -f docker-compose.yml -f database-compose.yml up --build
-docker-build: docker-build-backend docker-build-frontend docker-build-database
+docker-build: docker-build-backend docker-build-frontend docker-build-database docker-build-rabbitmq
docker-build-backend:
@ docker build -t augurlabs/augur:backend -f util/docker/backend/Dockerfile .
@@ -159,6 +159,8 @@ docker-build-frontend:
docker-build-database:
@ docker build -t augurlabs/augur:database -f util/docker/database/Dockerfile .
+docker-build-rabbitmq:
+ @ docker build -t augurlabs/augur:rabbitmq -f util/docker/rabbitmq/Dockerfile .
docker-run-backend:
@ - docker stop augur_backend
@@ -174,3 +176,8 @@ docker-run-database:
@ - docker stop augur_database
@ - docker rm augur_database
docker run -p 5434:5432 --name augur_database augurlabs/augur:database
+
+docker-run-rabbitmq:
+ @ - docker stop augur_rabbitmq
+ @ - docker rm augur_rabbitmq
+	docker run -p 5672:5672 --name augur_rabbitmq augurlabs/augur:rabbitmq
\ No newline at end of file
diff --git a/README.md b/README.md
index 9977fc81a..13fbe0dca 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,7 @@
-# Augur NEW Release v0.60.0
+# Augur NEW Release v0.62.4
+
+Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
+The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot). A public instance of 8Knot is available at https://metrix.chaoss.io, which is tied to a public instance of Augur at https://ai.chaoss.io.
[](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. [You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only).
@@ -7,7 +10,7 @@
## NEW RELEASE ALERT!
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)
-Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.0
+Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.62.4
- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
- A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard
@@ -94,6 +97,7 @@ Contributors
- `Dawn Foster `_
- `Ivana Atanasova `_
- `Georg J.P. Link `_
+- `Gary P White `_
GSoC 2022 participants
-----------------------
diff --git a/augur/api/metrics/README.md b/augur/api/metrics/README.md
index cabcc4475..5990291bf 100644
--- a/augur/api/metrics/README.md
+++ b/augur/api/metrics/README.md
@@ -26,7 +26,8 @@ from augur.application.db.engine import engine
4. Define any queries with the structure shown below
```py
repo_sql = s.sql.text(""" SELECT repo.repo_name FROM repo WHERE repo.repo_id = :repo_id """)
-results = pd.read_sql(repo_sql, engine, params={'repo_id': repo_id})
+with engine.connect() as conn:
+ results = pd.read_sql(repo_sql, conn, params={'repo_id': repo_id})
```
5. Return either a pandas dataframe, dict, or json.
- Note: If you return a pandas dataframe or dict it will be automatically converted into json
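
Putting steps 4 and 5 together, a minimal metric built on the connection pattern above might look like the sketch below. This is illustrative only: `repo_name_example` is a hypothetical metric, not one added in this diff; the `register_metric` and `engine` imports are the ones already used throughout these modules.

```py
import sqlalchemy as s
import pandas as pd

from augur.api.util import register_metric
from augur.application.db.engine import engine

@register_metric()
def repo_name_example(repo_group_id, repo_id=None):
    """Hypothetical metric: look up the name of a single repository."""
    # repo_group_id is accepted for parity with the other metrics; this sketch only uses repo_id.
    repo_sql = s.sql.text("""SELECT repo.repo_name FROM repo WHERE repo.repo_id = :repo_id""")

    # Borrow a connection only for the duration of the query (step 4); pandas
    # returns a DataFrame, which the API layer serializes to JSON (step 5).
    with engine.connect() as conn:
        results = pd.read_sql(repo_sql, conn, params={'repo_id': repo_id})
    return results
```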
diff --git a/augur/api/metrics/commit.py b/augur/api/metrics/commit.py
index c143cd9f6..41d86abbf 100644
--- a/augur/api/metrics/commit.py
+++ b/augur/api/metrics/commit.py
@@ -90,8 +90,9 @@ def committers(repo_group_id, repo_id=None, begin_date=None, end_date=None, peri
"""
)
- results = pd.read_sql(committersSQL, engine, params={'repo_id': repo_id,
- 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period})
+ with engine.connect() as conn:
+ results = pd.read_sql(committersSQL, conn, params={'repo_id': repo_id,
+ 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period})
return results
@@ -167,8 +168,9 @@ def annual_commit_count_ranked_by_new_repo_in_repo_group(repo_group_id, repo_id=
ORDER BY YEAR ASC
""".format(table, period))
- results = pd.read_sql(cdRgNewrepRankedCommitsSQL, engine, params={'repo_id': repo_id,
- 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(cdRgNewrepRankedCommitsSQL, conn, params={'repo_id': repo_id,
+ 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -265,8 +267,9 @@ def annual_commit_count_ranked_by_repo_in_repo_group(repo_group_id, repo_id=None
LIMIT 10
""")
- results = pd.read_sql(cdRgTpRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id,
- "repo_id": repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(cdRgTpRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id,
+ "repo_id": repo_id})
return results
@register_metric()
@@ -296,8 +299,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8):
ORDER BY patches DESC) a
""")
- results = pd.read_sql(total_commits_SQL, engine,
- params={'year': year, 'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(total_commits_SQL, conn,
+ params={'year': year, 'repo_group_id': repo_group_id})
else:
total_commits_SQL = s.sql.text("""
SELECT SUM(patches)::int
@@ -308,8 +312,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8):
ORDER BY patches DESC) a
""")
- results = pd.read_sql(total_commits_SQL, engine,
- params={'year': year, 'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(total_commits_SQL, conn,
+ params={'year': year, 'repo_id': repo_id})
if not results.iloc[0]['sum']:
return pd.DataFrame()
@@ -334,8 +339,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8):
ORDER BY commits DESC
""")
- results = pd.read_sql(committers_SQL, engine,
- params={'year': year, 'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(committers_SQL, conn,
+ params={'year': year, 'repo_group_id': repo_group_id})
else:
committers_SQL = s.sql.text("""
SELECT
@@ -353,8 +359,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8):
ORDER BY commits DESC
""")
- results = pd.read_sql(committers_SQL, engine,
- params={'year': year, 'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(committers_SQL, conn,
+ params={'year': year, 'repo_id': repo_id})
cumsum = 0
for i, row in results.iterrows():
diff --git a/augur/api/metrics/contributor.py b/augur/api/metrics/contributor.py
index 7d255ecb4..3f25236d0 100644
--- a/augur/api/metrics/contributor.py
+++ b/augur/api/metrics/contributor.py
@@ -125,8 +125,9 @@ def contributors(repo_group_id, repo_id=None, period='day', begin_date=None, end
ORDER BY total DESC
""")
- results = pd.read_sql(contributorsSQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsSQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
contributorsSQL = s.sql.text("""
SELECT id::text AS user_id,
@@ -211,8 +212,9 @@ def contributors(repo_group_id, repo_id=None, period='day', begin_date=None, end
ORDER BY total DESC
""")
- results = pd.read_sql(contributorsSQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsSQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -281,8 +283,9 @@ def contributors_new(repo_group_id, repo_id=None, period='day', begin_date=None,
GROUP BY date, repo.repo_id, repo_name
""")
- results = pd.read_sql(contributorsNewSQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsNewSQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
contributorsNewSQL = s.sql.text("""
SELECT date_trunc(:period, b.created_at::DATE) AS date, COUNT(id) AS new_contributors, repo.repo_id, repo_name
@@ -330,8 +333,9 @@ def contributors_new(repo_group_id, repo_id=None, period='day', begin_date=None,
GROUP BY date, repo.repo_id, repo_name
""")
- results = pd.read_sql(contributorsNewSQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsNewSQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -351,7 +355,8 @@ def lines_changed_by_author(repo_group_id, repo_id=None):
GROUP BY commits.repo_id, date_trunc('week', cmt_author_date::date), cmt_author_affiliation, cmt_author_email, repo_name
ORDER BY date_trunc('week', cmt_author_date::date) ASC;
""")
- results = pd.read_sql(linesChangedByAuthorSQL, engine, params={"repo_id": repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(linesChangedByAuthorSQL, conn, params={"repo_id": repo_id})
return results
else:
linesChangedByAuthorSQL = s.sql.text("""
@@ -362,7 +367,8 @@ def lines_changed_by_author(repo_group_id, repo_id=None):
GROUP BY repo_id, date_trunc('week', cmt_author_date::date), cmt_author_affiliation, cmt_author_email
ORDER BY date_trunc('week', cmt_author_date::date) ASC;
""")
- results = pd.read_sql(linesChangedByAuthorSQL, engine, params={"repo_group_id": repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(linesChangedByAuthorSQL, conn, params={"repo_group_id": repo_group_id})
return results
@register_metric()
@@ -420,8 +426,9 @@ def contributors_code_development(repo_group_id, repo_id=None, period='all', beg
GROUP BY a.email, a.repo_id, repo_name
""")
- results = pd.read_sql(contributorsSQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsSQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
contributorsSQL = s.sql.text("""
SELECT
@@ -455,6 +462,7 @@ def contributors_code_development(repo_group_id, repo_id=None, period='all', beg
ORDER BY commits desc, email
""")
- results = pd.read_sql(contributorsSQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(contributorsSQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
diff --git a/augur/api/metrics/deps.py b/augur/api/metrics/deps.py
index deb5ac89f..d92371d89 100644
--- a/augur/api/metrics/deps.py
+++ b/augur/api/metrics/deps.py
@@ -6,6 +6,7 @@
import sqlalchemy as s
import pandas as pd
from augur.api.util import register_metric
+import datetime
from ..server import engine
@@ -45,7 +46,8 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No
AND repo_dependencies.repo_id = :repo_id
""")
- results = pd.read_sql(depsSQL, engine)
+ with engine.connect() as conn:
+ results = pd.read_sql(depsSQL, conn)
else:
@@ -69,7 +71,8 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No
AND repo.repo_group_id = :repo_group_id
""")
- results = pd.read_sql(depsSQL, engine)
+ with engine.connect() as conn:
+ results = pd.read_sql(depsSQL, conn)
return results
diff --git a/augur/api/metrics/insight.py b/augur/api/metrics/insight.py
index 874f656f7..848161e1a 100644
--- a/augur/api/metrics/insight.py
+++ b/augur/api/metrics/insight.py
@@ -29,5 +29,6 @@ def top_insights(repo_group_id, num_repos=6):
LIMIT :num_repos
)
""")
- results = pd.read_sql(topInsightsSQL, engine, params={'repo_group_id': repo_group_id, 'num_repos': num_repos})
+ with engine.connect() as conn:
+ results = pd.read_sql(topInsightsSQL, conn, params={'repo_group_id': repo_group_id, 'num_repos': num_repos})
return results
diff --git a/augur/api/metrics/issue.py b/augur/api/metrics/issue.py
index 72108bc20..22ee2630b 100644
--- a/augur/api/metrics/issue.py
+++ b/augur/api/metrics/issue.py
@@ -50,8 +50,10 @@ def issues_first_time_opened(repo_group_id, repo_id=None, period='day', begin_da
GROUP BY issue_date, repo_name
ORDER BY issue_date
""")
- results = pd.read_sql(issueNewContributor, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(issueNewContributor, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
issueNewContributor = s.sql.text("""
SELECT
@@ -76,9 +78,10 @@ def issues_first_time_opened(repo_group_id, repo_id=None, period='day', begin_da
GROUP BY repo.repo_id, issue_date
ORDER BY issue_date
""")
- results = pd.read_sql(issueNewContributor, engine,
- params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issueNewContributor, conn,
+ params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -119,8 +122,9 @@ def issues_first_time_closed(repo_group_id, repo_id=None, period='day', begin_da
) AS iss_close
GROUP BY issue_date, repo_name
""")
- results = pd.read_sql(issuesClosedSQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issuesClosedSQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
issuesClosedSQL = s.sql.text("""
SELECT date_trunc(:period, new_date::DATE) AS issue_date,
@@ -141,8 +145,10 @@ def issues_first_time_closed(repo_group_id, repo_id=None, period='day', begin_da
) AS iss_close
GROUP BY repo_id, repo_name,issue_date
""")
- results = pd.read_sql(issuesClosedSQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(issuesClosedSQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -179,8 +185,9 @@ def issues_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_d
ORDER BY issues.repo_id, date
""")
- results = pd.read_sql(issues_new_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_new_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -198,8 +205,9 @@ def issues_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_d
ORDER BY date;
""")
- results = pd.read_sql(issues_new_SQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_new_SQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -235,8 +243,9 @@ def issues_active(repo_group_id, repo_id=None, period='day', begin_date=None, en
ORDER BY issues.repo_id, date
""")
- results = pd.read_sql(issues_active_SQL, engine, params={'repo_group_id': repo_group_id, 'period':period,
- 'begin_date': begin_date, 'end_date':end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_active_SQL, conn, params={'repo_group_id': repo_group_id, 'period':period,
+ 'begin_date': begin_date, 'end_date':end_date})
else:
issues_active_SQL = s.sql.text("""
@@ -254,8 +263,9 @@ def issues_active(repo_group_id, repo_id=None, period='day', begin_date=None, en
ORDER BY date
""")
- results = pd.read_sql(issues_active_SQL, engine, params={'repo_id': repo_id, 'period':period,
- 'begin_date': begin_date, 'end_date':end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_active_SQL, conn, params={'repo_id': repo_id, 'period':period,
+ 'begin_date': begin_date, 'end_date':end_date})
return results
@register_metric()
@@ -290,8 +300,9 @@ def issues_closed(repo_group_id, repo_id=None, period='day', begin_date=None, en
ORDER BY issues.repo_id, date
""")
- results = pd.read_sql(issues_closed_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_closed_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
issues_closed_SQL = s.sql.text("""
@@ -308,8 +319,9 @@ def issues_closed(repo_group_id, repo_id=None, period='day', begin_date=None, en
ORDER BY date;
""")
- results = pd.read_sql(issues_closed_SQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issues_closed_SQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -347,9 +359,10 @@ def issue_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None):
ORDER BY repo_id, issue_id
""")
- results = pd.read_sql(issue_duration_SQL, engine, params={'repo_group_id': repo_group_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_duration_SQL, conn, params={'repo_group_id': repo_group_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results
@@ -371,9 +384,10 @@ def issue_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None):
ORDER BY issue_id;
""")
- results = pd.read_sql(issue_duration_SQL, engine, params={'repo_id': repo_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_duration_SQL, conn, params={'repo_id': repo_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results
@@ -417,9 +431,10 @@ def issue_participants(repo_group_id, repo_id=None, begin_date=None, end_date=No
ORDER BY issues.repo_id, issues.created_at
""")
- result = pd.read_sql(issue_participants_SQL, engine, params={'repo_group_id': repo_group_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ result = pd.read_sql(issue_participants_SQL, conn, params={'repo_group_id': repo_group_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
return result
else:
issue_participants_SQL = s.sql.text("""
@@ -445,9 +460,10 @@ def issue_participants(repo_group_id, repo_id=None, begin_date=None, end_date=No
ORDER BY issues.created_at
""")
- result = pd.read_sql(issue_participants_SQL, engine, params={'repo_id': repo_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ result = pd.read_sql(issue_participants_SQL, conn, params={'repo_id': repo_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
return result
@register_metric()
@@ -468,7 +484,9 @@ def issue_backlog(repo_group_id, repo_id=None):
GROUP BY issues.repo_id, repo_name
ORDER BY issues.repo_id
""")
- result = pd.read_sql(issue_backlog_SQL, engine, params={'repo_group_id': repo_group_id})
+
+ with engine.connect() as conn:
+ result = pd.read_sql(issue_backlog_SQL, conn, params={'repo_group_id': repo_group_id})
return result
else:
@@ -481,7 +499,8 @@ def issue_backlog(repo_group_id, repo_id=None):
GROUP BY repo_name
""")
- result = pd.read_sql(issue_backlog_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ result = pd.read_sql(issue_backlog_SQL, conn, params={'repo_id': repo_id})
return result
@register_metric()
@@ -509,7 +528,8 @@ def issue_throughput(repo_group_id, repo_id=None):
AND table1.repo_id = repo.repo_id
""")
- results = pd.read_sql(issue_throughput_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_throughput_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
@@ -525,7 +545,8 @@ def issue_throughput(repo_group_id, repo_id=None):
WHERE table1.repo_id = repo.repo_id
""")
- result = pd.read_sql(issue_throughput_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ result = pd.read_sql(issue_throughput_SQL, conn, params={'repo_id': repo_id})
return result
@register_metric()
@@ -574,9 +595,10 @@ def issues_open_age(repo_group_id, repo_id=None, period='day', begin_date=None,
ORDER BY open_date DESC
""")
- results = pd.read_sql(openAgeSQL, engine,
- params={'repo_id': repo_id, 'repo_group_id': repo_group_id,
- 'period': period, 'begin_date':begin_date, 'end_date':end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(openAgeSQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id': repo_group_id,
+ 'period': period, 'begin_date':begin_date, 'end_date':end_date})
return results
@@ -634,11 +656,12 @@ def issues_closed_resolution_duration(repo_group_id, repo_id=None, period='day',
ORDER BY gh_issue_number
""")
- results = pd.read_sql(issueSQL, engine,
- params={'repo_id': repo_id,
- 'repo_group_id': repo_group_id,
- 'period': period, 'begin_date':begin_date,
- 'end_date':end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issueSQL, conn,
+ params={'repo_id': repo_id,
+ 'repo_group_id': repo_group_id,
+ 'period': period, 'begin_date':begin_date,
+ 'end_date':end_date})
return results
@@ -667,8 +690,9 @@ def average_issue_resolution_time(repo_group_id, repo_id=None):
""")
- results = pd.read_sql(avg_issue_resolution_SQL, engine,
- params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(avg_issue_resolution_SQL, conn,
+ params={'repo_group_id': repo_group_id})
return results
else:
@@ -683,8 +707,9 @@ def average_issue_resolution_time(repo_group_id, repo_id=None):
GROUP BY repo.repo_name
""")
- results = pd.read_sql(avg_issue_resolution_SQL, engine,
- params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(avg_issue_resolution_SQL, conn,
+ params={'repo_id': repo_id})
return results
@register_metric()
@@ -757,7 +782,8 @@ def issues_maintainer_response_duration(repo_group_id, repo_id=None, begin_date=
group by repo_id, repo_name
""")
- results = pd.read_sql(issuesSQL, engine, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(issuesSQL, conn, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date})
return results
@@ -780,7 +806,8 @@ def open_issues_count(repo_group_id, repo_id=None):
GROUP BY date, repo_groups.rg_name
ORDER BY date
""")
- results = pd.read_sql(openIssueCountSQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(openIssueCountSQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
openIssueCountSQL = s.sql.text("""
@@ -794,7 +821,8 @@ def open_issues_count(repo_group_id, repo_id=None):
GROUP BY date, repo.repo_id
ORDER BY date
""")
- results = pd.read_sql(openIssueCountSQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(openIssueCountSQL, conn, params={'repo_id': repo_id})
return results
@@ -817,7 +845,8 @@ def closed_issues_count(repo_group_id, repo_id=None):
GROUP BY date, repo_groups.rg_name
ORDER BY date
""")
- results = pd.read_sql(closedIssueCountSQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(closedIssueCountSQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
closedIssueCountSQL = s.sql.text("""
@@ -831,7 +860,8 @@ def closed_issues_count(repo_group_id, repo_id=None):
GROUP BY date, repo.repo_id
ORDER BY date
""")
- results = pd.read_sql(closedIssueCountSQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(closedIssueCountSQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -893,8 +923,9 @@ def issue_comments_mean(repo_group_id, repo_id=None, group_by='week'):
else:
raise ValueError("Incorrect value for 'group_by'")
- results = pd.read_sql(issue_comments_mean_std_SQL, engine,
- params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_comments_mean_std_SQL, conn,
+ params={'repo_group_id': repo_group_id})
return results
else:
@@ -946,8 +977,9 @@ def issue_comments_mean(repo_group_id, repo_id=None, group_by='week'):
else:
raise ValueError("Incorrect value for 'group_by'")
- results = pd.read_sql(issue_comments_mean_std_SQL, engine,
- params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_comments_mean_std_SQL, conn,
+ params={'repo_id': repo_id})
return results
@register_metric()
@@ -978,9 +1010,10 @@ def issue_comments_mean_std(repo_group_id, repo_id=None, group_by='week'):
""")
- results = pd.read_sql(issue_comments_mean_std_SQL, engine,
- params={'repo_group_id': repo_group_id,
- 'group_by': group_by})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_comments_mean_std_SQL, conn,
+ params={'repo_group_id': repo_group_id,
+ 'group_by': group_by})
return results
else:
@@ -1006,8 +1039,9 @@ def issue_comments_mean_std(repo_group_id, repo_id=None, group_by='week'):
ORDER BY date
""")
- results = pd.read_sql(issue_comments_mean_std_SQL, engine,
- params={'repo_id': repo_id, 'group_by': group_by})
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_comments_mean_std_SQL, conn,
+ params={'repo_id': repo_id, 'group_by': group_by})
return results
@register_metric()
@@ -1057,6 +1091,7 @@ def abandoned_issues(repo_group_id, repo_id=None, period='day', begin_date=None,
'''
)
- results = pd.read_sql(abandonedSQL, engine, params={'repo_id': repo_id, 'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(abandonedSQL, conn, params={'repo_id': repo_id, 'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
diff --git a/augur/api/metrics/message.py b/augur/api/metrics/message.py
index 8c36c3a4c..9988f5a0d 100644
--- a/augur/api/metrics/message.py
+++ b/augur/api/metrics/message.py
@@ -56,9 +56,9 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en
""")
-
- results = pd.read_sql(repomessagesSQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(repomessagesSQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
repomessagesSQL = s.sql.text("""
@@ -85,10 +85,11 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en
rg_name,
message_date
""")
-
- results = pd.read_sql(repomessagesSQL, engine,
- params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(repomessagesSQL, conn,
+ params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
diff --git a/augur/api/metrics/pull_request.py b/augur/api/metrics/pull_request.py
index 9fbcc6175..3b1798ec0 100644
--- a/augur/api/metrics/pull_request.py
+++ b/augur/api/metrics/pull_request.py
@@ -10,6 +10,53 @@
from ..server import engine
+@register_metric()
+def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
+ """
+ Returns a time series of the number of new Pull Requests opened during a certain period.
+
+ :param repo_id: The repository's id
+ :param repo_group_id: The repository's group id
+ :param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
+ :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:01'
+ :param end_date: Specifies the end date, defaults to datetime.now()
+ :return: DataFrame of new Pull Requests/period
+ """
+ if not begin_date:
+ begin_date = '1970-1-1 00:00:01'
+ if not end_date:
+ end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+ if repo_id:
+ new_pull_requests_query = s.sql.text("""
+ SELECT DATE_TRUNC(:period, pr_created_at) AS created_date,
+ COUNT(pr_id) AS new_pull_requests
+ FROM pull_requests
+ WHERE repo_id = :repo_id
+ AND pr_created_at BETWEEN :begin_date AND :end_date
+ GROUP BY created_date
+ """)
+
+        with engine.connect() as conn:
+            results = pd.read_sql(new_pull_requests_query, conn, params={'repo_id': repo_id, 'period': period,
+                                                                         'begin_date': begin_date,
+                                                                         'end_date': end_date})
+ else:
+ new_pull_requests_query = s.sql.text("""
+ SELECT DATE_TRUNC(:period, pr_created_at) AS created_date,
+ COUNT(pr_id) AS new_pull_requests
+ FROM pull_requests
+ WHERE repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
+ AND pr_created_at BETWEEN :begin_date AND :end_date
+ GROUP BY created_date
+ """)
+
+        with engine.connect() as conn:
+            results = pd.read_sql(new_pull_requests_query, conn,
+                                  params={'repo_group_id': repo_group_id, 'period': period,
+                                          'begin_date': begin_date,
+                                          'end_date': end_date})
+
+ return results
+
@register_metric()
def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
"""
@@ -40,9 +87,10 @@ def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day
""")
- results = pd.read_sql(commitNewContributor, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(commitNewContributor, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
else:
commitNewContributor = s.sql.text("""
SELECT abc.repo_id, repo_name ,date_trunc(:period, new_date::DATE) as commit_date,
@@ -58,11 +106,11 @@ def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day
GROUP BY abc.repo_id, repo_name, commit_date
""")
-
- results = pd.read_sql(commitNewContributor, engine,
- params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(commitNewContributor, conn,
+ params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
return results
@register_metric()
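
For context on how the new `pull_requests_new` metric defined above might be consumed, here is a hypothetical call from Python; the repo and group ids and the dates are placeholders, not real data.

```py
# Hypothetical usage of the pull_requests_new metric added above.
# Assumes an Augur instance with a configured database engine.
from augur.api.metrics.pull_request import pull_requests_new

weekly_new_prs = pull_requests_new(
    repo_group_id=1,                   # repository group to query (placeholder)
    repo_id=25430,                     # optional: restrict to one repository (placeholder)
    period='week',                     # 'day', 'week', 'month' or 'year'
    begin_date='2022-01-01 00:00:01',
    end_date='2022-12-31 23:59:59',
)
# DataFrame with one row per week: columns created_date and new_pull_requests
print(weekly_new_prs.head())
```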
@@ -96,9 +144,10 @@ def pull_requests_closed_no_merge(repo_group_id, repo_id=None, period='day', beg
- results = pd.read_sql(closedNoMerge, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(closedNoMerge, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
else:
closedNoMerge = s.sql.text("""
@@ -110,11 +159,11 @@ def pull_requests_closed_no_merge(repo_group_id, repo_id=None, period='day', beg
ORDER BY closed_date
""")
-
- results = pd.read_sql(closedNoMerge, engine,
- params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(closedNoMerge, conn,
+ params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
return results
@register_metric()
@@ -151,9 +200,10 @@ def reviews(repo_group_id, repo_id=None, period='day', begin_date=None, end_date
""")
- results = pd.read_sql(reviews_SQL, engine,
- params={'period': period, 'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date })
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_SQL, conn,
+ params={'period': period, 'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date })
return results
else:
@@ -171,10 +221,10 @@ def reviews(repo_group_id, repo_id=None, period='day', begin_date=None, end_date
ORDER BY date
""")
-
- results = pd.read_sql(reviews_SQL, engine,
- params={'period': period, 'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_SQL, conn,
+ params={'period': period, 'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -211,10 +261,10 @@ def reviews_accepted(repo_group_id, repo_id=None, period='day', begin_date=None,
ORDER BY pull_requests.repo_id, date
""")
-
- results = pd.read_sql(reviews_accepted_SQL, engine,
- params={'period': period, 'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_accepted_SQL, conn,
+ params={'period': period, 'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
else:
reviews_accepted_SQL = s.sql.text("""
@@ -232,9 +282,10 @@ def reviews_accepted(repo_group_id, repo_id=None, period='day', begin_date=None,
ORDER BY date
""")
- results = pd.read_sql(reviews_accepted_SQL, engine,
- params={'period': period, 'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_accepted_SQL, conn,
+ params={'period': period, 'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -271,10 +322,10 @@ def reviews_declined(repo_group_id, repo_id=None, period='day', begin_date=None,
ORDER BY pull_requests.repo_id, date
""")
-
- results = pd.read_sql(reviews_declined_SQL, engine,
- params={'period': period, 'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date })
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_declined_SQL, conn,
+ params={'period': period, 'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date })
return results
else:
reviews_declined_SQL = s.sql.text("""
@@ -292,9 +343,10 @@ def reviews_declined(repo_group_id, repo_id=None, period='day', begin_date=None,
ORDER BY date
""")
- results = pd.read_sql(reviews_declined_SQL, engine,
- params={'period': period, 'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(reviews_declined_SQL, conn,
+ params={'period': period, 'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -331,11 +383,11 @@ def review_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None)
ORDER BY pull_requests.repo_id, pull_requests.pull_request_id
""")
-
- results = pd.read_sql(review_duration_SQL, engine,
- params={'repo_group_id': repo_group_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(review_duration_SQL, conn,
+ params={'repo_group_id': repo_group_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results
else:
@@ -355,10 +407,11 @@ def review_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None)
ORDER BY pull_requests.repo_id, pull_request_id
""")
- results = pd.read_sql(review_duration_SQL, engine,
- params={'repo_id': repo_id,
- 'begin_date': begin_date,
- 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(review_duration_SQL, conn,
+ params={'repo_id': repo_id,
+ 'begin_date': begin_date,
+ 'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results
@@ -408,8 +461,9 @@ def pull_request_acceptance_rate(repo_group_id, repo_id=None, begin_date=None, e
ON opened.date_created = accepted.accepted_on
""")
- results = pd.read_sql(prAccRateSQL, engine, params={'repo_group_id': repo_group_id, 'group_by': group_by,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(prAccRateSQL, conn, params={'repo_group_id': repo_group_id, 'group_by': group_by,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
else:
prAccRateSQL = s.sql.text("""
@@ -441,8 +495,9 @@ def pull_request_acceptance_rate(repo_group_id, repo_id=None, begin_date=None, e
ON opened.date_created = accepted.accepted_on
""")
- results = pd.read_sql(prAccRateSQL, engine, params={'repo_id': repo_id, 'group_by': group_by,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(prAccRateSQL, conn, params={'repo_id': repo_id, 'group_by': group_by,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -546,9 +601,10 @@ def pull_request_average_time_to_close(repo_group_id, repo_id=None, group_by='mo
- pr_all = pd.read_sql(pr_all_SQL, engine,
- params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_SQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
if not repo_id:
pr_avg_time_to_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_close'.format(time_unit)]]
else:
@@ -657,10 +713,11 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1
GROUP BY closed_year, closed_month, merged_status, time_between_responses.pr_closed_at, time_between_responses.average_time_between_responses
""")
-
- pr_all = pd.read_sql(pr_all_SQL, engine,
- params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_SQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
if not repo_id:
pr_avg_time_between_responses = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_between_responses'.format(time_unit)]]
else:
@@ -767,10 +824,11 @@ def pull_request_average_commit_counts(repo_group_id, repo_id=None, group_by='mo
GROUP BY closed_year, merged_status, data.pr_closed_at, data.commit_count
""")
-
- pr_all = pd.read_sql(pr_all_SQL, engine,
- params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_SQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
if not repo_id:
pr_avg_commit_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_commits_per_pull_request']]
else:
@@ -926,10 +984,11 @@ def pull_request_average_event_counts(repo_group_id, repo_id=None, group_by='mon
ORDER BY merged_status, closed_year, closed_week, closed_day
""")
-
- pr_all = pd.read_sql(pr_all_SQL, engine,
- params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_SQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
count_names = ['assigned_count', 'review_requested_count', 'labeled_count', 'unlabeled_count', 'subscribed_count', 'mentioned_count', 'referenced_count', 'closed_count', 'head_ref_force_pushed_count', 'head_ref_deleted_count', 'milestoned_count', 'merged_count', 'comment_count']
average_count_names = []
@@ -1050,9 +1109,10 @@ def pull_request_average_time_to_responses_and_close(repo_group_id, repo_id=None
GROUP BY closed_year, merged_status, response_times.first_response_time, response_times.last_response_time, response_times.pr_created_at, response_times.pr_closed_at
""")
- pr_all = pd.read_sql(pr_all_SQL, engine,
- params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_SQL, conn,
+ params={'repo_id': repo_id, 'repo_group_id':repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
if not repo_id:
avg_pr_time_to_responses_and_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_first_response'.format(time_unit), 'average_{}_to_last_response'.format(time_unit), 'average_{}_to_close'.format(time_unit)]]
@@ -1132,9 +1192,10 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1
AND pr_closed_at::date <= :end_date ::date
""")
-
- pr_all = pd.read_sql(pr_all_sql, engine, params={'repo_group_id': repo_group_id,
- 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date})
+
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_all_sql, conn, params={'repo_group_id': repo_group_id,
+ 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date})
if not repo_id:
pr_merged_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).count().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['pull_request_count']]
diff --git a/augur/api/metrics/release.py b/augur/api/metrics/release.py
index 60f779365..5594f7ef0 100644
--- a/augur/api/metrics/release.py
+++ b/augur/api/metrics/release.py
@@ -50,10 +50,10 @@ def releases(repo_group_id, repo_id=None, period='day', begin_date=None, end_dat
ORDER BY releases.release_published_at DESC
""")
-
- results = pd.read_sql(releases_SQL, engine,
- params={'period': period, 'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date })
+ with engine.connect() as conn:
+ results = pd.read_sql(releases_SQL, conn,
+ params={'period': period, 'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date })
return results
else:
@@ -80,10 +80,10 @@ def releases(repo_group_id, repo_id=None, period='day', begin_date=None, end_dat
ORDER BY releases.release_published_at DESC
""")
-
- results = pd.read_sql(releases_SQL, engine,
- params={'period': period, 'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(releases_SQL, conn,
+ params={'period': period, 'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@register_metric()
@@ -127,10 +127,10 @@ def tag_only_releases(repo_group_id, repo_id=None, period='day', begin_date=None
ORDER BY releases.release_published_at DESC
""")
-
- results = pd.read_sql(releases_SQL, engine,
- params={'period': period, 'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date })
+ with engine.connect() as conn:
+ results = pd.read_sql(releases_SQL, conn,
+ params={'period': period, 'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date })
return results
else:
@@ -150,10 +150,11 @@ def tag_only_releases(repo_group_id, repo_id=None, period='day', begin_date=None
ORDER BY releases.release_published_at DESC
""")
- results = pd.read_sql(releases_SQL, engine,
- params={'period': period, 'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(releases_SQL, conn,
+ params={'period': period, 'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
-def create_release_metrics(metrics):
- add_metrics(metrics, __name__)
+#def create_release_metrics(metrics):
+# add_metrics(metrics, __name__)
diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py
index ca4d9668e..c5d8e1138 100644
--- a/augur/api/metrics/repo_meta.py
+++ b/augur/api/metrics/repo_meta.py
@@ -46,8 +46,8 @@ def code_changes(repo_group_id, repo_id=None, period='week', begin_date=None, en
ORDER BY week
""")
-
- results = pd.read_sql(code_changes_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
+ with engine.connect() as conn:
+ results = pd.read_sql(code_changes_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
'begin_date': begin_date, 'end_date': end_date})
results['week'] = results['week'].apply(lambda x: x - 1)
results['date'] = results['year'].astype(str) + ' ' + results['week'].astype(str) + ' 0'
@@ -68,9 +68,9 @@ def code_changes(repo_group_id, repo_id=None, period='week', begin_date=None, en
ORDER BY week
""")
-
- results = pd.read_sql(code_changes_SQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(code_changes_SQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
results['week'] = results['week'].apply(lambda x: x - 1)
results['date'] = results['year'].astype(str) + ' ' + results['week'].astype(str) + ' 0'
@@ -111,8 +111,9 @@ def code_changes_lines(repo_group_id, repo_id=None, period='day', begin_date=Non
ORDER BY commits.repo_id, date
""")
- results = pd.read_sql(code_changes_lines_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(code_changes_lines_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -130,9 +131,9 @@ def code_changes_lines(repo_group_id, repo_id=None, period='day', begin_date=Non
ORDER BY date;
""")
-
- results = pd.read_sql(code_changes_lines_SQL, engine, params={'repo_id': repo_id, 'period': period,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(code_changes_lines_SQL, conn, params={'repo_id': repo_id, 'period': period,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -163,8 +164,9 @@ def sub_projects(repo_group_id, repo_id=None, begin_date=None, end_date=None):
AND repo_added BETWEEN :begin_date AND :end_date
""")
- results = pd.read_sql(sub_projectsSQL, engine, params={'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(sub_projectsSQL, conn, params={'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
else:
sub_projectsSQL = s.sql.text("""
SELECT COUNT(*) AS sub_project_count
@@ -173,8 +175,9 @@ def sub_projects(repo_group_id, repo_id=None, begin_date=None, end_date=None):
AND repo_added BETWEEN :begin_date AND :end_date
""")
- results = pd.read_sql(sub_projectsSQL, engine, params={'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(sub_projectsSQL, conn, params={'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -194,8 +197,8 @@ def sbom_download(repo_group_id, repo_id=None):
logger.debug(dosocs_SQL)
params = {'repo_id': repo_id}
-
- return pd.read_sql(dosocs_SQL, engine, params=params)
+ with engine.connect() as conn:
+ return pd.read_sql(dosocs_SQL, conn, params=params)
#return [json.dumps(license_information)]
@register_metric()
@@ -223,7 +226,8 @@ def cii_best_practices_badge(repo_group_id, repo_id=None):
LIMIT 1
""")
- raw_df = pd.read_sql(cii_best_practices_badge_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ raw_df = pd.read_sql(cii_best_practices_badge_SQL, conn, params={'repo_id': repo_id})
if len(raw_df) == 0:
return []
@@ -263,8 +267,8 @@ def forks(repo_group_id, repo_id=None):
ORDER BY repo_info.repo_id, date
""")
-
- results = pd.read_sql(forks_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(forks_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
@@ -278,8 +282,8 @@ def forks(repo_group_id, repo_id=None):
ORDER BY date
""")
-
- results = pd.read_sql(forks_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(forks_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -303,8 +307,8 @@ def fork_count(repo_group_id, repo_id=None):
WHERE repo_group_id = :repo_group_id)
""")
-
- results = pd.read_sql(fork_count_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(fork_count_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
fork_count_SQL = s.sql.text("""
@@ -315,8 +319,8 @@ def fork_count(repo_group_id, repo_id=None):
LIMIT 1
""")
-
- results = pd.read_sql(fork_count_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(fork_count_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -334,7 +338,8 @@ def languages(repo_group_id, repo_id=None):
WHERE repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
""")
- results = pd.read_sql(languages_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(languages_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
@@ -344,8 +349,8 @@ def languages(repo_group_id, repo_id=None):
WHERE repo_id = :repo_id
""")
-
- results = pd.read_sql(languages_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(languages_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric(type="license")
@@ -381,7 +386,8 @@ def license_files(license_id, spdx_binary, repo_group_id, repo_id=None,):
b.license_id in ( 369,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482));
""")
- results = pd.read_sql(license_data_SQL, engine, params={'repo_id': repo_id, 'spdx_binary': spdx_binary, 'license_id': license_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(license_data_SQL, conn, params={'repo_id': repo_id, 'spdx_binary': spdx_binary, 'license_id': license_id})
return results
@register_metric()
@@ -450,7 +456,8 @@ def license_declared(repo_group_id, repo_id=None):
short_name;
""")
- results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -534,7 +541,8 @@ def license_coverage(repo_group_id, repo_id=None):
GROUP BY a.name, a.licensed, a.licensed, b.total
""")
- results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id, 'repo_group_id':repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id})
return results
@@ -595,8 +603,8 @@ def license_count(repo_group_id, repo_id=None):
GROUP BY a.name, a.number_of_license, a.licensed, b.total
""")
-
- results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id, 'repo_group_id':repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id})
return results
@@ -624,8 +632,8 @@ def stars(repo_group_id, repo_id=None):
ORDER BY repo_info.repo_id, date
""")
-
- results = pd.read_sql(stars_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(stars_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
@@ -639,7 +647,8 @@ def stars(repo_group_id, repo_id=None):
ORDER BY date
""")
- results = pd.read_sql(stars_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(stars_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -663,8 +672,8 @@ def stars_count(repo_group_id, repo_id=None):
WHERE repo_group_id = :repo_group_id)
""")
-
- results = pd.read_sql(stars_count_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(stars_count_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
stars_count_SQL = s.sql.text("""
@@ -675,7 +684,8 @@ def stars_count(repo_group_id, repo_id=None):
LIMIT 1
""")
- results = pd.read_sql(stars_count_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(stars_count_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -701,8 +711,8 @@ def watchers(repo_group_id, repo_id=None):
ORDER BY repo_info.repo_id, date
""")
-
- results = pd.read_sql(watchers_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(watchers_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
@@ -716,8 +726,8 @@ def watchers(repo_group_id, repo_id=None):
ORDER BY date
""")
-
- results = pd.read_sql(watchers_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(watchers_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -741,8 +751,8 @@ def watchers_count(repo_group_id, repo_id=None):
WHERE repo_group_id = :repo_group_id)
""")
-
- results = pd.read_sql(watchers_count_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(watchers_count_SQL, conn, params={'repo_group_id': repo_group_id})
return results
else:
watchers_count_SQL = s.sql.text("""
@@ -753,8 +763,8 @@ def watchers_count(repo_group_id, repo_id=None):
LIMIT 1
""")
-
- results = pd.read_sql(watchers_count_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(watchers_count_SQL, conn, params={'repo_id': repo_id})
return results
@register_metric()
@@ -798,8 +808,9 @@ def annual_lines_of_code_count_ranked_by_new_repo_in_repo_group(repo_group_id, r
LIMIT 10
""")
- results = pd.read_sql(cdRgNewrepRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id,
- "repo_id": repo_id, "calendar_year": calendar_year})
+ with engine.connect() as conn:
+ results = pd.read_sql(cdRgNewrepRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id,
+ "repo_id": repo_id, "calendar_year": calendar_year})
return results
@register_metric()
@@ -894,9 +905,9 @@ def annual_lines_of_code_count_ranked_by_repo_in_repo_group(repo_group_id, repo_
""")
-
- results = pd.read_sql(cdRgTpRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id,
- "repo_id": repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(cdRgTpRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id,
+ "repo_id": repo_id})
return results
@register_metric()
@@ -948,8 +959,8 @@ def lines_of_code_commit_counts_by_calendar_year_grouped(repo_url, calendar_year
GROUP BY week
""")
-
- results = pd.read_sql(cdRepTpIntervalLocCommitsSQL, engine, params={"repourl": '%{}%'.format(repo_url), 'calendar_year': calendar_year})
+ with engine.connect() as conn:
+ results = pd.read_sql(cdRepTpIntervalLocCommitsSQL, conn, params={"repourl": '%{}%'.format(repo_url), 'calendar_year': calendar_year})
return results
@register_metric()
@@ -969,9 +980,9 @@ def average_weekly_commits(repo_group_id=None, repo_id=None, calendar_year=None)
ORDER BY repo_name
""".format(extra_and))
-
- results = pd.read_sql(average_weekly_commits_sql, engine, params={"repo_group_id": repo_group_id,
- "repo_id": repo_id, "calendar_year": calendar_year})
+ with engine.connect() as conn:
+ results = pd.read_sql(average_weekly_commits_sql, conn, params={"repo_group_id": repo_group_id,
+ "repo_id": repo_id, "calendar_year": calendar_year})
return results
@register_metric()
@@ -1054,8 +1065,9 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non
) commit_data
""")
- results = pd.read_sql(summarySQL, engine, params={'repo_group_id': repo_group_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(summarySQL, conn, params={'repo_group_id': repo_group_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
else:
summarySQL = s.sql.text("""
@@ -1123,6 +1135,7 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non
) commit_data
""")
- results = pd.read_sql(summarySQL, engine, params={'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
diff --git a/augur/api/metrics/toss.py b/augur/api/metrics/toss.py
index 122cb3567..d3e91ad40 100644
--- a/augur/api/metrics/toss.py
+++ b/augur/api/metrics/toss.py
@@ -57,8 +57,9 @@ def toss_pull_request_acceptance_rate(repo_id, begin_date=None, end_date=None, g
) opened ON merged.repo_id = opened.repo_id
""")
- results = pd.read_sql(pr_acceptance_rate_sql, engine, params={'repo_id': repo_id, 'group_by': group_by,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(pr_acceptance_rate_sql, conn, params={'repo_id': repo_id, 'group_by': group_by,
+ 'begin_date': begin_date, 'end_date': end_date})
return results
@@ -89,8 +90,9 @@ def toss_review_duration(repo_id, begin_date=None, end_date=None):
AND :end_date
""")
- results = pd.read_sql(pr_acceptance_rate_sql, engine, params={'repo_id': repo_id,
- 'begin_date': begin_date, 'end_date': end_date})
+ with engine.connect() as conn:
+ results = pd.read_sql(pr_acceptance_rate_sql, conn, params={'repo_id': repo_id,
+ 'begin_date': begin_date, 'end_date': end_date})
if results.iloc[0]['duration'] is None:
results.iloc[0]['duration'] = -1
else:
@@ -120,5 +122,6 @@ def toss_repo_info(repo_id):
LIMIT 1;
""")
- results = pd.read_sql(license_file_sql, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(license_file_sql, conn, params={'repo_id': repo_id})
return results
diff --git a/augur/api/routes/__init__.py b/augur/api/routes/__init__.py
index 5e601f54e..03c2e2fa7 100644
--- a/augur/api/routes/__init__.py
+++ b/augur/api/routes/__init__.py
@@ -11,3 +11,4 @@
from .user import *
from .dei import *
from .util import *
+from .complexity import *
diff --git a/augur/api/routes/collection_status.py b/augur/api/routes/collection_status.py
index 58e17311f..8afd8eb2d 100644
--- a/augur/api/routes/collection_status.py
+++ b/augur/api/routes/collection_status.py
@@ -25,7 +25,9 @@ def commit_collection_status(): # TODO: make this name automatic - wrapper?
AND
c.facade_status = 'Success';
""")
- results = pd.read_sql(commit_collection_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(commit_collection_sql, conn)
data = results.to_json(
orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
@@ -86,7 +88,9 @@ def issue_collection_status(): # TODO: make this name automatic - wrapper?
) D
WHERE d.issues_enabled = 'true';
""")
- results = pd.read_sql(issue_collection_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(issue_collection_sql, conn)
data = results.to_json(
orient="records", date_format='iso', date_unit='ms')
parsed_data = json.loads(data)
@@ -156,7 +160,9 @@ def pull_request_collection_status(): # TODO: make this name automatic - wrappe
ORDER BY
ratio_abs;
""")
- results = pd.read_sql(pull_request_collection_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(pull_request_collection_sql, conn)
data = results.to_json(
orient="records", date_format='iso', date_unit='ms')
parsed_data = json.loads(data)
diff --git a/augur/api/routes/complexity.py b/augur/api/routes/complexity.py
index 81045720a..bee39eb92 100644
--- a/augur/api/routes/complexity.py
+++ b/augur/api/routes/complexity.py
@@ -6,32 +6,113 @@
import os
import requests
-AUGUR_API_VERSION = 'api/unstable'
+from augur.api.routes import AUGUR_API_VERSION
+from ..server import app, engine
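+# Complexity routes are now registered directly on the shared Flask app and engine; the old create_routes(server) wrapper is removed.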
-def create_routes(server):
- @server.app.route('/{}/complexity/project_languages'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_languages():
- project_languages_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.programming_language,
- e.code_lines,
- e.files
+@app.route('/{}/complexity/project_languages'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_languages():
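+    # Lines of code and file counts per language, restricted to each repo's most recent repo_labor collection (rows within 5 minutes of its latest data_collection_date).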
+ project_languages_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.programming_language,
+ e.code_lines,
+ e.files
+ FROM
+ augur_data.repo,
+ (SELECT
+ d.repo_id,
+ d.programming_language,
+ SUM(d.code_lines) AS code_lines,
+ COUNT(*)::int AS files
+ FROM
+ (SELECT
+ augur_data.repo_labor.repo_id,
+ augur_data.repo_labor.programming_language,
+ augur_data.repo_labor.code_lines
+ FROM
+ augur_data.repo_labor,
+ ( SELECT
+ augur_data.repo_labor.repo_id,
+ MAX ( data_collection_date ) AS last_collected
+ FROM
+ augur_data.repo_labor
+ GROUP BY augur_data.repo_labor.repo_id) recent
+ WHERE
+ augur_data.repo_labor.repo_id = recent.repo_id
+ AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
+ GROUP BY d.repo_id, d.programming_language) e
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
+ """)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(project_languages_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
+
+@app.route('/{}/complexity/project_files'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_files():
+ project_files_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.files
+ FROM
+ augur_data.repo,
+ (SELECT
+ d.repo_id,
+ count(*) AS files
FROM
- augur_data.repo,
- (SELECT
+ (SELECT
+ augur_data.repo_labor.repo_id
+ FROM
+ augur_data.repo_labor,
+ ( SELECT
+ augur_data.repo_labor.repo_id,
+ MAX ( data_collection_date ) AS last_collected
+ FROM
+ augur_data.repo_labor
+ GROUP BY augur_data.repo_labor.repo_id) recent
+ WHERE
+ augur_data.repo_labor.repo_id = recent.repo_id
+ AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
+ GROUP BY d.repo_id) e
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
+ """)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(project_files_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
+
+@app.route('/{}/complexity/project_lines'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_lines():
+ project_lines_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.total_lines,
+ e.average_lines
+ FROM
+ augur_data.repo,
+ (SELECT
d.repo_id,
- d.programming_language,
- SUM(d.code_lines) AS code_lines,
- COUNT(*)::int AS files
+ SUM(d.total_lines) AS total_lines,
+ AVG(d.total_lines)::INT AS average_lines
FROM
(SELECT
augur_data.repo_labor.repo_id,
- augur_data.repo_labor.programming_language,
- augur_data.repo_labor.code_lines
+ augur_data.repo_labor.total_lines
FROM
augur_data.repo_labor,
( SELECT
@@ -43,113 +124,80 @@ def get_project_languages():
WHERE
augur_data.repo_labor.repo_id = recent.repo_id
AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
- GROUP BY d.repo_id, d.programming_language) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
- """)
- results = pd.read_sql(project_languages_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
+ GROUP BY d.repo_id) e
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
+ """)
- @server.app.route('/{}/complexity/project_files'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_files():
- project_files_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.files
- FROM
- augur_data.repo,
- (SELECT
- d.repo_id,
- count(*) AS files
- FROM
- (SELECT
- augur_data.repo_labor.repo_id
- FROM
- augur_data.repo_labor,
- ( SELECT
- augur_data.repo_labor.repo_id,
- MAX ( data_collection_date ) AS last_collected
- FROM
- augur_data.repo_labor
- GROUP BY augur_data.repo_labor.repo_id) recent
- WHERE
- augur_data.repo_labor.repo_id = recent.repo_id
- AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
- GROUP BY d.repo_id) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
- """)
- results = pd.read_sql(project_files_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
+ with engine.connect() as conn:
+ results = pd.read_sql(project_lines_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
- @server.app.route('/{}/complexity/project_lines'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_lines():
- project_lines_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.total_lines,
- e.average_lines
+@app.route('/{}/complexity/project_comment_lines'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_comment_lines():
+ comment_lines_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.comment_lines,
+ e.avg_comment_lines
+ FROM
+ augur_data.repo,
+ (SELECT
+ d.repo_id,
+ SUM(d.comment_lines) AS comment_lines,
+ AVG(d.comment_lines)::INT AS avg_comment_lines
FROM
- augur_data.repo,
- (SELECT
- d.repo_id,
- SUM(d.total_lines) AS total_lines,
- AVG(d.total_lines)::INT AS average_lines
- FROM
- (SELECT
- augur_data.repo_labor.repo_id,
- augur_data.repo_labor.total_lines
- FROM
- augur_data.repo_labor,
- ( SELECT
- augur_data.repo_labor.repo_id,
- MAX ( data_collection_date ) AS last_collected
- FROM
- augur_data.repo_labor
- GROUP BY augur_data.repo_labor.repo_id) recent
- WHERE
- augur_data.repo_labor.repo_id = recent.repo_id
- AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
- GROUP BY d.repo_id) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
- """)
- results = pd.read_sql(project_lines_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
+ (SELECT
+ augur_data.repo_labor.repo_id,
+ augur_data.repo_labor.comment_lines
+ FROM
+ augur_data.repo_labor,
+ ( SELECT
+ augur_data.repo_labor.repo_id,
+ MAX ( data_collection_date ) AS last_collected
+ FROM
+ augur_data.repo_labor
+ GROUP BY augur_data.repo_labor.repo_id) recent
+ WHERE
+ augur_data.repo_labor.repo_id = recent.repo_id
+ AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
+ GROUP BY d.repo_id) e
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
+ """)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(comment_lines_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
- @server.app.route('/{}/complexity/project_comment_lines'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_comment_lines():
- comment_lines_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.comment_lines,
- e.avg_comment_lines
+@app.route('/{}/complexity/project_blank_lines'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_blank_lines():
+ blank_lines_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.blank_lines,
+ e.avg_blank_lines
+ FROM
+ augur_data.repo,
+ (SELECT
+ d.repo_id,
+ SUM(d.blank_lines) AS blank_lines,
+ AVG(d.blank_lines)::int AS avg_blank_lines
FROM
- augur_data.repo,
- (SELECT
- d.repo_id,
- SUM(d.comment_lines) AS comment_lines,
- AVG(d.comment_lines)::INT AS avg_comment_lines
- FROM
- (SELECT
- augur_data.repo_labor.repo_id,
- augur_data.repo_labor.comment_lines
- FROM
+ (SELECT
+ augur_data.repo_labor.repo_id,
+ augur_data.repo_labor.blank_lines
+ FROM
augur_data.repo_labor,
( SELECT
augur_data.repo_labor.repo_id,
@@ -161,93 +209,57 @@ def get_project_comment_lines():
augur_data.repo_labor.repo_id = recent.repo_id
AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
GROUP BY d.repo_id) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
""")
- results = pd.read_sql(comment_lines_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
- @server.app.route('/{}/complexity/project_blank_lines'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_blank_lines():
- blank_lines_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.blank_lines,
- e.avg_blank_lines
- FROM
- augur_data.repo,
- (SELECT
- d.repo_id,
- SUM(d.blank_lines) AS blank_lines,
- AVG(d.blank_lines)::int AS avg_blank_lines
- FROM
- (SELECT
- augur_data.repo_labor.repo_id,
- augur_data.repo_labor.blank_lines
- FROM
- augur_data.repo_labor,
- ( SELECT
- augur_data.repo_labor.repo_id,
- MAX ( data_collection_date ) AS last_collected
- FROM
- augur_data.repo_labor
- GROUP BY augur_data.repo_labor.repo_id) recent
- WHERE
- augur_data.repo_labor.repo_id = recent.repo_id
- AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
- GROUP BY d.repo_id) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
- """)
- results = pd.read_sql(blank_lines_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
-
+ with engine.connect() as conn:
+ results = pd.read_sql(blank_lines_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
+
- @server.app.route('/{}/complexity/project_file_complexity'.format(AUGUR_API_VERSION), methods=["GET"])
- def get_project_file_complexity():
- project_file_complexity_sql = s.sql.text("""
- SELECT
- e.repo_id,
- augur_data.repo.repo_git,
- augur_data.repo.repo_name,
- e.sum_code_complexity,
- e.average_code_complexity
+@app.route('/{}/complexity/project_file_complexity'.format(AUGUR_API_VERSION), methods=["GET"])
+def get_project_file_complexity():
+ project_file_complexity_sql = s.sql.text("""
+ SELECT
+ e.repo_id,
+ augur_data.repo.repo_git,
+ augur_data.repo.repo_name,
+ e.sum_code_complexity,
+ e.average_code_complexity
+ FROM
+ augur_data.repo,
+ (SELECT
+ d.repo_id,
+ SUM(d.code_complexity) AS sum_code_complexity,
+ AVG(d.code_complexity)::int AS average_code_complexity
FROM
- augur_data.repo,
- (SELECT
- d.repo_id,
- SUM(d.code_complexity) AS sum_code_complexity,
- AVG(d.code_complexity)::int AS average_code_complexity
- FROM
- (SELECT
- augur_data.repo_labor.repo_id,
- augur_data.repo_labor.code_complexity
- FROM
- augur_data.repo_labor,
- ( SELECT
- augur_data.repo_labor.repo_id,
- MAX ( data_collection_date ) AS last_collected
- FROM
- augur_data.repo_labor
- GROUP BY augur_data.repo_labor.repo_id) recent
- WHERE
- augur_data.repo_labor.repo_id = recent.repo_id
- AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
- GROUP BY d.repo_id) e
- WHERE augur_data.repo.repo_id = e.repo_id
- ORDER BY e.repo_id
- """)
- results = pd.read_sql(project_file_complexity_sql, server.engine)
- data = results.to_json(orient="records", date_format='iso', date_unit='ms')
- return Response(response=data,
- status=200,
- mimetype="application/json")
+ (SELECT
+ augur_data.repo_labor.repo_id,
+ augur_data.repo_labor.code_complexity
+ FROM
+ augur_data.repo_labor,
+ ( SELECT
+ augur_data.repo_labor.repo_id,
+ MAX ( data_collection_date ) AS last_collected
+ FROM
+ augur_data.repo_labor
+ GROUP BY augur_data.repo_labor.repo_id) recent
+ WHERE
+ augur_data.repo_labor.repo_id = recent.repo_id
+ AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d
+ GROUP BY d.repo_id) e
+ WHERE augur_data.repo.repo_id = e.repo_id
+ ORDER BY e.repo_id
+ """)
+ with engine.connect() as conn:
+ results = pd.read_sql(project_file_complexity_sql, conn)
+ data = results.to_json(orient="records", date_format='iso', date_unit='ms')
+ return Response(response=data,
+ status=200,
+ mimetype="application/json")
+
diff --git a/augur/api/routes/contributor_reports.py b/augur/api/routes/contributor_reports.py
index 896e00fc0..c600e8141 100644
--- a/augur/api/routes/contributor_reports.py
+++ b/augur/api/routes/contributor_reports.py
@@ -293,7 +293,9 @@ def new_contributor_data_collection(repo_id, required_contributions):
WHERE RANK IN {rank_tuple}
""")
- df = pd.read_sql(contributor_query, engine)
+
+ with engine.connect() as conn:
+ df = pd.read_sql(contributor_query, conn)
df = df.loc[~df['full_name'].str.contains('bot', na=False)]
df = df.loc[~df['login'].str.contains('bot', na=False)]
@@ -334,7 +336,9 @@ def months_data_collection(start_date, end_date):
FROM generate_series (TIMESTAMP '{start_date}', TIMESTAMP '{end_date}', INTERVAL '1 month' ) created_month ) d ) x
) y
""")
- months_df = pd.read_sql(months_query, engine)
+
+ with engine.connect() as conn:
+ months_df = pd.read_sql(months_query, conn)
# add yearmonths to months_df
months_df[['year', 'month']] = months_df[['year', 'month']].astype(float).astype(int).astype(str)
diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py
index dea79b79c..82324a8d6 100644
--- a/augur/api/routes/dei.py
+++ b/augur/api/routes/dei.py
@@ -52,7 +52,7 @@ def dei_track_repo(application: ClientApplication):
return jsonify({"status": "Repo already exists"})
frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first()
- repo_id = Repo.insert(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="")
+ repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="")
if not repo_id:
return jsonify({"status": "Error adding repo"})
diff --git a/augur/api/routes/metadata.py b/augur/api/routes/metadata.py
index 389a3d9d1..f49dbb88f 100644
--- a/augur/api/routes/metadata.py
+++ b/augur/api/routes/metadata.py
@@ -47,7 +47,9 @@ def get_repo_info():
ORDER BY
repo.repo_name;
""")
- results = pd.read_sql(repo_info_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(repo_info_sql, conn)
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
parsed_data = json.loads(data)
return Response(response=data,
@@ -61,7 +63,9 @@ def contributions_count():
group by repo_git
order by contributions desc;
""")
- results = pd.read_sql(repo_info_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(repo_info_sql, conn)
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
parsed_data = json.loads(data)
return Response(response=data,
@@ -75,7 +79,9 @@ def contributors_count():
group by repo_git
order by contributors desc;
""")
- results = pd.read_sql(repo_info_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(repo_info_sql, conn)
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
parsed_data = json.loads(data)
return Response(response=data,
diff --git a/augur/api/routes/pull_request_reports.py b/augur/api/routes/pull_request_reports.py
index 02f6e235c..9e6577954 100644
--- a/augur/api/routes/pull_request_reports.py
+++ b/augur/api/routes/pull_request_reports.py
@@ -53,7 +53,7 @@ def pull_request_data_collection(repo_id, start_date, end_date):
( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_last_response,
first_response_time,
last_response_time,
- average_time_between_responses,
+    EXTRACT ( EPOCH FROM average_time_between_responses) AS average_time_between_responses,
assigned_count,
review_requested_count,
labeled_count,
@@ -62,15 +62,15 @@ def pull_request_data_collection(repo_id, start_date, end_date):
referenced_count,
closed_count,
head_ref_force_pushed_count,
- merged_count,
+ merged_count::INT,
milestoned_count,
unlabeled_count,
head_ref_deleted_count,
comment_count,
- lines_added,
- lines_removed,
+    COALESCE(lines_added, 0) AS lines_added,
+    COALESCE(lines_removed, 0) AS lines_removed,
commit_count,
- file_count
+    COALESCE(file_count, 0) AS file_count
FROM
repo,
repo_groups,
@@ -87,46 +87,47 @@ def pull_request_data_collection(repo_id, start_date, end_date):
count(*) FILTER (WHERE action = 'head_ref_force_pushed') AS head_ref_force_pushed_count,
count(*) FILTER (WHERE action = 'head_ref_deleted') AS head_ref_deleted_count,
count(*) FILTER (WHERE action = 'milestoned') AS milestoned_count,
- count(*) FILTER (WHERE action = 'merged') AS merged_count,
- MIN(message.msg_timestamp) AS first_response_time,
- COUNT(DISTINCT message.msg_timestamp) AS comment_count,
- MAX(message.msg_timestamp) AS last_response_time,
- (MAX(message.msg_timestamp) - MIN(message.msg_timestamp)) / COUNT(DISTINCT message.msg_timestamp) AS average_time_between_responses
- FROM pull_request_events, pull_requests, repo, pull_request_message_ref, message
- WHERE repo.repo_id = {repo_id}
- AND repo.repo_id = pull_requests.repo_id
- AND pull_requests.pull_request_id = pull_request_events.pull_request_id
- AND pull_requests.pull_request_id = pull_request_message_ref.pull_request_id
- AND pull_request_message_ref.msg_id = message.msg_id
+ COALESCE(count(*) FILTER (WHERE action = 'merged'), 0) AS merged_count,
+ COALESCE(MIN(message.msg_timestamp), pull_requests.pr_merged_at, pull_requests.pr_closed_at) AS first_response_time,
+ COALESCE(COUNT(DISTINCT message.msg_timestamp), 0) AS comment_count,
+ COALESCE(MAX(message.msg_timestamp), pull_requests.pr_closed_at) AS last_response_time,
+    COALESCE((MAX(message.msg_timestamp) - MIN(message.msg_timestamp)) / COUNT(DISTINCT message.msg_timestamp), pull_requests.pr_closed_at - pull_requests.pr_created_at) AS average_time_between_responses
+ FROM pull_requests
+ LEFT OUTER JOIN pull_request_events on pull_requests.pull_request_id = pull_request_events.pull_request_id
+ JOIN repo on repo.repo_id = pull_requests.repo_id
+ LEFT OUTER JOIN pull_request_message_ref on pull_requests.pull_request_id = pull_request_message_ref.pull_request_id
+ LEFT OUTER JOIN message on pull_request_message_ref.msg_id = message.msg_id
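+    -- The LEFT OUTER JOINs above (previously implicit inner joins) keep PRs that have no events or comments;
+    -- the COALESCE fallbacks then substitute merge/close timestamps so the response-time columns are never NULL.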
+    WHERE repo.repo_id = {repo_id}
GROUP BY pull_requests.pull_request_id
) response_times
ON pull_requests.pull_request_id = response_times.pull_request_id
- LEFT OUTER JOIN (
- SELECT pull_request_commits.pull_request_id, count(DISTINCT pr_cmt_sha) AS commit_count FROM pull_request_commits, pull_requests, pull_request_meta
+ LEFT JOIN (
+ SELECT pull_request_commits.pull_request_id, count(DISTINCT pr_cmt_sha) AS commit_count
+ FROM pull_request_commits, pull_requests, pull_request_meta
WHERE pull_requests.pull_request_id = pull_request_commits.pull_request_id
AND pull_requests.pull_request_id = pull_request_meta.pull_request_id
- AND pull_requests.repo_id = {repo_id}
+    AND pull_requests.repo_id = {repo_id}
AND pr_cmt_sha <> pull_requests.pr_merge_commit_sha
AND pr_cmt_sha <> pull_request_meta.pr_sha
GROUP BY pull_request_commits.pull_request_id
) all_commit_counts
ON pull_requests.pull_request_id = all_commit_counts.pull_request_id
- LEFT OUTER JOIN (
+ LEFT JOIN (
SELECT MAX(pr_repo_meta_id), pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label
FROM pull_requests, pull_request_meta
WHERE pull_requests.pull_request_id = pull_request_meta.pull_request_id
- AND pull_requests.repo_id = {repo_id}
+    AND pull_requests.repo_id = {repo_id}
AND pr_head_or_base = 'base'
GROUP BY pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label
) base_labels
ON base_labels.pull_request_id = all_commit_counts.pull_request_id
- LEFT OUTER JOIN (
+ LEFT JOIN (
SELECT sum(cmt_added) AS lines_added, sum(cmt_removed) AS lines_removed, pull_request_commits.pull_request_id, count(DISTINCT cmt_filename) AS file_count
FROM pull_request_commits, commits, pull_requests, pull_request_meta
WHERE cmt_commit_hash = pr_cmt_sha
AND pull_requests.pull_request_id = pull_request_commits.pull_request_id
AND pull_requests.pull_request_id = pull_request_meta.pull_request_id
- AND pull_requests.repo_id = {repo_id}
+    AND pull_requests.repo_id = {repo_id}
AND commits.repo_id = pull_requests.repo_id
AND commits.cmt_commit_hash <> pull_requests.pr_merge_commit_sha
AND commits.cmt_commit_hash <> pull_request_meta.pr_sha
@@ -136,11 +137,13 @@ def pull_request_data_collection(repo_id, start_date, end_date):
WHERE
repo.repo_group_id = repo_groups.repo_group_id
AND repo.repo_id = pull_requests.repo_id
- AND repo.repo_id = {repo_id}
+    AND repo.repo_id = {repo_id}
ORDER BY
merged_count DESC
""")
- pr_all = pd.read_sql(pr_query, engine)
+
+ with engine.connect() as conn:
+ pr_all = pd.read_sql(pr_query, conn)
pr_all[['assigned_count',
'review_requested_count',
diff --git a/augur/api/routes/user.py b/augur/api/routes/user.py
index dfaeb81f7..62bc44068 100644
--- a/augur/api/routes/user.py
+++ b/augur/api/routes/user.py
@@ -227,7 +227,7 @@ def add_user_repo():
repo = request.args.get("repo_url")
group_name = request.args.get("group_name")
- result = current_user.add_repo(group_name, repo)
+ result = current_user.add_github_repo(group_name, repo)
return jsonify(result[1])
@@ -260,7 +260,7 @@ def add_user_org():
org = request.args.get("org_url")
group_name = request.args.get("group_name")
- result = current_user.add_org(group_name, org)
+ result = current_user.add_github_org(group_name, org)
return jsonify(result[1])
diff --git a/augur/api/routes/util.py b/augur/api/routes/util.py
index cd6a8ad3b..71d3526b9 100644
--- a/augur/api/routes/util.py
+++ b/augur/api/routes/util.py
@@ -1,10 +1,11 @@
#SPDX-License-Identifier: MIT
+from augur.api.routes import AUGUR_API_VERSION
+from ..server import app, engine
import base64
import sqlalchemy as s
import pandas as pd
import json
from flask import Response
-import logging
from augur.application.db.session import DatabaseSession
from augur.application.logs import AugurLogger
@@ -12,10 +13,6 @@
logger = AugurLogger("augur").get_logger()
-from augur.api.routes import AUGUR_API_VERSION
-from ..server import app, engine
-
-
@app.route('/{}/repo-groups'.format(AUGUR_API_VERSION))
def get_all_repo_groups(): #TODO: make this name automatic - wrapper?
repoGroupsSQL = s.sql.text("""
@@ -23,7 +20,9 @@ def get_all_repo_groups(): #TODO: make this name automatic - wrapper?
FROM repo_groups
ORDER BY rg_name
""")
- results = pd.read_sql(repoGroupsSQL, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(repoGroupsSQL, conn)
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
@@ -52,13 +51,15 @@ def get_all_repos():
(select * from api_get_all_repos_issues) b
on
repo.repo_id = b.repo_id
- left outer join
- (select * from api_get_all_repo_prs) c
- on repo.repo_id=c.repo_id
+ left outer join
+ (select * from api_get_all_repo_prs) c
+ on repo.repo_id=c.repo_id
JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
order by repo_name
""")
- results = pd.read_sql(get_all_repos_sql, engine)
+
+ with engine.connect() as conn:
+ results = pd.read_sql(get_all_repos_sql, conn)
results['url'] = results['url'].apply(lambda datum: datum.split('//')[1])
b64_urls = []
@@ -91,21 +92,65 @@ def get_repos_in_repo_group(repo_group_id):
(select * from api_get_all_repos_issues) b
on
repo.repo_id = b.repo_id
- left outer join
- (select * from api_get_all_repo_prs) c
- on repo.repo_id=c.repo_id
+ left outer join
+ (select * from api_get_all_repo_prs) c
+ on repo.repo_id=c.repo_id
JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
WHERE
repo_groups.repo_group_id = :repo_group_id
ORDER BY repo.repo_git
""")
- results = pd.read_sql(repos_in_repo_groups_SQL, engine, params={'repo_group_id': repo_group_id})
+ with engine.connect() as conn:
+ results = pd.read_sql(repos_in_repo_groups_SQL, conn, params={'repo_group_id': repo_group_id})
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
mimetype="application/json")
+@app.route('/{}/repos/<repo_id>'.format(AUGUR_API_VERSION))
+def get_repo_by_id(repo_id: int) -> Response:
+ repo_by_id_SQL = s.sql.text("""
+ SELECT
+ repo.repo_id,
+ repo.repo_name,
+ repo.description,
+ repo.repo_git AS url,
+ a.commits_all_time,
+ b.issues_all_time,
+ c.pull_requests_all_time,
+ rg_name,
+ repo.repo_group_id
+ FROM
+ repo
+ LEFT OUTER JOIN
+ (SELECT * FROM api_get_all_repos_commits) a
+ ON repo.repo_id = a.repo_id
+ LEFT OUTER JOIN
+ (SELECT * FROM api_get_all_repos_issues) b
+ ON repo.repo_id = b.repo_id
+ LEFT OUTER JOIN
+ (SELECT * FROM api_get_all_repo_prs) c
+ ON repo.repo_id = c.repo_id
+ JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
+ WHERE
+ repo.repo_id = :id
+ """)
+
+    with engine.connect() as conn:
+        results = pd.read_sql(repo_by_id_SQL, conn, params={"id": repo_id})
+ results["url"] = results["url"].apply(lambda datum: datum.split("//")[1]) # cut "https://" off the URL
+ results["base64_url"] = [base64.b64encode(results.at[i, "url"].encode()) for i in results.index]
+ data = results.to_json(orient="records", date_format="iso", date_unit="ms")
+
+ if not data or data == "[]":
+ return Response(response='{"status": "Repository ' + str(repo_id) + ' does not exist"}',
+ status=400,
+ mimetype="application/json")
+
+ return Response(response=data[1:-1], # cut off brackets at each end, turns list of length 1 into single value
+ status=200,
+ mimetype="application/json")
+
 @app.route('/{}/owner/<owner>/repo/<repo>'.format(AUGUR_API_VERSION))
def get_repo_by_git_name(owner, repo):
@@ -116,7 +161,8 @@ def get_repo_by_git_name(owner, repo):
GROUP BY repo_id, rg_name
""")
- results = pd.read_sql(get_repo_by_git_name_sql, engine, params={'owner': '%{}_'.format(owner), 'repo': repo,})
+ with engine.connect() as conn:
+ results = pd.read_sql(get_repo_by_git_name_sql, conn, params={'owner': '%{}%'.format(owner), 'repo': repo,})
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
@@ -132,7 +178,9 @@ def get_repo_by_name(rg_name, repo_name):
AND LOWER(rg_name) = LOWER(:rg_name)
AND LOWER(repo_name) = LOWER(:repo_name)
""")
- results = pd.read_sql(get_repo_by_name_sql, engine, params={'rg_name': rg_name, 'repo_name': repo_name})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(get_repo_by_name_sql, conn, params={'rg_name': rg_name, 'repo_name': repo_name})
results['url'] = results['url'].apply(lambda datum: datum.split('//')[1])
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
@@ -146,7 +194,9 @@ def get_group_by_name(rg_name):
FROM repo_groups
WHERE lower(rg_name) = lower(:rg_name)
""")
- results = pd.read_sql(groupSQL, engine, params={'rg_name': rg_name})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(groupSQL, conn, params={'rg_name': rg_name})
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
@@ -160,7 +210,8 @@ def get_repos_for_dosocs():
WHERE a.setting='repo_directory'
""")
- results = pd.read_sql(get_repos_for_dosocs_SQL, engine)
+ with engine.connect() as conn:
+ results = pd.read_sql(get_repos_for_dosocs_SQL, conn)
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
@@ -188,7 +239,9 @@ def get_issues(repo_group_id, repo_id=None):
GROUP BY issues.issue_id
ORDER by OPEN_DAY DESC
""")
- results = pd.read_sql(get_issues_sql, engine, params={'repo_group_id': repo_group_id})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(get_issues_sql, conn, params={'repo_group_id': repo_group_id})
else:
get_issues_sql = s.sql.text("""
SELECT issue_title,
@@ -208,7 +261,9 @@ def get_issues(repo_group_id, repo_id=None):
GROUP BY issues.issue_id, repo_name
ORDER by OPEN_DAY DESC
""")
- results = pd.read_sql(get_issues_sql, engine, params={'repo_id': repo_id})
+
+ with engine.connect() as conn:
+ results = pd.read_sql(get_issues_sql, conn, params={'repo_id': repo_id})
data = results.to_json(orient="records", date_format='iso', date_unit='ms')
return Response(response=data,
status=200,
diff --git a/augur/api/view/api.py b/augur/api/view/api.py
index 287b07943..598c0cdb6 100644
--- a/augur/api/view/api.py
+++ b/augur/api/view/api.py
@@ -102,7 +102,18 @@ def av_add_user_repo():
if rg_obj:
# add the orgs repos to the group
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)
-
+
+ # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
+ elif Repo.parse_gitlab_repo_url(url)[0]:
+
+        org_name, repo_name = Repo.parse_gitlab_repo_url(url)
+ repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
+
+ # TODO: gitlab ensure the whole repo git is inserted so it can be found here
+ repo_obj = Repo.get_by_repo_git(session, repo_git)
+ if repo_obj:
+ add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
+
else:
invalid_urls.append(url)
diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py
index 8a9fc0597..72164a929 100644
--- a/augur/api/view/routes.py
+++ b/augur/api/view/routes.py
@@ -1,4 +1,8 @@
+"""
+Defines the api routes for the augur views
+"""
import logging
+import math
from flask import Flask, render_template, render_template_string, request, abort, jsonify, redirect, url_for, session, flash
from sqlalchemy.orm.exc import NoResultFound
from .utils import *
@@ -37,9 +41,9 @@ def root(path=""):
def logo(brand=None):
if brand is None:
return redirect(url_for('static', filename='img/augur_logo.png'))
- elif "augur" in brand:
+ if "augur" in brand:
return logo(None)
- elif "chaoss" in brand:
+ if "chaoss" in brand:
return redirect(url_for('static', filename='img/Chaoss_Logo_white.png'))
return ""
@@ -75,10 +79,16 @@ def repo_table_view():
if current_user.is_authenticated:
data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0]
- page_count = (current_user.get_repo_count(search = query)[0] or 0) // pagination_offset
+ repos_count = (current_user.get_repo_count(search = query)[0] or 0)
else:
data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0]
- page_count = (get_all_repos_count(search = query)[0] or 0) // pagination_offset
+ repos_count = (get_all_repos_count(search = query)[0] or 0)
+
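+    # assuming zero-based page numbering downstream, the index of the last page is ceil(count / offset) - 1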
+ page_count = math.ceil(repos_count / pagination_offset) - 1
+
+ if not data:
+ data = None
+
return render_module("repos-table", title="Repos", repos=data, query_key=query, activePage=page, pages=page_count, offset=pagination_offset, PS="repo_table_view", reverse = rev, sorting = sorting)
diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py
index 228935574..298e9950a 100644
--- a/augur/api/view/utils.py
+++ b/augur/api/view/utils.py
@@ -1,10 +1,24 @@
+"""
+Defines utility functions used by the augur api views
+"""
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from flask import render_template, flash, url_for, Flask
+from .init import init_logging
from .init import *
from ..server import app, db_session
from augur.application.config import AugurConfig
 import urllib.request, urllib.error, json, os, math, yaml, urllib3, time, logging, re
+
+from augur.application.db.session import DatabaseSession
+from augur.application.db.engine import DatabaseEngine
+from augur.application.db.models import User, Repo, RepoGroup, UserGroup, UserRepo
+from sqlalchemy import Column, Table, Integer, MetaData, or_
+from sqlalchemy.sql.operators import ilike_op, distinct_op
+from sqlalchemy.sql.functions import coalesce
+from augur.application.db.models.base import Base
+
+from sqlalchemy.orm import Query
init_logging()
@@ -66,6 +80,8 @@ def getSetting(key, section = "View"):
loadSettings()
+#version_check(settings)
+
""" ----------------------------------------------------------------
"""
def loadReports():
@@ -298,3 +314,6 @@ def render_message(messageTitle, messageBody = None, title = None, redirect = No
def render_module(module, **args):
args.setdefault("body", module)
return render_template('index.j2', **args)
+
+""" ----------------------------------------------------------------
+"""
diff --git a/augur/application/cli/_multicommand.py b/augur/application/cli/_multicommand.py
index 2df6e8b11..c0d8b1a96 100644
--- a/augur/application/cli/_multicommand.py
+++ b/augur/application/cli/_multicommand.py
@@ -27,7 +27,7 @@ def get_command(self, ctx, name):
try:
module = importlib.import_module('.' + name, 'augur.application.cli')
return module.cli
- except ModuleNotFoundError:
+ except ModuleNotFoundError as e:
pass
@click.command(cls=AugurMultiCommand, context_settings=CONTEXT_SETTINGS)
diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py
index 29afab2b0..9b6894a7d 100644
--- a/augur/application/cli/backend.py
+++ b/augur/application/cli/backend.py
@@ -19,7 +19,8 @@
from datetime import datetime
from augur import instance_id
-from augur.tasks.start_tasks import augur_collection_monitor, CollectionState, create_collection_status_records
+from augur.tasks.util.collection_state import CollectionState
+from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
from augur.tasks.init.redis_connection import redis_connection
@@ -91,9 +92,12 @@ def start(disable_collection, development, port):
logger.info("Deleting old task schedule")
os.remove("celerybeat-schedule.db")
- celery_beat_process = None
- celery_command = "celery -A augur.tasks.init.celery_app.celery_app beat -l debug"
- celery_beat_process = subprocess.Popen(celery_command.split(" "))
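+    # read the configured log level so celery beat follows Augur's logging config instead of always running at debug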
+ with DatabaseSession(logger) as db_session:
+ config = AugurConfig(logger, db_session)
+ log_level = config.get_value("Logging", "log_level")
+ celery_beat_process = None
+ celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}"
+ celery_beat_process = subprocess.Popen(celery_command.split(" "))
if not disable_collection:
diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py
index f09aaabbd..42d57ecc6 100644
--- a/augur/application/cli/db.py
+++ b/augur/application/cli/db.py
@@ -99,7 +99,7 @@ def add_repo_groups(filename):
"""
Create new repo groups in Augur's database
"""
- with DatabaseEngine() as engine, engine.connect() as connection:
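+    # engine.begin() wraps the statements below in a transaction that commits on success; a plain connect() no longer autocommits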
+ with DatabaseEngine() as engine, engine.begin() as connection:
df = pd.read_sql(
s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"),
@@ -248,7 +248,7 @@ def update_api_key(api_key):
"""
)
- with DatabaseEngine() as engine, engine.connect() as connection:
+ with DatabaseEngine() as engine, engine.begin() as connection:
connection.execute(update_api_key_sql, api_key=api_key)
logger.info(f"Updated Augur API key to: {api_key}")
diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py
index abdc6de54..756218139 100644
--- a/augur/application/db/data_parse.py
+++ b/augur/application/db/data_parse.py
@@ -37,8 +37,63 @@ def extract_needed_pr_label_data(labels: List[dict], repo_id: int, tool_source:
return label_dicts
-# retrieve only the needed data for pr assignees from the api response
+
+def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for mr label data from the api response
+
+ Arguments:
+ labels: List of dictionaries of label data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of parsed label dicts
+ """
+
+ if len(labels) == 0:
+ return []
+
+ label_dicts = []
+ for label in labels:
+
+ label_dict = {
+ 'pr_src_id': label['id'],
+ 'pr_src_node_id': None,
+ 'pr_src_url': None,
+ 'pr_src_description': label['name'],
+ 'pr_src_color': label['color'],
+ # TODO: Populate this by making an api call for each label
+ 'pr_src_default_bool': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source,
+ 'repo_id': repo_id
+ }
+
+ label_dicts.append(label_dict)
+
+ return label_dicts
+
+
def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for pr assignees from the api response
+
+ Arguments:
+        assignees: List of dictionaries of assignee data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+        List of parsed assignee dicts
+ """
if len(assignees) == 0:
return []
@@ -48,7 +103,6 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so
for assignee in assignees:
assignee_dict = {
- # store the pr_url data on in the pr assignee data for now so we can relate it back to a pr later
'contrib_id': assignee["cntrb_id"],
'pr_assignee_src_id': int(assignee['id']),
'tool_source': tool_source,
@@ -61,8 +115,59 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so
return assignee_dicts
-# retrieve only the needed data for pr reviewers from the api response
+def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for merge request assignees from the api response
+
+ Arguments:
+        assignees: List of dictionaries of assignee data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+        List of parsed assignee dicts
+ """
+
+ if len(assignees) == 0:
+ return []
+
+ assignee_dicts = []
+ for assignee in assignees:
+
+ assignee_dict = {
+ 'contrib_id': None,
+ 'repo_id': repo_id,
+            # TODO: temporarily set to the contributor's id until contrib_id can be populated and a unique key added on (contrib_id, pull_request_id)
+ 'pr_assignee_src_id': assignee["id"],
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source
+ }
+
+ assignee_dicts.append(assignee_dict)
+
+ return assignee_dicts
+
+
+
def extract_needed_pr_reviewer_data(reviewers: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for pr reviewers from the api response
+
+ Arguments:
+ reviewers: List of dictionaries of reviewer data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of parsed reviewer dicts
+ """
if len(reviewers) == 0:
return []
@@ -247,6 +352,42 @@ def extract_needed_issue_assignee_data(assignees: List[dict], repo_id: int, tool
return assignee_dicts
+def extract_needed_gitlab_issue_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for gitlab issue assignees from the api response
+
+ Arguments:
+ assignees: List of dictionaries of gitlab assignee data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of parsed assignee dicts
+ """
+
+ if len(assignees) == 0:
+ return []
+
+ assignee_dicts = []
+ for assignee in assignees:
+
+ assignee_dict = {
+ "cntrb_id": None,
+ "tool_source": tool_source,
+ "tool_version": tool_version,
+ "data_source": data_source,
+ "issue_assignee_src_id": assignee['id'],
+ "issue_assignee_src_node": None,
+ "repo_id": repo_id
+ }
+
+ assignee_dicts.append(assignee_dict)
+
+ return assignee_dicts
+
# retrieve only the needed data for pr labels from the api response
@@ -277,9 +418,62 @@ def extract_needed_issue_label_data(labels: List[dict], repo_id: int, tool_sourc
return label_dicts
+def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for gitlab issue labels from the api response
+
+ Arguments:
+ labels: List of dictionaries of gitlab issue label data
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of parsed label dicts
+ """
+
+ if len(labels) == 0:
+ return []
+
+ label_dicts = []
+ for label in labels:
+
+ label_dict = {
+ "label_text": label["name"],
+ "label_description": label.get("description", None),
+ "label_color": label['color'],
+ "tool_source": tool_source,
+ "tool_version": tool_version,
+ "data_source": data_source,
+ "label_src_id": label['id'],
+ "label_src_node_id": None,
+ "repo_id": repo_id
+ }
+
+ label_dicts.append(label_dict)
+
+ return label_dicts
+
+
-# retrieve only the needed data for pr labels from the api response
def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+    Retrieve only the needed data for the issue message ref from the api response
+
+ Arguments:
+ message: Message data dict
+ issue_id: id of the issue
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ Dict of message ref data.
+ """
message_ref_dict = {
'issue_id': issue_id,
@@ -311,9 +505,21 @@ def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo
def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):
+ """
+ Retrieve only the needed data for the pr api response
+
+ Arguments:
+ pr: PR data dict
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+
+ Returns:
+ Parsed pr dict
+ """
- pr_dict = {
+ pr = {
'repo_id': repo_id,
'pr_url': pr['url'],
# 1-22-2022 inconsistent casting; sometimes int, sometimes float in bulk_insert
@@ -367,9 +573,23 @@ def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):
'data_source': 'GitHub API'
}
- return pr_dict
+ return pr
def extract_needed_issue_data(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):
+ """
+ Retrieve only the needed data for the issue api response
+
+ Arguments:
+ issue: Issue data dict
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: platform source
+
+
+ Returns:
+ Parsed issue dict
+ """
dict_data = {
'cntrb_id': None, # this the contributor who closed the issue
@@ -513,8 +733,438 @@ def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id,
return review_row
+def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, tool_version):
+ """
+ Retrieve only the needed data for the pr gitlab api response
-
+ Arguments:
+ pr: PR data dict
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+
+
+ Returns:
+ Parsed pr dict
+ """
+
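+    # GitLab merge requests are mapped onto the GitHub-shaped pull request columns; fields with no GitLab equivalent stay None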
+ pr_dict = {
+ 'repo_id': repo_id,
+ 'pr_url': pr['web_url'],
+ 'pr_src_id': pr['id'],
+ 'pr_src_node_id': None,
+ 'pr_html_url': pr['web_url'],
+ 'pr_diff_url': None,
+ 'pr_patch_url': None,
+ 'pr_issue_url': None,
+ 'pr_augur_issue_id': None,
+ 'pr_src_number': pr['iid'],
+ 'pr_src_state': pr['state'],
+ 'pr_src_locked': pr['discussion_locked'],
+ 'pr_src_title': pr['title'],
+ # TODO: Add contributor logic for gitlab
+ 'pr_augur_contributor_id': None,
+ 'pr_body': pr['description'],
+ 'pr_created_at': pr['created_at'],
+ 'pr_updated_at': pr['updated_at'],
+ 'pr_closed_at': pr['closed_at'],
+ 'pr_merged_at': pr['merged_at'],
+ 'pr_merge_commit_sha': pr['merge_commit_sha'],
+ 'pr_teams': None,
+ 'pr_milestone': pr['milestone'].get('title') if pr['milestone'] else None,
+ 'pr_commits_url': None,
+ 'pr_review_comments_url': None,
+ 'pr_review_comment_url': None,
+ 'pr_comments_url': None,
+ 'pr_statuses_url': None,
+ 'pr_meta_head_id': None,
+ 'pr_meta_base_id': None,
+ 'pr_src_issue_url': None,
+ 'pr_src_comments_url': None,
+ 'pr_src_review_comments_url': None,
+ 'pr_src_commits_url': None,
+ 'pr_src_statuses_url': None,
+ 'pr_src_author_association': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': 'Gitlab API'
+ }
+
+ return pr_dict
+
+
+def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):
+ """
+ Retrieve only the needed data for the issue gitlab api response
+
+ Arguments:
+ issue: Issue data dict
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: platform source
+
+ Returns:
+ Parsed issue dict
+ """
+
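+    # the gh_issue_id / gh_issue_number columns keep their GitHub-era names but carry the GitLab id and iid here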
+ issue_dict = {
+ "repo_id": repo_id,
+ "reporter_id": None,
+ "pull_request": None,
+ "pull_request_id": None,
+ "created_at": issue['created_at'],
+ "issue_title": issue['title'],
+ "issue_body": issue['description'] if 'description' in issue else None,
+ "comment_count": issue['user_notes_count'],
+ "updated_at": issue['updated_at'],
+ "closed_at": issue['closed_at'],
+ "repository_url": issue['_links']['project'],
+ "issue_url": issue['_links']['self'],
+ "labels_url": None,
+ "comments_url": issue['_links']['notes'],
+ "events_url": None,
+ "html_url": issue['_links']['self'],
+ "issue_state": issue['state'],
+ "issue_node_id": None,
+ "gh_issue_id": issue['id'],
+ "gh_issue_number": issue['iid'],
+ "gh_user_id": issue['author']['id'],
+ "tool_source": tool_source,
+ "tool_version": tool_version,
+ "data_source": data_source
+ }
+
+ return issue_dict
+
+
+
+def extract_gitlab_mr_event_data(event: dict, pr_id: int, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict:
+ """
+ Retrieve only the needed data for the mr event gitlab api response
+
+ Arguments:
+ event: Event data dict
+ pr_id: id of the pr
+ platform_id: id of the platform
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: platform source
+
+
+ Returns:
+ Parsed event dict
+ """
+
+ mr_event = {
+ 'pull_request_id': pr_id,
+ 'cntrb_id': None,
+ 'action': event['action_name'],
+ 'action_commit_hash': None,
+ 'created_at': event['created_at'],
+ 'issue_event_src_id': event['target_id'],
+ 'repo_id': repo_id,
+ 'platform_id': platform_id,
+ 'node_id': None,
+ 'node_url': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source
+ }
+
+ return mr_event
+
+def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict:
+ """
+ Retrieve only the needed data from the gitlab issue event api response
+
+ Arguments:
+ event: Event data dict
+ issue_id: id of the issue
+ platform_id: id of the platform
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: platform source
+
+
+ Returns:
+ Parsed event dict
+ """
+
+ issue_event = {
+ "issue_event_src_id": event['target_id'],
+ "issue_id": issue_id,
+ "node_id": None,
+ "node_url": None,
+ "cntrb_id": None,
+ "created_at": event['created_at'],
+ "action": event["action_name"],
+ "action_commit_hash": None,
+ "platform_id": platform_id,
+ "repo_id" : repo_id,
+ "tool_source": tool_source,
+ "tool_version": tool_version,
+ "data_source": data_source
+ }
+
+ return issue_event
+
+
+def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+ """
+ Retrieve only the needed data for pr reviewers from the api response
+
+ Arguments:
+ data: List of dictionaries that contain mr reviewer data to parse
+ pull_request_id: id of the PR
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of dicts of parsed mr reviewer data
+ """
+
+ if len(data) == 0:
+ return []
+
+ reviewer_dicts = []
+ for x in data:
+
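+ # one placeholder reviewer row per suggested approver; cntrb_id is left unresolved (None) here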
+ for _ in x["suggested_approvers"]:
+
+ reviewer_dict = {
+ 'pull_request_id': pull_request_id,
+ 'cntrb_id': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source
+ }
+
+ reviewer_dicts.append(reviewer_dict)
+
+ return reviewer_dicts
+
+
+def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source):
+ """
+ Retrieve only the needed data for mr commit data from the api response
+
+ Arguments:
+ commit: commit data dictionary
+ repo_id: augur id of the repository
+ pull_request_id: id of the PR
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ Dictionary of the extracted commit data
+ """
+
+ commit_dict = {
+ 'pull_request_id': pull_request_id,
+ 'pr_cmt_sha': commit['id'],
+ 'pr_cmt_node_id': None,
+ 'pr_cmt_message': commit['message'],
+ 'repo_id': repo_id,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source,
+ }
+
+ return commit_dict
+
+
+def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source):
+ """
+ Retrieve only the needed data for mr file data from the api response
+ Arguments:
+ gitlab_file_data: file data dictionary
+ repo_id: augur id of the repository
+ pull_request_id: id of the PR
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of dicts of parsed gitlab file changes
+ """
+ files = []
+
+ changes = gitlab_file_data["changes"]
+ for file_changes in changes:
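+ # rough line counts parsed from the unified diff hunk header ("@@ -a,b +c,d @@"):
+ # b is used as the deletion count and d as the addition count, defaulting to 0 if the diff is missing or malformed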
+ try:
+ deletes = int(file_changes['diff'].split('@@')[1].strip().split(' ')[0].split(',')[1])
+ adds = int(file_changes['diff'].split('@@')[1].strip().split(' ')[1].split(',')[1])
+ except Exception:
+ deletes = 0
+ adds = 0
+
+ file_dict = {
+ 'pull_request_id': pull_request_id,
+ 'repo_id': repo_id,
+ 'pr_file_additions': adds,
+ 'pr_file_deletions': deletes,
+ 'pr_file_path': file_changes['old_path'],
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source,
+ }
+
+ files.append(file_dict)
+
+ return files
+
+
+def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, tool_version, data_source):
+ """
+ Retrieve only the needed data for mr metadata from the api response
+
+ Arguments:
+ mr_dict: mr data dictionary
+ repo_id: augur id of the repository
+ pull_request_id: id of the PR
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ List of dicts of parsed mr metadata
+ """
+ head = {'sha': mr_dict['diff_refs']['head_sha'],
+ 'ref': mr_dict['target_branch'],
+ 'label': str(mr_dict['target_project_id']) + ':' + mr_dict['target_branch'],
+ 'author': mr_dict['author']['username'],
+ 'repo': str(mr_dict['target_project_id'])
+ }
+
+ base = {'sha': mr_dict['diff_refs']['base_sha'],
+ 'ref': mr_dict['source_branch'],
+ 'label': str(mr_dict['source_project_id']) + ':' + mr_dict['source_branch'],
+ 'author': mr_dict['author']['username'],
+ 'repo': str(mr_dict['source_project_id'])
+ }
+
+ pr_meta_dict = {
+ 'head': head,
+ 'base': base
+ }
+ all_meta = []
+ for pr_side, pr_meta_data in pr_meta_dict.items():
+ pr_meta = {
+ 'pull_request_id': pull_request_id,
+ 'repo_id': repo_id,
+ 'pr_head_or_base': pr_side,
+ 'pr_src_meta_label': pr_meta_data['label'],
+ 'pr_src_meta_ref': pr_meta_data['ref'],
+ 'pr_sha': pr_meta_data['sha'],
+ 'cntrb_id': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source
+ }
+ all_meta.append(pr_meta)
+
+ return all_meta
+
+
+def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict:
+ """
+ Extract the message id for a given message on an issue from an api response
+ and connect it to the relevant repo id.
+
+ Arguments:
+ message: message data dict
+ issue_id: id of the issue
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ Dict containing the message ref id as well as the repo id.
+ """
+
+ message_ref_dict = {
+ 'issue_id': issue_id,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source,
+ 'issue_msg_ref_src_comment_id': int(message['id']),
+ 'issue_msg_ref_src_node_id': None,
+ 'repo_id': repo_id
+ }
+
+ return message_ref_dict
+
+
+def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str):
+ """
+ Extract specific metadata for a comment from an api response
+ and connect it to the relevant platform id.
+
+ Arguments:
+ comment: comment data dict
+ platform_id: augur id of the relevant platform
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ Dict containing parsed comment text and metadata
+ """
+
+ comment_dict = {
+ "pltfrm_id": platform_id,
+ "msg_text": comment['body'],
+ "msg_timestamp": comment['created_at'],
+ "cntrb_id": None,
+ "platform_msg_id": int(comment['id']),
+ "tool_source": tool_source,
+ "tool_version": tool_version,
+ "data_source": data_source
+ }
+
+ return comment_dict
+
+def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict:
+ """
+ Retrieve only the needed data for the mr message ref from the api response
+
+ Arguments:
+ comment: comment data dict
+ pull_request_id: id of the PR
+ repo_id: augur id of the repository
+ tool_source: The part of augur that processed the data
+ tool_version: The version of the augur task that processed the data
+ data_source: The source of the data
+
+
+ Returns:
+ Dict containing the comment, pr and repo id of the parsed comment data.
+ """
+
+ pr_msg_ref = {
+ 'pull_request_id': pull_request_id,
+ 'pr_message_ref_src_comment_id': comment['id'],
+ 'repo_id': repo_id,
+ 'pr_message_ref_src_node_id': None,
+ 'tool_source': tool_source,
+ 'tool_version': tool_version,
+ 'data_source': data_source
+ }
+
+ return pr_msg_ref
diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index 95cb0725d..7f97e4bbd 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -267,6 +267,7 @@ class Contributor(Base):
@classmethod
def from_github(cls, contributor, tool_source, tool_version, data_source):
+ from augur.tasks.util.AugurUUID import GithubUUID
cntrb_id = GithubUUID()
cntrb_id["user"] = contributor["id"]
@@ -563,6 +564,8 @@ class RepoGroup(Base):
data_source = Column(String)
data_collection_date = Column(TIMESTAMP(precision=0))
+ repo = relationship("Repo", back_populates="repo_group")
+
@staticmethod
def is_valid_repo_group_id(session, repo_group_id: int) -> bool:
"""Deterime is repo_group_id exists.
@@ -865,8 +868,8 @@ class Repo(Base):
TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")
)
- repo_group = relationship("RepoGroup")
- user_repo = relationship("UserRepo")
+ repo_group = relationship("RepoGroup", back_populates="repo")
+ user_repo = relationship("UserRepo", back_populates="repo")
collection_status = relationship("CollectionStatus", back_populates="repo")
issues = relationship("Issue", back_populates="repo")
prs = relationship("PullRequest", back_populates="repo")
@@ -926,6 +929,44 @@ def is_valid_github_repo(gh_session, url: str) -> bool:
return False, {"status": f"Github Error: {data['message']}"}
return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]}
+
+ @staticmethod
+ def is_valid_gitlab_repo(gl_session, url: str) -> tuple:
+ """Determine whether a GitLab repo URL is valid.
+
+ Args:
+ gl_session: GitLab session object with API key
+ url: Repository URL
+
+ Returns:
+ Tuple of a boolean (True if the repo URL is valid) and a status dict
+ """
+ from augur.tasks.github.util.github_paginator import hit_api
+
+ REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/"
+
+ owner, repo = Repo.parse_gitlab_repo_url(url)
+ if not owner or not repo:
+ return False, {"status": "Invalid repo URL"}
+
+ # Encode namespace and project name for the API request
+ project_identifier = f"{owner}%2F{repo}"
+ url = REPO_ENDPOINT.format(project_identifier)
+
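+ # retry transient failures against the GitLab project endpoint; anything other than a 200 or 404 response is retried, up to 10 attempts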
+ attempts = 0
+ while attempts < 10:
+ response = hit_api(gl_session.oauths, url, logger)
+
+ if response.status_code == 404:
+ return False, {"status": "Invalid repo"}
+
+ if response.status_code == 200:
+ return True, {"status": "Valid repo"}
+
+ attempts += 1
+
+ return False, {"status": "Failed to validate repo after multiple attempts"}
+
@staticmethod
def parse_github_repo_url(url: str) -> tuple:
@@ -945,6 +986,29 @@ def parse_github_repo_url(url: str) -> tuple:
capturing_groups = result.groups()
+ owner = capturing_groups[0]
+ repo = capturing_groups[1]
+
+ return owner, repo
+
+ @staticmethod
+ def parse_gitlab_repo_url(url: str) -> tuple:
+ """ Gets the owner and repo from a gitlab url.
+
+ Args:
+ url: Gitlab url
+
+ Returns:
+ Tuple of owner and repo. Or a tuple of None and None if the url is invalid.
+ """
+
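+ # expects a gitlab.com URL of the form https://gitlab.com/<owner>/<repo>, optionally ending in ".git" and/or a trailing slash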
+ result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$", url)
+
+ if not result:
+ return None, None
+
+ capturing_groups = result.groups()
+
owner = capturing_groups[0]
repo = capturing_groups[1]
@@ -971,12 +1035,60 @@ def parse_github_org_url(url):
return result.groups()[0]
@staticmethod
- def insert(session, url: str, repo_group_id: int, tool_source, repo_type):
+ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
+ """Add a repo to the repo table.
+
+ Args:
+ url: repo url
+ repo_group_id: group to assign repo to
+
+ Note:
+ If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist it will simply insert the repo.
+ """
+
+ if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str):
+ return None
+
+ if not RepoGroup.is_valid_repo_group_id(session, repo_group_id):
+ return None
+
+ if url.endswith("/"):
+ url = url[:-1]
+
+ url = url.lower()
+
+ owner, repo = Repo.parse_gitlab_repo_url(url)
+ if not owner or not repo:
+ return None
+
+ repo_data = {
+ "repo_group_id": repo_group_id,
+ "repo_git": url,
+ "repo_path": f"gitlab.com/{owner}/",
+ "repo_name": repo,
+ "repo_type": None,
+ "tool_source": tool_source,
+ "tool_version": "1.0",
+ "data_source": "Git"
+ }
+
+ repo_unique = ["repo_git"]
+ return_columns = ["repo_id"]
+ result = session.insert_data(repo_data, Repo, repo_unique, return_columns, on_conflict_update=False)
+
+ if not result:
+ return None
+
+ return result[0]["repo_id"]
+
+ @staticmethod
+ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
"""Add a repo to the repo table.
Args:
url: repo url
repo_group_id: group to assign repo to
+ repo_type: github or gitlab
Note:
If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist is will simply insert the repo.
@@ -1207,10 +1319,6 @@ class Commit(Base):
primaryjoin="Commit.cmt_author_platform_username == Contributor.cntrb_login",
back_populates="commits"
)
- contributor1 = relationship(
- "Contributor",
- primaryjoin="Commit.cmt_author_platform_username == Contributor.cntrb_login",
- )
repo = relationship("Repo", back_populates="commits")
message_ref = relationship("CommitCommentRef", back_populates="cmt")
diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py
index f702d829a..47f28b12f 100644
--- a/augur/application/db/models/augur_operations.py
+++ b/augur/application/db/models/augur_operations.py
@@ -271,9 +271,9 @@ class User(Base):
{"schema": "augur_operations"}
)
- groups = relationship("UserGroup")
- tokens = relationship("UserSessionToken")
- applications = relationship("ClientApplication")
+ groups = relationship("UserGroup", back_populates="user")
+ tokens = relationship("UserSessionToken", back_populates="user")
+ applications = relationship("ClientApplication", back_populates="user")
_is_authenticated = False
_is_active = True
@@ -449,17 +449,30 @@ def remove_group(self, group_name):
return result
- def add_repo(self, group_name, repo_url):
+ def add_github_repo(self, group_name, repo_url):
from augur.tasks.github.util.github_task_session import GithubTaskSession
from augur.tasks.github.util.github_api_key_handler import NoValidKeysError
try:
with GithubTaskSession(logger) as session:
- result = UserRepo.add(session, repo_url, self.user_id, group_name)
+ result = UserRepo.add_github_repo(session, repo_url, self.user_id, group_name)
except NoValidKeysError:
return False, {"status": "No valid keys"}
return result
+
+ def add_gitlab_repo(self, group_name, repo_url):
+
+ from augur.tasks.gitlab.gitlab_task_session import GitlabTaskSession
+ from augur.tasks.github.util.github_api_key_handler import NoValidKeysError
+ try:
+ with GitlabTaskSession(logger) as session:
+ result = UserRepo.add_gitlab_repo(session, repo_url, self.user_id, group_name)
+ except NoValidKeysError:
+ return False, {"status": "No valid keys"}
+
+ return result
+
def remove_repo(self, group_name, repo_id):
@@ -468,14 +481,14 @@ def remove_repo(self, group_name, repo_id):
return result
- def add_org(self, group_name, org_url):
+ def add_github_org(self, group_name, org_url):
from augur.tasks.github.util.github_task_session import GithubTaskSession
from augur.tasks.github.util.github_api_key_handler import NoValidKeysError
try:
with GithubTaskSession(logger) as session:
- result = UserRepo.add_org_repos(session, org_url, self.user_id, group_name)
+ result = UserRepo.add_github_org_repos(session, org_url, self.user_id, group_name)
except NoValidKeysError:
return False, {"status": "No valid keys"}
@@ -628,8 +641,8 @@ class UserGroup(Base):
{"schema": "augur_operations"}
)
- user = relationship("User")
- repos = relationship("UserRepo")
+ user = relationship("User", back_populates="groups")
+ repos = relationship("UserRepo", back_populates="group")
@staticmethod
def insert(session, user_id:int, group_name:str) -> dict:
@@ -739,8 +752,8 @@ class UserRepo(Base):
ForeignKey("augur_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False
)
- repo = relationship("Repo")
- group = relationship("UserGroup")
+ repo = relationship("Repo", back_populates="user_repo")
+ group = relationship("UserGroup", back_populates="repos")
@staticmethod
def insert(session, repo_id: int, group_id:int = 1) -> bool:
@@ -769,9 +782,69 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool:
return False
return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id
+
+ @staticmethod
+ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_group_id=None) -> dict:
+ """Add repo to the user repo table
+
+ Args:
+ url: repo url
+ user_id: id of user_id from users table
+ group_name: name of group to add repo to.
+ group_id: id of the group
+ from_org_list: if True, skip URL validation (the repo comes from an already validated org repo list)
+
+ Note:
+ Either the group_name or group_id can be passed, not both
+
+ Returns:
+ Dict that contains the key "status" and additional useful data
+ """
+
+ if group_name and group_id:
+ return False, {"status": "Pass only the group name or group id not both"}
+
+ if not group_name and not group_id:
+ return False, {"status": "Need group name or group id to add a repo"}
+
+ if group_id is None:
+
+ group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
+ if group_id is None:
+ return False, {"status": "Invalid group name"}
+
+ if not from_org_list:
+ result = Repo.is_valid_gitlab_repo(session, url)
+ if not result[0]:
+ return False, {"status": result[1]["status"], "repo_url": url}
+
+ # if no repo_group_id is passed then assign the repo to the frontend repo group
+ if repo_group_id is None:
+
+ frontend_repo_group = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first()
+ if not frontend_repo_group:
+ return False, {"status": "Could not find repo group with name 'Frontend Repos'", "repo_url": url}
+
+ repo_group_id = frontend_repo_group.repo_group_id
+
+
+ repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend")
+ if not repo_id:
+ return False, {"status": "Repo insertion failed", "repo_url": url}
+
+ result = UserRepo.insert(session, repo_id, group_id)
+ if not result:
+ return False, {"status": "repo_user insertion failed", "repo_url": url}
+
+ #collection_status records are now only added during collection -IM 5/1/23
+ #status = CollectionStatus.insert(session, repo_id)
+ #if not status:
+ # return False, {"status": "Failed to create status for repo", "repo_url": url}
+
+ return True, {"status": "Repo Added", "repo_url": url}
@staticmethod
- def add(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict:
+ def add_github_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict:
"""Add repo to the user repo table
Args:
@@ -820,7 +893,7 @@ def add(session, url: List[str], user_id: int, group_name=None, group_id=None, f
repo_group_id = frontend_repo_group.repo_group_id
- repo_id = Repo.insert(session, url, repo_group_id, "Frontend", repo_type)
+ repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type)
if not repo_id:
return False, {"status": "Repo insertion failed", "repo_url": url}
@@ -862,7 +935,7 @@ def delete(session, repo_id:int, user_id:int, group_name:str) -> dict:
return True, {"status": "Repo Removed"}
@staticmethod
- def add_org_repos(session, url: List[str], user_id: int, group_name: int):
+ def add_github_org_repos(session, url: List[str], user_id: int, group_name: int):
"""Add list of orgs and their repos to a users repos.
Args:
@@ -911,7 +984,7 @@ def add_org_repos(session, url: List[str], user_id: int, group_name: int):
failed_repos = []
for repo in repos:
- result = UserRepo.add(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id)
+ result = UserRepo.add_github_repo(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id)
# keep track of all the repos that failed
if not result[0]:
@@ -949,9 +1022,9 @@ class UserSessionToken(Base):
application_id = Column(ForeignKey("augur_operations.client_applications.id", name="user_session_token_application_id_fkey"), nullable=False)
created_at = Column(BigInteger)
- user = relationship("User")
- application = relationship("ClientApplication")
- refresh_tokens = relationship("RefreshToken")
+ user = relationship("User", back_populates="tokens")
+ application = relationship("ClientApplication", back_populates="sessions")
+ refresh_tokens = relationship("RefreshToken", back_populates="user_session")
@staticmethod
def create(session, user_id, application_id, seconds_to_expire=86400):
@@ -991,9 +1064,9 @@ class ClientApplication(Base):
redirect_url = Column(String, nullable=False)
api_key = Column(String, nullable=False)
- user = relationship("User")
+ user = relationship("User", back_populates="applications")
sessions = relationship("UserSessionToken")
- subscriptions = relationship("Subscription")
+ subscriptions = relationship("Subscription", back_populates="application")
def __eq__(self, other):
return isinstance(other, ClientApplication) and str(self.id) == str(other.id)
@@ -1013,8 +1086,8 @@ class Subscription(Base):
application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True)
type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True)
- application = relationship("ClientApplication")
- type = relationship("SubscriptionType")
+ application = relationship("ClientApplication", back_populates="subscriptions")
+ type = relationship("SubscriptionType", back_populates="subscriptions")
class SubscriptionType(Base):
__tablename__ = "subscription_types"
@@ -1027,7 +1100,7 @@ class SubscriptionType(Base):
id = Column(BigInteger, primary_key=True)
name = Column(String, nullable=False)
- subscriptions = relationship("Subscription")
+ subscriptions = relationship("Subscription", back_populates="type")
class RefreshToken(Base):
@@ -1040,7 +1113,7 @@ class RefreshToken(Base):
id = Column(String, primary_key=True)
user_session_token = Column(ForeignKey("augur_operations.user_session_tokens.token", name="refresh_token_session_token_id_fkey"), nullable=False)
- user_session = relationship("UserSessionToken")
+ user_session = relationship("UserSessionToken", back_populates="refresh_tokens")
@staticmethod
def create(session, user_session_token_id):
@@ -1159,16 +1232,28 @@ def insert(session, repo_id):
repo_git = repo.repo_git
collection_status_unique = ["repo_id"]
+ pr_issue_count = 0
+ github_weight = 0
+ if "github" in repo_git:
- try:
- pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
- #session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
- github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
- except Exception as e:
- pr_issue_count = None
- github_weight = None
- session.logger.error(
- ''.join(traceback.format_exception(None, e, e.__traceback__)))
+ try:
+ pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
+ #session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
+ github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
+ except Exception as e:
+ pr_issue_count = None
+ github_weight = None
+ session.logger.error(
+ ''.join(traceback.format_exception(None, e, e.__traceback__)))
+ else:
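+ # non-GitHub (e.g. GitLab) repos: no issue/PR count is fetched yet, so the weight is based only on how recently the repo was added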
+ try:
+ pr_issue_count = 0
+ github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
+ except Exception as e:
+ pr_issue_count = None
+ github_weight = None
+ session.logger.error(
+ ''.join(traceback.format_exception(None, e, e.__traceback__)))
record = {
@@ -1178,6 +1263,7 @@ def insert(session, repo_id):
"secondary_weight": github_weight,
"ml_weight": github_weight
}
+
result = session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False)
diff --git a/augur/application/db/session.py b/augur/application/db/session.py
index 2212c1fdc..22379ad05 100644
--- a/augur/application/db/session.py
+++ b/augur/application/db/session.py
@@ -85,7 +85,7 @@ def __del__(self):
def execute_sql(self, sql_text):
- with self.engine.connect() as connection:
+ with self.engine.begin() as connection:
return_data = connection.execute(sql_text)
@@ -93,10 +93,10 @@ def execute_sql(self, sql_text):
def fetchall_data_from_sql_text(self,sql_text):
- with self.engine.connect() as connection:
+ with self.engine.begin() as connection:
- result = connection.execute(sql_text).fetchall()
- return [dict(zip(row.keys(), row)) for row in result]
+ result = connection.execute(sql_text)
+ return [dict(row) for row in result.mappings()]
def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]:
@@ -174,7 +174,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s
while attempts < 10:
try:
- with EngineConnection(self.engine) as connection:
+ #begin keyword is needed for sqlalchemy 2.x
+ #this is because autocommit support was removed in 2.0
+ with self.engine.begin() as connection:
connection.execute(stmnt)
break
except OperationalError as e:
@@ -191,14 +193,16 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s
raise e
except Exception as e:
- if(len(data) == 1):
+ #self.logger.info(e)
+ if len(data) == 1:
raise e
- else:
- first_half = data[:len(data)//2]
- second_half = data[len(data)//2:]
+
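+ # brief back-off, then retry each half of the batch separately so a single bad record can be isolated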
+ time.sleep(3)
+ first_half = data[:len(data)//2]
+ second_half = data[len(data)//2:]
- self.insert_data(first_half, natural_keys, return_columns, string_fields, on_conflict_update)
- self.insert_data(second_half, natural_keys, return_columns, string_fields, on_conflict_update)
+ self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update)
+ self.insert_data(second_half,table, natural_keys, return_columns, string_fields, on_conflict_update)
else:
self.logger.error("Unable to insert data in 10 attempts")
@@ -213,8 +217,8 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s
# othewise it gets the requested return columns and returns them as a list of dicts
while attempts < 10:
try:
- with EngineConnection(self.engine) as connection:
- return_data_tuples = connection.execute(stmnt).fetchall()
+ with self.engine.begin() as connection:
+ return_data_tuples = connection.execute(stmnt)
break
except OperationalError as e:
if isinstance(e.orig, DeadlockDetected):
@@ -228,14 +232,15 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s
raise e
except Exception as e:
- if(len(data) == 1):
+ if len(data) == 1:
raise e
- else:
- first_half = data[:len(data)//2]
- second_half = data[len(data)//2:]
+
+ time.sleep(3)
+ first_half = data[:len(data)//2]
+ second_half = data[len(data)//2:]
- self.insert_data(first_half, natural_keys, return_columns, string_fields, on_conflict_update)
- self.insert_data(second_half, natural_keys, return_columns, string_fields, on_conflict_update)
+ self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update)
+ self.insert_data(second_half, table, natural_keys, return_columns, string_fields, on_conflict_update)
else:
self.logger.error("Unable to insert and return data in 10 attempts")
@@ -244,9 +249,11 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s
if deadlock_detected is True:
self.logger.error("Made it through even though Deadlock was detected")
- return_data = []
- for data_tuple in return_data_tuples:
- return_data.append(dict(data_tuple))
+ return_data = [dict(row) for row in return_data_tuples.mappings()]
+
+ #no longer working in sqlalchemy 2.x
+ #for data_tuple in return_data_tuples:
+ # return_data.append(dict(data_tuple))
# using on confilict do nothing does not return the
# present values so this does gets the return values
diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py
index d170ef243..94127a43b 100644
--- a/augur/application/schema/alembic/env.py
+++ b/augur/application/schema/alembic/env.py
@@ -5,7 +5,9 @@
from alembic import context
from augur.application.db.models.base import Base
-from augur.application.db.engine import DatabaseEngine
+from augur.application.db.engine import DatabaseEngine, get_database_string
+from sqlalchemy import create_engine, event
+from sqlalchemy.pool import NullPool
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
@@ -59,8 +61,20 @@ def run_migrations_online():
and associate a connection with the context.
"""
+ url = get_database_string()
+ engine = create_engine(url)
- with DatabaseEngine() as connectable, connectable.connect() as connection:
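+ # set the Postgres search_path on every new DBAPI connection so migrations can reference the augur schemas without qualification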
+ @event.listens_for(engine, "connect", insert=True)
+ def set_search_path(dbapi_connection, connection_record):
+ existing_autocommit = dbapi_connection.autocommit
+ dbapi_connection.autocommit = True
+ cursor = dbapi_connection.cursor()
+ cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx")
+ cursor.close()
+ dbapi_connection.autocommit = existing_autocommit
+
+
+ with engine.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
diff --git a/augur/application/schema/alembic/versions/1_augur_new_changes.py b/augur/application/schema/alembic/versions/1_augur_new_changes.py
index 0be3780a3..2e8440294 100644
--- a/augur/application/schema/alembic/versions/1_augur_new_changes.py
+++ b/augur/application/schema/alembic/versions/1_augur_new_changes.py
@@ -300,8 +300,9 @@ def change_cntrb_id_to_uuid_5(upgrade=True):
"""
INSERT INTO "augur_data"."contributors"("cntrb_id", "cntrb_login", "cntrb_email", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:cntrb_uuid, 'not-provided', NULL, NULL, '2019-06-13 11:33:39', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, 'nobody', 'http://fake.me', 'http://fake.me', 'x', 'http://fake.me', NULL, 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', NULL, NULL, NULL, NULL, NULL, NULL, '2019-06-13 16:35:25');
"""
- ),
- cntrb_uuid=UnresolvableUUID().to_UUID()
+ ).bindparams(
+ cntrb_uuid=UnresolvableUUID().to_UUID()
+ )
)
conn.execute(
@@ -309,8 +310,9 @@ def change_cntrb_id_to_uuid_5(upgrade=True):
"""
INSERT INTO "augur_data"."contributors" ("cntrb_id", "cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:cntrb_uuid, 'nan', 'kannayoshihiro@gmail.com', 'KANNA Yoshihiro', 'UTMC', '2009-04-17 12:43:58', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 'kannayoshihiro@gmail.com', '2021-01-28 21:56:10-06', 74832, 'nan', 'https://api.github.com/users/nan', 'https://github.com/nan', 'MDQ6VXNlcjc0ODMy', 'https://avatars.githubusercontent.com/u/74832?v=4', '', 'https://api.github.com/users/nan/followers', 'https://api.github.com/users/nan/following{/other_user}', 'https://api.github.com/users/nan/gists{/gist_id}', 'https://api.github.com/users/nan/starred{/owner}{/repo}', 'https://api.github.com/users/nan/subscriptions', 'https://api.github.com/users/nan/orgs', 'https://api.github.com/users/nan/repos', 'https://api.github.com/users/nan/events{/privacy}', 'https://api.github.com/users/nan/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'GitHub API Worker', '1.0.0', 'GitHub API', '2021-10-28 15:23:46');
"""
- ),
- cntrb_uuid=GithubUUID().to_UUID()
+ ).bindparams(
+ cntrb_uuid=GithubUUID().to_UUID()
+ )
)
else:
diff --git a/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py
new file mode 100644
index 000000000..f381ec48e
--- /dev/null
+++ b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py
@@ -0,0 +1,245 @@
+""" Updating materialized views and associated indices
+
+Revision ID: 26
+Revises: 25
+Create Date: 2023-08-23 18:17:22.651191
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+from sqlalchemy import text
+
+# revision identifiers, used by Alembic.
+revision = '26'
+down_revision = '25'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+
+ mview_keys_26()
+
+def downgrade():
+
+ upgrade=False
+
+ mview_keys_26(upgrade)
+
+def mview_keys_26(upgrade=True):
+
+ if upgrade:
+ conn = op.get_bind()
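+ # drop the old views first so they can be recreated below with the updated definitions and unique indexes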
+ conn.execute(text("""
+ drop materialized view if exists augur_data.explorer_pr_assignments;
+ drop materialized view if exists augur_data.explorer_user_repos;
+ drop materialized view if exists augur_data.explorer_pr_response_times;
+ drop materialized view if exists augur_data.explorer_pr_response;
+ drop materialized view if exists augur_data.explorer_issue_assignments;"""))
+
+ conn.execute(text("""
+ create materialized view augur_data.explorer_pr_assignments as
+ SELECT
+ pr.pull_request_id,
+ pr.repo_id AS ID,
+ pr.pr_created_at AS created,
+ pr.pr_closed_at AS closed,
+ pre.created_at AS assign_date,
+ pre.ACTION AS assignment_action,
+ pre.cntrb_id AS assignee,
+ pre.node_id AS node_id
+ FROM
+ (
+ augur_data.pull_requests pr
+ LEFT JOIN augur_data.pull_request_events pre ON (
+ (
+ ( pr.pull_request_id = pre.pull_request_id )
+ AND (
+ ( pre.ACTION ) :: TEXT = ANY ( ARRAY [ ( 'unassigned' :: CHARACTER VARYING ) :: TEXT, ( 'assigned' :: CHARACTER VARYING ) :: TEXT ] )
+ )
+ )
+ )
+ );"""))
+ conn.execute(text("""
+ create materialized view augur_data.explorer_pr_response as
+ SELECT pr.pull_request_id,
+ pr.repo_id AS id,
+ pr.pr_augur_contributor_id AS cntrb_id,
+ m.msg_timestamp,
+ m.msg_cntrb_id,
+ pr.pr_created_at,
+ pr.pr_closed_at
+ FROM (augur_data.pull_requests pr
+ LEFT JOIN ( SELECT prr.pull_request_id,
+ m_1.msg_timestamp,
+ m_1.cntrb_id AS msg_cntrb_id
+ FROM augur_data.pull_request_review_message_ref prrmr,
+ augur_data.pull_requests pr_1,
+ augur_data.message m_1,
+ augur_data.pull_request_reviews prr
+ WHERE ((prrmr.pr_review_id = prr.pr_review_id) AND (prrmr.msg_id = m_1.msg_id) AND (prr.pull_request_id = pr_1.pull_request_id))
+ UNION
+ SELECT prmr.pull_request_id,
+ m_1.msg_timestamp,
+ m_1.cntrb_id AS msg_cntrb_id
+ FROM augur_data.pull_request_message_ref prmr,
+ augur_data.pull_requests pr_1,
+ augur_data.message m_1
+ WHERE ((prmr.pull_request_id = pr_1.pull_request_id) AND (prmr.msg_id = m_1.msg_id))) m ON ((m.pull_request_id = pr.pull_request_id)));"""))
+
+
+
+ conn.execute(text("""
+ create materialized view augur_data.explorer_user_repos as
+ SELECT a.login_name,
+ a.user_id,
+ b.group_id,
+ c.repo_id
+ FROM augur_operations.users a,
+ augur_operations.user_groups b,
+ augur_operations.user_repos c
+ WHERE ((a.user_id = b.user_id) AND (b.group_id = c.group_id))
+ ORDER BY a.user_id;"""))
+
+ conn.execute(text("""
+ create materialized view augur_data.explorer_pr_response_times as
+ SELECT repo.repo_id,
+ pull_requests.pr_src_id,
+ repo.repo_name,
+ pull_requests.pr_src_author_association,
+ repo_groups.rg_name AS repo_group,
+ pull_requests.pr_src_state,
+ pull_requests.pr_merged_at,
+ pull_requests.pr_created_at,
+ pull_requests.pr_closed_at,
+ date_part('year'::text, (pull_requests.pr_created_at)::date) AS created_year,
+ date_part('month'::text, (pull_requests.pr_created_at)::date) AS created_month,
+ date_part('year'::text, (pull_requests.pr_closed_at)::date) AS closed_year,
+ date_part('month'::text, (pull_requests.pr_closed_at)::date) AS closed_month,
+ base_labels.pr_src_meta_label,
+ base_labels.pr_head_or_base,
+ ((EXTRACT(epoch FROM pull_requests.pr_closed_at) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_close,
+ ((EXTRACT(epoch FROM pull_requests.pr_closed_at) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_close,
+ ((EXTRACT(epoch FROM response_times.first_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_first_response,
+ ((EXTRACT(epoch FROM response_times.first_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_first_response,
+ ((EXTRACT(epoch FROM response_times.last_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_last_response,
+ ((EXTRACT(epoch FROM response_times.last_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_last_response,
+ response_times.first_response_time,
+ response_times.last_response_time,
+ response_times.average_time_between_responses,
+ response_times.assigned_count,
+ response_times.review_requested_count,
+ response_times.labeled_count,
+ response_times.subscribed_count,
+ response_times.mentioned_count,
+ response_times.referenced_count,
+ response_times.closed_count,
+ response_times.head_ref_force_pushed_count,
+ response_times.merged_count,
+ response_times.milestoned_count,
+ response_times.unlabeled_count,
+ response_times.head_ref_deleted_count,
+ response_times.comment_count,
+ master_merged_counts.lines_added,
+ master_merged_counts.lines_removed,
+ all_commit_counts.commit_count,
+ master_merged_counts.file_count
+ FROM augur_data.repo,
+ augur_data.repo_groups,
+ ((((augur_data.pull_requests
+ LEFT JOIN ( SELECT pull_requests_1.pull_request_id,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'assigned'::text)) AS assigned_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'review_requested'::text)) AS review_requested_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'labeled'::text)) AS labeled_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'unlabeled'::text)) AS unlabeled_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'subscribed'::text)) AS subscribed_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'mentioned'::text)) AS mentioned_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'referenced'::text)) AS referenced_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'closed'::text)) AS closed_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'head_ref_force_pushed'::text)) AS head_ref_force_pushed_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'head_ref_deleted'::text)) AS head_ref_deleted_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'milestoned'::text)) AS milestoned_count,
+ count(*) FILTER (WHERE ((pull_request_events.action)::text = 'merged'::text)) AS merged_count,
+ min(message.msg_timestamp) AS first_response_time,
+ count(DISTINCT message.msg_timestamp) AS comment_count,
+ max(message.msg_timestamp) AS last_response_time,
+ ((max(message.msg_timestamp) - min(message.msg_timestamp)) / (count(DISTINCT message.msg_timestamp))::double precision) AS average_time_between_responses
+ FROM augur_data.pull_request_events,
+ augur_data.pull_requests pull_requests_1,
+ augur_data.repo repo_1,
+ augur_data.pull_request_message_ref,
+ augur_data.message
+ WHERE ((repo_1.repo_id = pull_requests_1.repo_id) AND (pull_requests_1.pull_request_id = pull_request_events.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_message_ref.pull_request_id) AND (pull_request_message_ref.msg_id = message.msg_id))
+ GROUP BY pull_requests_1.pull_request_id) response_times ON ((pull_requests.pull_request_id = response_times.pull_request_id)))
+ LEFT JOIN ( SELECT pull_request_commits.pull_request_id,
+ count(DISTINCT pull_request_commits.pr_cmt_sha) AS commit_count
+ FROM augur_data.pull_request_commits,
+ augur_data.pull_requests pull_requests_1,
+ augur_data.pull_request_meta
+ WHERE ((pull_requests_1.pull_request_id = pull_request_commits.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND ((pull_request_commits.pr_cmt_sha)::text <> (pull_requests_1.pr_merge_commit_sha)::text) AND ((pull_request_commits.pr_cmt_sha)::text <> (pull_request_meta.pr_sha)::text))
+ GROUP BY pull_request_commits.pull_request_id) all_commit_counts ON ((pull_requests.pull_request_id = all_commit_counts.pull_request_id)))
+ LEFT JOIN ( SELECT max(pull_request_meta.pr_repo_meta_id) AS max,
+ pull_request_meta.pull_request_id,
+ pull_request_meta.pr_head_or_base,
+ pull_request_meta.pr_src_meta_label
+ FROM augur_data.pull_requests pull_requests_1,
+ augur_data.pull_request_meta
+ WHERE ((pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND ((pull_request_meta.pr_head_or_base)::text = 'base'::text))
+ GROUP BY pull_request_meta.pull_request_id, pull_request_meta.pr_head_or_base, pull_request_meta.pr_src_meta_label) base_labels ON ((base_labels.pull_request_id = all_commit_counts.pull_request_id)))
+ LEFT JOIN ( SELECT sum(commits.cmt_added) AS lines_added,
+ sum(commits.cmt_removed) AS lines_removed,
+ pull_request_commits.pull_request_id,
+ count(DISTINCT commits.cmt_filename) AS file_count
+ FROM augur_data.pull_request_commits,
+ augur_data.commits,
+ augur_data.pull_requests pull_requests_1,
+ augur_data.pull_request_meta
+ WHERE (((commits.cmt_commit_hash)::text = (pull_request_commits.pr_cmt_sha)::text) AND (pull_requests_1.pull_request_id = pull_request_commits.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND (commits.repo_id = pull_requests_1.repo_id) AND ((commits.cmt_commit_hash)::text <> (pull_requests_1.pr_merge_commit_sha)::text) AND ((commits.cmt_commit_hash)::text <> (pull_request_meta.pr_sha)::text))
+ GROUP BY pull_request_commits.pull_request_id) master_merged_counts ON ((base_labels.pull_request_id = master_merged_counts.pull_request_id)))
+ WHERE ((repo.repo_group_id = repo_groups.repo_group_id) AND (repo.repo_id = pull_requests.repo_id))
+ ORDER BY response_times.merged_count DESC;"""))
+
+ conn.execute(text("""
+ create materialized view augur_data.explorer_issue_assignments as
+ SELECT
+ i.issue_id,
+ i.repo_id AS ID,
+ i.created_at AS created,
+ i.closed_at AS closed,
+ ie.created_at AS assign_date,
+ ie.ACTION AS assignment_action,
+ ie.cntrb_id AS assignee,
+ ie.node_id as node_id
+ FROM
+ (
+ augur_data.issues i
+ LEFT JOIN augur_data.issue_events ie ON (
+ (
+ ( i.issue_id = ie.issue_id )
+ AND (
+ ( ie.ACTION ) :: TEXT = ANY ( ARRAY [ ( 'unassigned' :: CHARACTER VARYING ) :: TEXT, ( 'assigned' :: CHARACTER VARYING ) :: TEXT ] )
+ )
+ )
+ )
+ );"""))
+
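+ # unique indexes on the materialized views (PostgreSQL requires one to REFRESH MATERIALIZED VIEW CONCURRENTLY)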
+ conn = op.get_bind()
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_user_repos(login_name,user_id,group_id,repo_id);"""))
+ conn.execute(text("""COMMIT;"""))
+
+ conn = op.get_bind()
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_response_times(repo_id, pr_src_id, pr_src_meta_label);"""))
+ conn.execute(text("""COMMIT;"""))
+
+ conn = op.get_bind()
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_assignments(pull_request_id, id, node_id);"""))
+ conn.execute(text("""COMMIT;"""))
+
+ conn = op.get_bind()
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_issue_assignments(issue_id, id, node_id);"""))
+ conn.execute(text("""COMMIT;"""))
+
+ conn = op.get_bind()
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_response(pull_request_id, id, cntrb_id, msg_cntrb_id, msg_timestamp);"""))
+ conn.execute(text("""COMMIT;"""))
\ No newline at end of file
diff --git a/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py b/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py
index 8d75b7a70..0d9c6d744 100644
--- a/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py
+++ b/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py
@@ -85,9 +85,9 @@ def upgrade():
table_changes = """
- ALTER TABLE user_repos
+ ALTER TABLE augur_operations.user_repos
ADD COLUMN group_id BIGINT,
- ADD CONSTRAINT user_repos_group_id_fkey FOREIGN KEY (group_id) REFERENCES user_groups(group_id),
+ ADD CONSTRAINT user_repos_group_id_fkey FOREIGN KEY (group_id) REFERENCES augur_operations.user_groups(group_id),
DROP COLUMN user_id,
ADD PRIMARY KEY (group_id, repo_id);
"""
diff --git a/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py b/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py
index 288f584cf..52a6e017d 100644
--- a/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py
+++ b/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py
@@ -25,7 +25,7 @@ def upgrade():
conn = op.get_bind()
result = conn.execute(text(f"""SELECT * FROM "augur_data"."repo_groups" WHERE rg_name='{repo_group_name}';""")).fetchall()
if len(result) == 0:
- conn.execute(f"""INSERT INTO "augur_data"."repo_groups" ("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ('{repo_group_name}', 'DO NOT DELETE OR FRONTEND REPOS WILL BREAK', '', 0, '2023-02-17 15:00:00', NULL, NULL, NULL, NULL, NULL);""")
+ conn.execute(text(f"""INSERT INTO "augur_data"."repo_groups" ("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ('{repo_group_name}', 'DO NOT DELETE OR FRONTEND REPOS WILL BREAK', '', 0, '2023-02-17 15:00:00', NULL, NULL, NULL, NULL, NULL);"""))
# ### end Alembic commands ###
diff --git a/augur/application/util.py b/augur/application/util.py
index 1915abdeb..03e591df9 100644
--- a/augur/application/util.py
+++ b/augur/application/util.py
@@ -25,6 +25,3 @@ def get_all_repos_count(**kwargs):
result = controller.get_repo_count(source="all", **kwargs)
return result
-
-
-
diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py
index 9a1b425f9..78fb0b4b5 100644
--- a/augur/tasks/data_analysis/clustering_worker/setup.py
+++ b/augur/tasks/data_analysis/clustering_worker/setup.py
@@ -29,11 +29,11 @@ def read(filename):
'psycopg2-binary==2.9.3',
#'sklearn==0.0.0',
'scikit-learn==1.1.3',
- 'numpy==1.22.0',
+ 'numpy==1.26.0',
'nltk==3.6.6',
'seaborn==0.11.1',
- 'pandas==1.3.5',
- 'matplotlib==3.5.1'
+ 'pandas==1.5.3',
+ 'matplotlib>=3.5.1'
],
classifiers=[
'Development Status :: 2 - Pre-Alpha',
diff --git a/augur/tasks/data_analysis/clustering_worker/tasks.py b/augur/tasks/data_analysis/clustering_worker/tasks.py
index 2d4f4973d..c102e6c22 100644
--- a/augur/tasks/data_analysis/clustering_worker/tasks.py
+++ b/augur/tasks/data_analysis/clustering_worker/tasks.py
@@ -116,7 +116,9 @@ def clustering_model(repo_git: str,logger,engine, session) -> None:
"""
)
# result = db.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date)
- msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, engine, params={"repo_id": repo_id})
+
+ with engine.connect() as conn:
+ msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, conn, params={"repo_id": repo_id})
logger.info(msg_df_cur_repo.head())
logger.debug(f"Repo message df size: {len(msg_df_cur_repo.index)}")
@@ -303,7 +305,9 @@ def visualize_labels_PCA(features, labels, annotations, num_components, title):
AND prmr.msg_id=m.msg_id
"""
)
- msg_df_all = pd.read_sql(get_messages_sql, engine, params={})
+
+ with engine.connect() as conn:
+ msg_df_all = pd.read_sql(get_messages_sql, conn, params={})
# select only highly active repos
logger.debug("Selecting highly active repos")
diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py
index 8034112ad..4521a722e 100644
--- a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py
+++ b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py
@@ -56,9 +56,10 @@ def contributor_breadth_model() -> None:
) b
""")
- result = engine.execute(cntrb_login_query)
+ with engine.connect() as connection:
+ result = connection.execute(cntrb_login_query)
- current_cntrb_logins = [dict(row) for row in result]
+ current_cntrb_logins = [dict(row) for row in result.mappings()]
cntrb_newest_events_query = s.sql.text("""
@@ -68,8 +69,10 @@ def contributor_breadth_model() -> None:
GROUP BY c.gh_login;
""")
- cntrb_newest_events_list = engine.execute(cntrb_newest_events_query)
- cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list]
+ with engine.connect() as connection:
+ cntrb_newest_events_list = connection.execute(cntrb_newest_events_query)
+
+ cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()]
cntrb_newest_events_map = {}
for cntrb_event in cntrb_newest_events_list:
diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py
index 9a4e91c01..37d6557ec 100644
--- a/augur/tasks/data_analysis/discourse_analysis/setup.py
+++ b/augur/tasks/data_analysis/discourse_analysis/setup.py
@@ -28,13 +28,13 @@ def read(filename):
'requests==2.28.0',
'psycopg2-binary==2.9.3',
'click==8.0.3',
- 'scipy==1.7.3',
+ 'scipy>=1.10.0',
'nltk==3.6.6',
- 'pandas==1.3.5',
+ 'pandas==1.5.3',
'scikit-learn==1.1.3',
'textblob==0.15.3',
- 'python-crfsuite==0.9.8',
- 'sklearn-crfsuite==0.3.6',
+ 'python-crfsuite>=0.9.8',
+ 'sklearn-crfsuite>=0.3.6',
'tabulate==0.8.9'
], # python-crfsuite-0.9.8 sklearn-crfsuite-0.3.6 tabulate-0.8.9
entry_points={
diff --git a/augur/tasks/data_analysis/discourse_analysis/tasks.py b/augur/tasks/data_analysis/discourse_analysis/tasks.py
index 2febe8636..5a9941679 100644
--- a/augur/tasks/data_analysis/discourse_analysis/tasks.py
+++ b/augur/tasks/data_analysis/discourse_analysis/tasks.py
@@ -72,7 +72,9 @@ def discourse_analysis_model(repo_git: str,logger,engine) -> None:
""")
# result = db.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date)
- msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, engine, params={"repo_id": repo_id})
+
+ with engine.connect() as conn:
+ msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, conn, params={"repo_id": repo_id})
msg_df_cur_repo = msg_df_cur_repo.sort_values(by=['thread_id']).reset_index(drop=True)
logger.info(msg_df_cur_repo.head())
diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py
index 0eb35d8a7..1ee6e8a4b 100644
--- a/augur/tasks/data_analysis/insight_worker/setup.py
+++ b/augur/tasks/data_analysis/insight_worker/setup.py
@@ -29,9 +29,9 @@ def read(filename):
'requests==2.28.0',
'psycopg2-binary==2.9.3',
'click==8.0.3',
- 'scipy>=1.7.3',
+ 'scipy>=1.10.0',
'sklearn==0.0',
- 'numpy==1.22.0',
+ 'numpy==1.26.0',
],
entry_points={
'console_scripts': [
diff --git a/augur/tasks/data_analysis/insight_worker/tasks.py b/augur/tasks/data_analysis/insight_worker/tasks.py
index 7f506c8d1..37ae5f484 100644
--- a/augur/tasks/data_analysis/insight_worker/tasks.py
+++ b/augur/tasks/data_analysis/insight_worker/tasks.py
@@ -134,13 +134,16 @@ def insight_model(repo_git: str,logger,engine,session) -> None:
WHERE repo_insights.ri_metric = to_delete.ri_metric
AND repo_insights.ri_field = to_delete.ri_field
""")
- result = engine.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date)
+
+ with engine.connect() as conn:
+ result = conn.execute(delete_points_SQL, {"repo_id": repo_id, "min_date": min_date})
# get table values to check for dupes later on
table_values_sql = s.sql.text("""SELECT * FROM repo_insights_records WHERE repo_id={}""".format(repo_id))
- insight_table_values = pd.read_sql(table_values_sql, engine, params={})
+ with engine.connect() as conn:
+ insight_table_values = pd.read_sql(table_values_sql,conn, params={})
to_model_columns = df.columns[0:len(metrics) + 1]
@@ -257,7 +260,7 @@ def classify_anomalies(df, metric):
repo_insight_record_obj.ri_id))
# Send insight to Jonah for slack bot
- send_insight(record, abs(next_recent_anomaly.iloc[0][metric] - mean), logger)
+ send_insight(record, abs(next_recent_anomaly.iloc[0][metric] - mean), logger,engine)
insight_count += 1
else:
@@ -526,8 +529,8 @@ def send_insight(insight, units_from_mean, logger, engine):
FROM repo, repo_groups
WHERE repo_id = {}
""".format(insight['repo_id']))
-
- repo = pd.read_sql(repoSQL, engine, params={}).iloc[0]
+ with engine.connect() as conn:
+ repo = pd.read_sql(repoSQL, conn, params={}).iloc[0]
begin_date = datetime.datetime.now() - datetime.timedelta(days=anomaly_days)
dict_date = insight['ri_date'].strftime("%Y-%m-%d %H:%M:%S")
@@ -565,7 +568,8 @@ def clear_insights(repo_id, new_endpoint, new_field, logger):
AND ri_field = '{}'
""".format(repo_id, new_endpoint, new_field)
try:
- result = engine.execute(deleteSQL)
+ with engine.connect() as conn:
+ result = conn.execute(deleteSQL)
except Exception as e:
logger.info("Error occured deleting insight slot: {}".format(e))
@@ -582,7 +586,8 @@ def clear_insights(repo_id, new_endpoint, new_field, logger):
AND ri_field = '{}'
""".format(repo_id, new_endpoint, new_field)
try:
- result = engine.execute(deleteSQL)
+ with engine.connect() as conn:
+ result = conn.execute(deleteSQL)
except Exception as e:
logger.info("Error occured deleting insight slot: {}".format(e))
@@ -602,7 +607,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger):
AND ri_field = '{}'
ORDER BY ri_score DESC
""".format(repo_id, new_metric, new_field))
- rec = json.loads(pd.read_sql(recordSQL, engine, params={}).to_json(orient='records'))
+ with engine.connect() as conn:
+ rec = json.loads(pd.read_sql(recordSQL, conn, params={}).to_json(orient='records'))
logger.info("recordsql: {}, \n{}".format(recordSQL, rec))
# If new score is higher, continue with deletion
if len(rec) > 0:
@@ -623,7 +629,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger):
AND ri_field = '{}'
""".format(record['repo_id'], record['ri_metric'], record['ri_field'])
try:
- result = engine.execute(deleteSQL)
+ with engine.connect() as conn:
+ result = conn.execute(deleteSQL)
except Exception as e:
logger.info("Error occured deleting insight slot: {}".format(e))
else:
@@ -637,7 +644,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger):
WHERE repo_id = {}
ORDER BY ri_score ASC
""".format(repo_id))
- ins = json.loads(pd.read_sql(insightSQL, engine, params={}).to_json(orient='records'))
+ with engine.connect() as conn:
+ ins = json.loads(pd.read_sql(insightSQL, conn, params={}).to_json(orient='records'))
logger.info("This repos insights: {}".format(ins))
# Determine if inisghts need to be deleted based on if there are more insights than we want stored,
@@ -675,7 +683,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger):
AND ri_metric = '{}'
""".format(insight['repo_id'], insight['ri_metric'])
try:
- result = engine.execute(deleteSQL)
+ with engine.connect() as conn:
+ result = conn.execute(deleteSQL)
except Exception as e:
logger.info("Error occured deleting insight slot: {}".format(e))
@@ -744,7 +753,9 @@ def filter_duplicates(cols, tables, og_data, logger, engine):
colSQL = s.sql.text("""
SELECT {} FROM {}
""".format(col, table_str))
- values = pd.read_sql(colSQL, engine, params={})
+
+ with engine.connect() as conn:
+ values = pd.read_sql(colSQL, conn, params={})
for obj in og_data:
if values.isin([obj[cols[col]]]).any().any():
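The changes in this file all follow one idiom: instead of calling engine.execute() or handing the engine straight to pandas, the work is done on an explicitly opened connection, which is what newer SQLAlchemy releases expect. A minimal sketch of that idiom outside Augur (the connection URL and query are placeholders, and the dict-style parameters are just one of the accepted forms):

    import pandas as pd
    import sqlalchemy as s

    # placeholder engine; Augur builds its own from the application config
    engine = s.create_engine("postgresql+psycopg2://user:pass@localhost:5432/augur")

    query = s.sql.text("SELECT ri_metric, ri_field FROM repo_insights WHERE repo_id = :repo_id")

    with engine.connect() as conn:
        # reads go through the connection rather than the engine
        insights = pd.read_sql(query, conn, params={"repo_id": 1})

    with engine.begin() as conn:
        # writes use a transactional connection so the statement is committed on exit
        conn.execute(s.sql.text("DELETE FROM repo_insights WHERE repo_id = :repo_id"),
                     {"repo_id": 1})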
diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py
index 311eb9b6f..a4f6a30c4 100644
--- a/augur/tasks/data_analysis/message_insights/setup.py
+++ b/augur/tasks/data_analysis/message_insights/setup.py
@@ -30,22 +30,22 @@ def read(filename):
'requests==2.28.0',
'psycopg2-binary==2.9.3',
'click==8.0.3',
- 'scipy==1.7.3',
+ 'scipy>=1.10.0',
'scikit-learn==1.1.3', #0.24.2',
- 'numpy==1.22.0',
+ 'numpy==1.26.0',
'nltk==3.6.6',
- 'pandas==1.3.5',
+ 'pandas==1.5.3',
'emoji==1.2.0',
- 'Keras<2.9.0rc0',
- 'Keras-Preprocessing==1.1.2',
- 'tensorflow==2.8.0',
- 'h5py~=3.6.0',
+ 'keras>=2.15.0',
+ 'Keras-Preprocessing',
+ 'tensorflow==2.15.0',
+ 'h5py==3.10.0',
'scikit-image==0.19.1',
- 'joblib==1.0.1',
+ 'joblib==1.2.0',
'xgboost',
'bs4==0.0.1',
'xlrd==2.0.1',
- 'gensim==4.2.0'
+ 'gensim>=4.2.0'
],
classifiers=[
'Development Status :: 3 - Alpha',
diff --git a/augur/tasks/data_analysis/message_insights/tasks.py b/augur/tasks/data_analysis/message_insights/tasks.py
index 1acec976c..4727d3def 100644
--- a/augur/tasks/data_analysis/message_insights/tasks.py
+++ b/augur/tasks/data_analysis/message_insights/tasks.py
@@ -59,7 +59,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None:
repo_exists_SQL = s.sql.text("""
SELECT exists (SELECT 1 FROM augur_data.message_analysis_summary WHERE repo_id = :repo_id LIMIT 1)""")
- df_rep = pd.read_sql_query(repo_exists_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_rep = pd.read_sql_query(repo_exists_SQL, conn, params={'repo_id': repo_id})
#full_train = not(df_rep['exists'].iloc[0])
logger.info(f'Full Train: {full_train}')
@@ -84,7 +85,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None:
where message.repo_id = :repo_id
""")
- df_past = pd.read_sql_query(past_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_past = pd.read_sql_query(past_SQL, conn, params={'repo_id': repo_id})
df_past['msg_timestamp'] = pd.to_datetime(df_past['msg_timestamp'])
df_past = df_past.sort_values(by='msg_timestamp')
@@ -124,7 +126,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None:
left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id
where message.repo_id = :repo_id""")
- df_message = pd.read_sql_query(join_SQL, engine, params={'repo_id': repo_id, 'begin_date': begin_date})
+ with engine.connect() as conn:
+ df_message = pd.read_sql_query(join_SQL, conn, params={'repo_id': repo_id, 'begin_date': begin_date})
logger.info(f'Messages dataframe dim: {df_message.shape}')
logger.info(f'Value 1: {df_message.shape[0]}')
@@ -159,7 +162,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None:
left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id
where issue_message_ref.repo_id = :repo_id""")
- df_past = pd.read_sql_query(merge_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_past = pd.read_sql_query(merge_SQL, conn, params={'repo_id': repo_id})
df_past = df_past.loc[df_past['novelty_flag'] == 0]
rec_errors = df_past['reconstruction_error'].tolist()
threshold = threshold_otsu(np.array(rec_errors))
@@ -345,7 +349,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None:
FROM message_analysis_summary
WHERE repo_id=:repo_id""")
- df_past = pd.read_sql_query(message_analysis_query, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_past = pd.read_sql_query(message_analysis_query, conn, params={'repo_id': repo_id})
# df_past = get_table_values(cols=['period', 'positive_ratio', 'negative_ratio', 'novel_count'],
# tables=['message_analysis_summary'],
@@ -414,12 +419,13 @@ def send_insight(repo_id, insights, logger, engine):
WHERE repo_id = {}
""".format(repo_id))
- repo = pd.read_sql(repoSQL, engine, params={}).iloc[0]
+ with engine.connect() as conn:
+ repo = pd.read_sql(repoSQL, conn, params={}).iloc[0]
to_send = {
'message_insight': True,
'repo_git': repo['repo_git'],
- 'insight_begin_date': begin_date.strftime("%Y-%m-%d %H:%M:%S"),
+ 'insight_begin_date': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
# date from when insights are calculated
'sentiment': insights[0], # sentiment insight dict
'novelty': insights[1], # novelty insight dict
@@ -449,13 +455,14 @@ def get_max_id(table, column, logger, engine, default=25150):
SELECT max({0}.{1}) AS {1}
FROM {0}
""".format(table, column))
- rs = pd.read_sql(max_id_sql, engine, params={})
+
+ with engine.connect() as conn:
+ rs = pd.read_sql(max_id_sql, conn, params={})
if rs.iloc[0][column] is not None:
max_id = int(rs.iloc[0][column]) + 1
logger.info("Found max id for {} column in the {} table: {}\n".format(column, table, max_id))
else:
max_id = default
- logger.warning("Could not find max id for {} column in the {} table... " +
- "using default set to: {}\n".format(column, table, max_id))
+ logger.warning(f"Could not find max id for {column} column in the {table} table... using default set to: {max_id}\n")
return max_id
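The logging fix just above is worth spelling out: in the original call, .format() binds only to the second string literal, so the concatenated message keeps its unformatted placeholders and the lone {} in the second literal is filled with the wrong argument. A quick illustration with made-up values:

    column, table, max_id = "cntrb_id", "contributors", 25150

    # .format() applies only to the right-hand literal: its single "{}" receives the
    # first argument (column) and the left-hand placeholders are never substituted
    broken = "Could not find max id for {} column in the {} table... " + \
             "using default set to: {}\n".format(column, table, max_id)
    print(broken)
    # Could not find max id for {} column in the {} table... using default set to: cntrb_id

    # the f-string form interpolates every value exactly once
    fixed = f"Could not find max id for {column} column in the {table} table... using default set to: {max_id}\n"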
diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py
index dc13c94bf..3341f24ff 100644
--- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py
+++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py
@@ -29,12 +29,12 @@ def read(filename):
'psycopg2-binary==2.9.3',
'sklearn==0.0',
'nltk==3.6.6',
- 'numpy==1.22.0',
- 'pandas==1.3.5',
+ 'numpy==1.26.0',
+ 'pandas==1.5.3',
'emoji==1.2.0',
- 'joblib==1.0.1',
+ 'joblib==1.2.0',
'xgboost==1.4.2',
- 'scipy==1.7.3'
+ 'scipy>=1.10.0'
],
classifiers=[
'Development Status :: 2 - Pre-Alpha',
diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py b/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py
index c2816bed8..9d6d5be78 100644
--- a/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py
+++ b/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py
@@ -74,8 +74,8 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None:
and pull_requests.repo_id = :repo_id
and pr_src_state like 'open'
""")
-
- df_pr = pd.read_sql_query(pr_SQL, engine, params={'begin_date': begin_date, 'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_pr = pd.read_sql_query(pr_SQL, conn, params={'begin_date': begin_date, 'repo_id': repo_id})
logger.info(f'PR Dataframe dim: {df_pr.shape}\n')
@@ -106,15 +106,16 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None:
select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from augur_data.message
left outer join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id
left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = :repo_id""")
-
- df_message = pd.read_sql_query(messages_SQL, engine, params={'repo_id': repo_id})
+ with engine.connect() as conn:
+ df_message = pd.read_sql_query(messages_SQL, conn, params={'repo_id': repo_id})
logger.info(f'Mapping messages to PR, find comment & participants counts')
# Map PR to its corresponding messages
pr_ref_sql = s.sql.text("select * from augur_data.pull_request_message_ref")
- df_pr_ref = pd.read_sql_query(pr_ref_sql, engine)
+ with engine.connect() as conn:
+ df_pr_ref = pd.read_sql_query(pr_ref_sql, conn)
df_merge = pd.merge(df_pr, df_pr_ref, on='pull_request_id', how='left')
df_merge = pd.merge(df_merge, df_message, on='msg_id', how='left')
df_merge = df_merge.dropna(subset=['msg_id'], axis=0)
@@ -167,7 +168,9 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None:
SELECT repo_id, pull_requests_merged, pull_request_count,watchers_count, last_updated FROM
augur_data.repo_info where repo_id = :repo_id
""")
- df_repo = pd.read_sql_query(repo_sql, engine, params={'repo_id': repo_id})
+
+ with engine.connect() as conn:
+ df_repo = pd.read_sql_query(repo_sql, conn, params={'repo_id': repo_id})
df_repo = df_repo.loc[df_repo.groupby('repo_id').last_updated.idxmax(), :]
df_repo = df_repo.drop(['last_updated'], axis=1)
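The groupby/idxmax selection above keeps only the newest repo_info row per repository before the timestamp column is dropped. A small self-contained pandas example of that pattern (values invented):

    import pandas as pd

    df_repo = pd.DataFrame({
        "repo_id":              [1, 1, 2],
        "pull_requests_merged": [5, 9, 3],
        "last_updated": pd.to_datetime(["2023-01-01", "2023-06-01", "2023-03-01"]),
    })

    # idxmax() gives, per repo_id, the index label of the most recent row; .loc keeps exactly those rows
    latest = df_repo.loc[df_repo.groupby("repo_id").last_updated.idxmax(), :]
    latest = latest.drop(["last_updated"], axis=1)
    #    repo_id  pull_requests_merged
    # 1        1                     9
    # 2        2                     3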
diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py
index 76420c253..f04d01552 100644
--- a/augur/tasks/db/refresh_materialized_views.py
+++ b/augur/tasks/db/refresh_materialized_views.py
@@ -59,15 +59,35 @@ def refresh_materialized_views():
COMMIT;
""")
+ mv9_refresh = s.sql.text("""
+ REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_user_repos with data;
+ COMMIT;
+ """)
- try:
- with DatabaseSession(logger, engine) as session:
- session.execute_sql(mv1_refresh)
- except Exception as e:
- logger.info(f"error is {e}")
- pass
+ mv10_refresh = s.sql.text("""
+
+ REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response_times with data;
+ COMMIT;
+ """)
+ mv11_refresh = s.sql.text("""
+
+ REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_assignments with data;
+ COMMIT;
+ """)
+
+ mv12_refresh = s.sql.text("""
+
+ REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_issue_assignments with data;
+ COMMIT;
+ """)
+
+ mv13_refresh = s.sql.text("""
+
+ REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data;
+ COMMIT;
+ """)
try:
with DatabaseSession(logger, engine) as session:
@@ -125,7 +145,40 @@ def refresh_materialized_views():
logger.info(f"error is {e}")
pass
+ try:
+ with DatabaseSession(logger, engine) as session:
+ session.execute_sql(mv9_refresh)
+ except Exception as e:
+ logger.info(f"error is {e}")
+ pass
+
+ try:
+ with DatabaseSession(logger, engine) as session:
+ session.execute_sql(mv10_refresh)
+ except Exception as e:
+ logger.info(f"error is {e}")
+ pass
+ try:
+ with DatabaseSession(logger, engine) as session:
+ session.execute_sql(mv11_refresh)
+ except Exception as e:
+ logger.info(f"error is {e}")
+ pass
+
+ try:
+ with DatabaseSession(logger, engine) as session:
+ session.execute_sql(mv12_refresh)
+ except Exception as e:
+ logger.info(f"error is {e}")
+ pass
+
+ try:
+ with DatabaseSession(logger, engine) as session:
+ session.execute_sql(mv13_refresh)
+ except Exception as e:
+ logger.info(f"error is {e}")
+ pass
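The five new refreshes reuse the existing try/refresh/log-and-continue pattern verbatim. Purely as an illustration of that pattern, not how the patch itself is written, the same behavior could be expressed as a loop over view names, using the session API shown above (the engine import path matches how other augur task modules obtain it):

    import logging
    import sqlalchemy as s
    from augur.application.db.session import DatabaseSession
    from augur.tasks.init.celery_app import engine

    logger = logging.getLogger(__name__)

    view_names = [
        "augur_data.explorer_user_repos",
        "augur_data.explorer_pr_response_times",
        "augur_data.explorer_pr_assignments",
        "augur_data.explorer_issue_assignments",
        "augur_data.explorer_pr_response",
    ]

    for view in view_names:
        refresh = s.sql.text(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {view} WITH DATA; COMMIT;")
        try:
            with DatabaseSession(logger, engine) as session:
                session.execute_sql(refresh)
        except Exception as e:
            # mirror the existing behavior: log the failure and move on to the next view
            logger.info(f"error is {e}")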
diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py
index b8eb8b203..fffd79d33 100644
--- a/augur/tasks/frontend.py
+++ b/augur/tasks/frontend.py
@@ -30,15 +30,15 @@ def add_org_repo_list(user_id, group_name, urls):
valid_repos = []
for url in urls:
- # matches https://github.com/{org}/ or htts://github.com/{org}
+ # matches https://github.com/{org}/ or http://github.com/{org}
if Repo.parse_github_org_url(url):
- added = user.add_org(group_name, url)[0]
+ added = user.add_github_org(group_name, url)[0]
if added:
valid_orgs.append(url)
- # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo}
+ # matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo}
elif Repo.parse_github_repo_url(url)[0]:
- added = user.add_repo(group_name, url)[0]
+ added = user.add_github_repo(group_name, url)[0]
if added:
valid_repos.append(url)
@@ -46,7 +46,7 @@ def add_org_repo_list(user_id, group_name, urls):
elif (match := parse_org_and_repo_name(url)):
org, repo = match.groups()
repo_url = f"https://github.com/{org}/{repo}/"
- added = user.add_repo(group_name, repo_url)[0]
+ added = user.add_github_repo(group_name, repo_url)[0]
if added:
valid_repos.append(url)
@@ -54,9 +54,17 @@ def add_org_repo_list(user_id, group_name, urls):
elif (match := parse_org_name(url)):
org = match.group(1)
org_url = f"https://github.com/{org}/"
- added = user.add_org(group_name, org_url)[0]
+ added = user.add_github_org(group_name, org_url)[0]
if added:
valid_orgs.append(url)
+
+ # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
+ elif Repo.parse_gitlab_repo_url(url)[0]:
+
+ added = user.add_gitlab_repo(group_name, url)[0]
+ if added:
+ valid_repos.append(url)
+
else:
invalid_urls.append(url)
@@ -66,24 +74,25 @@ def add_org_repo_list(user_id, group_name, urls):
-
+# TODO: Change to github specific
@celery.task
def add_repo(user_id, group_name, repo_url):
logger = logging.getLogger(add_org.__name__)
with GithubTaskSession(logger) as session:
- result = UserRepo.add(session, repo_url, user_id, group_name)
+ result = UserRepo.add_github_repo(session, repo_url, user_id, group_name)
print(repo_url, result)
+# TODO: Change to github specific
@celery.task
def add_org(user_id, group_name, org_url):
logger = logging.getLogger(add_org.__name__)
with GithubTaskSession(logger) as session:
- result = UserRepo.add_org_repos(session, org_url, user_id, group_name)
+ result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name)
print(org_url, result)
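add_org_repo_list now routes each URL four ways: GitHub org, GitHub repo, shorthand org/repo, and GitLab repo, falling back to invalid_urls. The real checks live in Augur's Repo.parse_* helpers; the sketch below is only a hypothetical regex approximation of that routing decision, not the project's implementation:

    import re

    ORG_RE = re.compile(r"^https?://github\.com/([A-Za-z0-9_.-]+)/?$")
    REPO_RE = re.compile(r"^https?://github\.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/?$")
    GITLAB_REPO_RE = re.compile(r"^https?://gitlab\.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/?$")

    def classify_url(url: str) -> str:
        """Return which add_* path a URL would take, or 'invalid'."""
        if ORG_RE.match(url):
            return "github_org"
        if REPO_RE.match(url):
            return "github_repo"
        if GITLAB_REPO_RE.match(url):
            return "gitlab_repo"
        return "invalid"

    print(classify_url("https://github.com/chaoss/"))            # github_org
    print(classify_url("https://github.com/chaoss/augur"))       # github_repo
    print(classify_url("https://gitlab.com/gitlab-org/gitaly"))  # gitlab_repo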
diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py
index c763a2a2c..ee3dc047f 100644
--- a/augur/tasks/git/facade_tasks.py
+++ b/augur/tasks/git/facade_tasks.py
@@ -31,7 +31,8 @@
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor, get_repo_commit_count, update_facade_scheduling_fields, get_facade_weight_with_commit_count, facade_bulk_insert_commits
from augur.tasks.github.facade_github.tasks import *
-from augur.tasks.util.collection_util import CollectionState, get_collection_status_repo_git_from_filter
+from augur.tasks.util.collection_state import CollectionState
+from augur.tasks.util.collection_util import get_collection_status_repo_git_from_filter
from augur.tasks.git.util.facade_worker.facade_worker.repofetch import GitCloneError, git_repo_initialize
diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py
index 304574bc8..cf7d2d1e5 100644
--- a/augur/tasks/github/detect_move/core.py
+++ b/augur/tasks/github/detect_move/core.py
@@ -6,20 +6,24 @@
from augur.tasks.github.util.util import parse_json_response
import logging
from datetime import datetime
-from enum import Enum
+from augur.tasks.util.collection_state import CollectionState
from augur.application.db.util import execute_session_query
-class CollectionState(Enum):
- SUCCESS = "Success"
- PENDING = "Pending"
- ERROR = "Error"
- COLLECTING = "Collecting"
-def update_repo_with_dict(current_dict,new_dict,logger,db):
-
+def update_repo_with_dict(repo,new_dict,logger,db):
+ """
+ Update a repository record in the database using a dictionary tagged with
+ the appropriate table fields
+
+ Args:
+ repo: orm repo object to update
+ new_dict: dict of new values to add to the repo record
+ logger: logging object
+ db: db object
+ """
- to_insert = current_dict
+ to_insert = repo.__dict__
del to_insert['_sa_instance_state']
to_insert.update(new_dict)
@@ -45,7 +49,6 @@ def ping_github_for_repo_move(augur_db, key_auth, repo, logger,collection_hook='
owner, name = get_owner_repo(repo.repo_git)
url = f"https://api.github.com/repos/{owner}/{name}"
- current_repo_dict = repo.__dict__
attempts = 0
while attempts < 10:
@@ -56,64 +59,71 @@ def ping_github_for_repo_move(augur_db, key_auth, repo, logger,collection_hook='
attempts += 1
- #Mark as errored if not found
- if response_from_gh.status_code == 404:
- logger.error(f"Repo {repo.repo_git} responded 404 when pinged!")
+ #Update Url and retry if 301
+ #301 moved permanently
+ if response_from_gh.status_code == 301:
+
+ owner, name = extract_owner_and_repo_from_endpoint(key_auth, response_from_gh.headers['location'], logger)
+ try:
+ old_description = str(repo.description)
+ except Exception:
+ old_description = ""
+
+ #Create new repo object to update existing
repo_update_dict = {
- 'repo_git': repo.repo_git,
- 'repo_path': None,
- 'repo_name': None,
- 'description': f"During our check for this repo on {datetime.today().strftime('%Y-%m-%d')}, a 404 error was returned. The repository does not appear to have moved. Instead, it appears to be deleted",
- 'data_collection_date': datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
+ 'repo_git': f"https://github.com/{owner}/{name}",
+ 'repo_path': None,
+ 'repo_name': None,
+ 'description': f"(Originally hosted at {url}) {old_description}"
}
- update_repo_with_dict(current_repo_dict, repo_update_dict, logger, augur_db)
-
- raise Exception(f"ERROR: Repo not found at requested host {repo.repo_git}")
- elif attempts >= 10:
- logger.warning(f"Could not check if repo moved because the api timed out 10 times. Url: {url}")
- return
-
+ update_repo_with_dict(repo, repo_update_dict, logger,augur_db)
- #skip if not moved
- #301 moved permanently
- if response_from_gh.status_code != 301:
- logger.info(f"Repo found at url: {url}")
- return
+ raise Exception("ERROR: Repo has moved! Resetting Collection!")
- owner, name = extract_owner_and_repo_from_endpoint(key_auth, response_from_gh.headers['location'], logger)
-
-
- try:
- old_description = str(repo.description)
- except:
- old_description = ""
+ #Mark as ignore if 404
+ if response_from_gh.status_code == 404:
+ repo_update_dict = {
+ 'repo_git': repo.repo_git,
+ 'repo_path': None,
+ 'repo_name': None,
+ 'description': f"During our check for this repo on {datetime.today().strftime('%Y-%m-%d')}, a 404 error was returned. The repository does not appear to have moved. Instead, it appears to be deleted",
+ 'data_collection_date': datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
+ }
- #Create new repo object to update existing
- repo_update_dict = {
- 'repo_git': f"https://github.com/{owner}/{name}",
- 'repo_path': None,
- 'repo_name': None,
- 'description': f"(Originally hosted at {url}) {old_description}"
- }
+ update_repo_with_dict(repo, repo_update_dict, logger, augur_db)
- update_repo_with_dict(current_repo_dict, repo_update_dict, logger,augur_db)
+ statusQuery = augur_db.session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id)
- statusQuery = augur_db.session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id)
+ collectionRecord = execute_session_query(statusQuery,'one')
- collectionRecord = execute_session_query(statusQuery,'one')
- if collection_hook == 'core':
- collectionRecord.core_status = CollectionState.PENDING.value
+ collectionRecord.core_status = CollectionState.IGNORE.value
collectionRecord.core_task_id = None
collectionRecord.core_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
- elif collection_hook == 'secondary':
- collectionRecord.secondary_status = CollectionState.PENDING.value
+
+ collectionRecord.secondary_status = CollectionState.IGNORE.value
collectionRecord.secondary_task_id = None
collectionRecord.secondary_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
- augur_db.session.commit()
+ collectionRecord.facade_status = CollectionState.IGNORE.value
+ collectionRecord.facade_task_id = None
+ collectionRecord.facade_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
- raise Exception("ERROR: Repo has moved! Marked repo as pending and stopped collection")
+ collectionRecord.ml_status = CollectionState.IGNORE.value
+ collectionRecord.ml_task_id = None
+ collectionRecord.ml_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
+
+ augur_db.session.commit()
+ raise Exception("ERROR: Repo has moved! Resetting Collection!")
+
+
+ if attempts >= 10:
+ logger.error(f"Could not check if repo moved because the api timed out 10 times. Url: {url}")
+ raise Exception(f"ERROR: Could not get api response for repo: {url}")
+ #skip if not 404
+ logger.info(f"Repo found at url: {url}")
+ return
+
diff --git a/augur/tasks/github/events/tasks.py b/augur/tasks/github/events/tasks.py
index 129afd0de..640079d85 100644
--- a/augur/tasks/github/events/tasks.py
+++ b/augur/tasks/github/events/tasks.py
@@ -210,9 +210,11 @@ def update_issue_closed_cntrbs_from_events(engine, repo_id):
SELECT issue_id, cntrb_id from RankedIssues where rn=1 and repo_id={repo_id} and cntrb_id is not NULL
""")
- result = engine.execute(get_ranked_issues).fetchall()
- update_data = [{'issue_id': row['issue_id'], 'cntrb_id': row['cntrb_id'], 'repo_id': repo_id} for row in result]
+ with engine.connect() as conn:
+ result = conn.execute(get_ranked_issues).fetchall()
+
+ update_data = [{'issue_id': row[0], 'cntrb_id': row[1], 'repo_id': repo_id} for row in result]
with engine.connect() as connection:
update_stmt = s.text("""
UPDATE issues
diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py
index 577f17c32..26d102753 100644
--- a/augur/tasks/github/facade_github/tasks.py
+++ b/augur/tasks/github/facade_github/tasks.py
@@ -252,8 +252,8 @@ def insert_facade_contributors(repo_id):
""").bindparams(repo_id=repo_id)
#Execute statement with session.
- result = manifest.augur_db.execute_sql(new_contrib_sql).fetchall()
- new_contribs = [dict(zip(row.keys(), row)) for row in result]
+ result = manifest.augur_db.execute_sql(new_contrib_sql)
+ new_contribs = [dict(row) for row in result.mappings()]
#print(new_contribs)
@@ -303,8 +303,8 @@ def insert_facade_contributors(repo_id):
#existing_cntrb_emails = json.loads(pd.read_sql(resolve_email_to_cntrb_id_sql, self.db, params={
# 'repo_id': repo_id}).to_json(orient="records"))
- result = session.execute_sql(resolve_email_to_cntrb_id_sql).fetchall()
- existing_cntrb_emails = [dict(zip(row.keys(), row)) for row in result]
+ result = session.execute_sql(resolve_email_to_cntrb_id_sql)
+ existing_cntrb_emails = [dict(row) for row in result.mappings()]
print(existing_cntrb_emails)
link_commits_to_contributor(session,list(existing_cntrb_emails))
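Both hunks above replace the fetchall()/row.keys() zip with result.mappings(), the supported way to get name-keyed rows from newer SQLAlchemy result objects. A minimal sketch of the two access styles, with a placeholder engine and query:

    import sqlalchemy as s

    # placeholder engine; the workers get theirs from the augur session objects
    engine = s.create_engine("postgresql+psycopg2://user:pass@localhost:5432/augur")

    with engine.connect() as conn:
        result = conn.execute(s.text("SELECT cntrb_id, cntrb_email FROM augur_data.contributors"))

        # rows are tuple-like, so positional access (row[0], row[1]) always works;
        # for dict-style rows keyed by column name, iterate the mappings() view instead
        contributors = [dict(row) for row in result.mappings()]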
diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py
index 5380b8bf1..0ba793470 100644
--- a/augur/tasks/github/issues/tasks.py
+++ b/augur/tasks/github/issues/tasks.py
@@ -195,7 +195,7 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None:
issue_assignee_dicts += add_key_value_pair_to_dicts(other_issue_data["assignees"], "issue_id", issue_id)
- logger.info(f"{task_name}: Inserting other issue data of lengths: Labels: {len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}")
+ logger.info(f"{task_name}: Inserting other github issue data of lengths: Labels: {len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}")
# inserting issue labels
# we are using label_src_id and issue_id to determine if the label is already in the database.
diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages/tasks.py
index 6e23434ba..4dfd3a634 100644
--- a/augur/tasks/github/messages/tasks.py
+++ b/augur/tasks/github/messages/tasks.py
@@ -187,7 +187,8 @@ def process_messages(messages, task_name, repo_id, logger, augur_db):
message_string_fields = ["msg_text"]
message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
return_columns=message_return_columns, string_fields=message_string_fields)
-
+ if message_return_data is None:
+ return
pr_message_ref_dicts = []
issue_message_ref_dicts = []
diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py
index e7ebcd945..81b4c4397 100644
--- a/augur/tasks/github/pull_requests/files_model/core.py
+++ b/augur/tasks/github/pull_requests/files_model/core.py
@@ -20,8 +20,8 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth):
pr_numbers = []
#pd.read_sql(pr_number_sql, self.db, params={})
- result = augur_db.execute_sql(pr_number_sql).fetchall()
- pr_numbers = [dict(zip(row.keys(), row)) for row in result]
+ result = augur_db.execute_sql(pr_number_sql)#.fetchall()
+ pr_numbers = [dict(row) for row in result.mappings()]
query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id)
repo = execute_session_query(query, 'one')
diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py
index 3af6e39e0..8db394754 100644
--- a/augur/tasks/github/pull_requests/tasks.py
+++ b/augur/tasks/github/pull_requests/tasks.py
@@ -74,9 +74,18 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None:
return all_data
-
-def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db):
+def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db):
+ """
+ Parse and insert all retrieved PR data.
+
+ Arguments:
+ pull_requests: List of paginated pr endpoint data
+ task_name: Name of the calling task and the repo
+ repo_id: augur id of the repository
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
tool_source = "Pr Task"
tool_version = "2.0"
data_source = "Github API"
diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py
index f3050fc1b..5957d4cb5 100644
--- a/augur/tasks/github/releases/core.py
+++ b/augur/tasks/github/releases/core.py
@@ -84,7 +84,8 @@ def insert_release(augur_db, logger, repo_id, owner, release, tag_only = False):
release_inf = get_release_inf(repo_id, release, tag_only)
#Do an upsert
- augur_db.insert_data(release_inf,Release,['release_id'])
+ string_fields = ["release_name", "release_description", "release_author", "release_tag_name"]
+ augur_db.insert_data(release_inf,Release,['release_id'], string_fields=string_fields)
logger.info(f"Inserted info for {owner}/{repo_id}/{release['name']}\n")
diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py
index 8a19430e8..20ce07f06 100644
--- a/augur/tasks/github/util/github_api_key_handler.py
+++ b/augur/tasks/github/util/github_api_key_handler.py
@@ -32,7 +32,7 @@ def __init__(self, session: DatabaseSession):
self.logger = session.logger
self.config = AugurConfig(self.logger, session)
- self.oauth_redis_key = "oauth_keys_list"
+ self.oauth_redis_key = "github_oauth_keys_list"
self.redis_key_list = RedisList(self.oauth_redis_key)
diff --git a/augur/tasks/github/util/github_paginator.py b/augur/tasks/github/util/github_paginator.py
index 548d25b0f..31c14565d 100644
--- a/augur/tasks/github/util/github_paginator.py
+++ b/augur/tasks/github/util/github_paginator.py
@@ -154,6 +154,8 @@ class GithubApiResult(Enum):
SECONDARY_RATE_LIMIT = 4
RATE_LIMIT_EXCEEDED = 5
ABUSE_MECHANISM_TRIGGERED = 6
+ # TODO: Add bad credentials detection that removes key
+ # from redis if bad credentials are detected
BAD_CREDENTIALS = 7
HTML = 8
EMPTY_STRING = 9
diff --git a/augur/tasks/github/util/util.py b/augur/tasks/github/util/util.py
index fbb23dd6e..42989dcca 100644
--- a/augur/tasks/github/util/util.py
+++ b/augur/tasks/github/util/util.py
@@ -54,10 +54,21 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic
try:
return response.json()
except json.decoder.JSONDecodeError as e:
- logger.warning(f"invalid return from GitHub. Response was: {response.text}. Exception: {e}")
+ logger.warning(f"invalid return. Response was: {response.text}. Exception: {e}")
return json.loads(json.dumps(response.text))
def get_repo_weight_by_issue(logger,repo_git):
+ """
+ Retrieve the sum of the number of issues and prs in a repository from a graphql query.
+
+ Arguments:
+ logger: logger object
+ repo_git: repository url
+
+ Returns:
+ Sum of issues and prs for that repo
+ """
+
from augur.tasks.github.util.gh_graphql_entities import GitHubRepo as GitHubRepoGraphql
owner,name = get_owner_repo(repo_git)
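get_repo_weight_by_issue sums a repository's issue and pull request counts via Augur's GraphQL wrapper. As a rough, standalone illustration of the underlying GitHub GraphQL query (token handling simplified; this is not the GitHubRepoGraphql class the task uses):

    import httpx

    def issue_pr_total(owner: str, name: str, token: str) -> int:
        """Return the combined issue and pull request count for one repository."""
        query = """
        query($owner: String!, $name: String!) {
          repository(owner: $owner, name: $name) {
            issues { totalCount }
            pullRequests { totalCount }
          }
        }
        """
        response = httpx.post(
            "https://api.github.com/graphql",
            json={"query": query, "variables": {"owner": owner, "name": name}},
            headers={"Authorization": f"token {token}"},
            timeout=30,
        )
        repository = response.json()["data"]["repository"]
        return repository["issues"]["totalCount"] + repository["pullRequests"]["totalCount"]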
diff --git a/augur/tasks/gitlab/events_task.py b/augur/tasks/gitlab/events_task.py
new file mode 100644
index 000000000..8058831ba
--- /dev/null
+++ b/augur/tasks/gitlab/events_task.py
@@ -0,0 +1,209 @@
+"""
+Module to define the task methods to collect gitlab event data for augur
+"""
+import logging
+
+from augur.tasks.init.celery_app import celery_app as celery
+from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
+from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
+from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
+from augur.application.db.data_parse import extract_gitlab_mr_event_data, extract_gitlab_issue_event_data
+from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
+from augur.application.db.models import Repo, Issue, IssueEvent, PullRequest, PullRequestEvent
+from augur.application.db.util import execute_session_query
+
+platform_id = 2
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_gitlab_issue_events(repo_git) -> int:
+ """
+ Retrieve and parse gitlab events for the desired repo
+
+ Arguments:
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_gitlab_issue_events.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ events = retrieve_all_gitlab_event_data("issue", repo_git, logger, manifest.key_auth)
+
+ if events:
+ logger.info(f"Length of gitlab issue events: {len(events)}")
+ process_issue_events(events, f"{owner}/{repo}: Gitlab Issue Events task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab issue events")
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_gitlab_merge_request_events(repo_git) -> int:
+ """
+ Retrieve and parse gitlab mrs for the desired repo
+
+ Arguments:
+ repo_git: the repo url string
+ """
+
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_gitlab_merge_request_events.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ events = retrieve_all_gitlab_event_data("merge_request", repo_git, logger, manifest.key_auth)
+
+ if events:
+ logger.info(f"Length of gitlab merge request events: {len(events)}")
+ process_mr_events(events, f"{owner}/{repo}: Gitlab MR Events task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request events")
+
+
+def retrieve_all_gitlab_event_data(gtype, repo_git, logger, key_auth) -> list:
+ """
+ Retrieve all gitlab event data of the given target type for the desired repo
+
+ Arguments:
+ gtype: type of event data
+ repo_git: url of the relevant repo
+ logger: logging object
+ key_auth: key auth cache and rotator object
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger.info(f"Collecting gitlab issue events for {owner}/{repo}")
+
+ url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={gtype}"
+ events = GitlabApiHandler(key_auth, logger)
+
+ all_data = []
+ num_pages = events.get_num_pages(url)
+ for page_data, page in events.iter_pages(url):
+
+ if page_data is None:
+ return all_data
+
+ if len(page_data) == 0:
+ logger.debug(
+ f"{owner}/{repo}: Gitlab {gtype} Events Page {page} contains no data...returning")
+ logger.info(f"{owner}/{repo}: {gtype} Events Page {page} of {num_pages}")
+ return all_data
+
+ logger.info(f"{owner}/{repo}: Gitlab {gtype} Events Page {page} of {num_pages}")
+
+ all_data += page_data
+
+ return all_data
+
+def process_issue_events(events, task_name, repo_id, logger, augur_db):
+ """
+ Parse the gitlab issue events and insert them into the database
+
+ Arguments:
+ events: List of dictionaries of issue event data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Gitlab issue events task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ issue_event_dicts = []
+
+ # create mapping from issue number to issue id of current issues
+ issue_url_to_id_map = {}
+ issues = augur_db.session.query(Issue).filter(Issue.repo_id == repo_id).all()
+ for issue in issues:
+ issue_url_to_id_map[issue.gh_issue_number] = issue.issue_id
+
+ for event in events:
+
+ issue_number = event["target_iid"]
+
+ try:
+ issue_id = issue_url_to_id_map[issue_number]
+ except KeyError:
+ logger.info(f"{task_name}: Could not find related issue")
+ logger.info(f"{task_name}: We were searching for an issue with number {issue_number} in repo {repo_id}")
+ logger.info(f"{task_name}: Skipping")
+ continue
+
+ issue_event_dicts.append(
+ extract_gitlab_issue_event_data(event, issue_id, platform_id, repo_id,
+ tool_source, tool_version, data_source)
+ )
+
+ logger.info(f"{task_name}: Inserting {len(issue_event_dicts)} gitlab issue events")
+ issue_event_natural_keys = ["issue_id", "issue_event_src_id"]
+ augur_db.insert_data(issue_event_dicts, IssueEvent, issue_event_natural_keys)
+
+
+def process_mr_events(events, task_name, repo_id, logger, augur_db):
+ """
+ Parse the gitlab merge request events and insert them into the database
+
+ Arguments:
+ events: List of dictionaries of mr event data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Gitlab mr events task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ mr_event_dicts = []
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ for event in events:
+
+ mr_number = event["target_iid"]
+
+ try:
+ issue_id = mr_number_to_id_map[mr_number]
+ except KeyError:
+ logger.info(f"{task_name}: Could not find related mr")
+ logger.info(f"{task_name}: We were searching for an mr with number {mr_number} in repo {repo_id}")
+ logger.info(f"{task_name}: Skipping")
+ continue
+
+ mr_event_dicts.append(
+ extract_gitlab_mr_event_data(event, issue_id, platform_id, repo_id,
+ tool_source, tool_version, data_source)
+ )
+
+ # TODO: Add unique key for this
+ logger.info(f"{task_name}: Inserting {len(mr_event_dicts)} gitlab mr events")
+ mr_event_natural_keys = ["pull_request_id", "issue_event_src_id"]
+ augur_db.insert_data(mr_event_dicts, PullRequestEvent, mr_event_natural_keys)
+
+
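The new event tasks lean on GitlabApiHandler for pagination, but the endpoint itself is easy to see in isolation: GitLab's project events API filtered by target_type, with the project path URL-encoded the same way retrieve_all_gitlab_event_data builds it. A hand-rolled sketch with httpx (project and token are placeholders):

    import httpx
    from urllib.parse import quote

    owner, repo, token = "gitlab-org", "gitaly", "YOUR_GITLAB_TOKEN"   # placeholders
    project = quote(f"{owner}/{repo}", safe="")                        # owner%2Frepo, as the task builds it

    url = f"https://gitlab.com/api/v4/projects/{project}/events"
    params = {"target_type": "issue", "per_page": 100, "page": 1}      # "merge_request" for MR events
    headers = {"Authorization": f"Bearer {token}"}

    events = httpx.get(url, params=params, headers=headers, timeout=30).json()
    for event in events:
        # target_iid is the issue/MR number the processing functions map back to an augur id
        print(event.get("action_name"), event.get("target_iid"))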
diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py
new file mode 100644
index 000000000..5303d606e
--- /dev/null
+++ b/augur/tasks/gitlab/gitlab_api_handler.py
@@ -0,0 +1,386 @@
+"""
+Defines a GitlabApiHandler class to paginate and handle interaction with GitLab's
+api through automatic use of relevant key auth and pagination tools.
+"""
+import httpx
+import time
+import logging
+
+from typing import List, Optional, Union, Generator, Tuple
+from urllib.parse import urlencode, urlparse, parse_qs, urlunparse
+from enum import Enum
+
+from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
+from augur.tasks.github.util.util import parse_json_response
+
+class GitlabApiResult(Enum):
+ """All the different results of querying the Gitlab API."""
+
+ SUCCESS = 0
+ TIMEOUT = 1
+ NO_MORE_ATTEMPTS = 2
+ NOT_FOUND = 3
+ SECONDARY_RATE_LIMIT = 4
+ RATE_LIMIT_EXCEEDED = 5
+ ABUSE_MECHANISM_TRIGGERED = 6
+ # TODO: Add bad credentials detection that removes key
+ # from redis if bad credentials are detected
+ BAD_CREDENTIALS = 7
+
+class GitlabApiHandler():
+ """This class is a sequence that handles retrieving data from the Gitlab API.
+
+ Attributes:
+ key_manager (GitlabRandomKeyAuth): Custom httpx auth class
+ that randomizes the gitlab api key a request gets.
+ This is how the requests are getting their api keys
+ logger (logging.Logger): Logger that handles printing information to files and stdout
+ """
+
+ def __init__(self, key_manager: GitlabRandomKeyAuth, logger: logging.Logger):
+ """Initialize the class GitlabPaginator.
+
+ Args:
+ url: url that the data is being collected
+ key_manager: class that randomly selects a Gitlab API key for each request
+ logger: handles logging
+ from_datetime: collects data after this datatime (not yet implemented)
+ to_datetime: collects data before this datatime (not yet implemented)
+ """
+ self.key_manager = key_manager
+ self.logger = logger
+
+ def get_length(self, url):
+ """Get the length of the Gitlab API data.
+
+ Returns:
+ The length of the Gitlab API data at the url.
+
+ Examples:
+ This is how to get the amount of data an endpoint will return:
+
+ issues = GitlabApiHandler(session.oauths, logger)
+ issue_len = issues.get_length(url)
+ """
+
+ num_pages = self.get_num_pages(url)
+
+ self.logger.info(f"Num pages: {num_pages}")
+
+ params = {"page": num_pages}
+ url = add_query_params(url, params)
+
+ # get the amount of data on last page
+ data, _, result = self.retrieve_data(url)
+
+ if result == GitlabApiResult.SUCCESS:
+ return (100 * (num_pages -1)) + len(data)
+
+ self.logger.debug("Unable to retrieve data length from api")
+ return 0
+
+ def iter(self, url) -> Generator[Optional[dict], None, None]:
+ """Provide data from Gitlab API via a generator that yields one dict at a time.
+
+ Yields:
+ A piece of data from the gitlab api at the specified url
+ """
+
+ url = self._set_paginaton_query_params(url)
+
+ data_list, response, result = self.retrieve_data(url)
+
+ if result != GitlabApiResult.SUCCESS:
+ self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+ yield None
+ return
+
+ # yield the first page data
+ for data in data_list:
+ yield data
+
+ while 'next' in response.links.keys():
+ next_page = response.links['next']['url']
+
+ # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values
+ data_list, response, result = self.retrieve_data(next_page)
+
+ if result != GitlabApiResult.SUCCESS:
+ self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+ return
+
+ for data in data_list:
+ yield data
+
+ def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, None]:
+ """Provide data from Gitlab API via a generator that yields a page of dicts at a time.
+
+ Returns:
+ A page of data from the Gitlab API at the specified url
+ """
+
+ url = self._set_paginaton_query_params(url)
+
+ # retrieves the data for the given url
+ data_list, response, result = self.retrieve_data(url)
+
+ if result != GitlabApiResult.SUCCESS:
+ self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+ yield None, None
+ return
+
+ # this retrieves the page for the given url
+ page_number = get_url_page_number(url)
+
+ # yields the first page of data and its page number
+ yield data_list, page_number
+
+ while 'next' in response.links.keys():
+
+ # gets the next page from the last responses header
+ next_page = response.links['next']['url']
+
+ # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values
+ data_list, response, result = self.retrieve_data(next_page)
+
+ if result != GitlabApiResult.SUCCESS:
+ self.logger.debug(f"Failed to retrieve the data for even though 10 attempts were given. Url: {next_page}")
+ return
+
+ page_number = get_url_page_number(next_page)
+
+ # if either the data or response is None then stop iterating
+ if data_list is None or response is None:
+ return
+
+ # yield the data from the page and its number
+ yield data_list, page_number
+
+ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx.Response], GitlabApiResult]:
+ """Attempt to retrieve data at given url.
+
+ Args:
+ url: The url to retrieve the data from
+
+ Returns
+ The response object from hitting the url and the data on the page
+ """
+
+ timeout = 30
+ timeout_count = 0
+ num_attempts = 1
+ while num_attempts <= 10:
+
+ response = hit_api(self.key_manager, url, self.logger, timeout)
+
+ num_attempts += 1
+
+ if response is None:
+ timeout_count += 1
+ if timeout_count == 10:
+ self.logger.error(f"Request timed out 10 times for {url}")
+ return None, None, GitlabApiResult.TIMEOUT
+
+ timeout = timeout * 1.1
+ continue
+
+ if response.status_code == 500:
+ self.logger.error(f"Gitlab returned {response.status_code} error when fetching {url}. Message: {response.json()}")
+ continue
+
+ if response.status_code == 429:
+
+ current_epoch = int(time.time())
+ epoch_when_key_resets = int(response.headers["ratelimit-reset"])
+ key_reset_time = epoch_when_key_resets - current_epoch
+
+ if key_reset_time < 0:
+ self.logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}")
+ key_reset_time = 0
+
+ self.logger.info(f"\n\n\nGitlab API rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)")
+ time.sleep(key_reset_time)
+ continue
+
+ if response.status_code == 404:
+ self.logger.info(f"ERROR: 404 not found for {url}")
+ return [], response, GitlabApiResult.NOT_FOUND
+
+ if response.status_code == 204:
+ return [], response, GitlabApiResult.SUCCESS
+
+ if response.status_code >= 200 and response.status_code <=299:
+
+ page_data = parse_json_response(self.logger, response)
+ return page_data, response, GitlabApiResult.SUCCESS
+
+ self.logger.warning(f"Unhandled gitlab response. Status code: {response.status_code}. Body: {response.json()}")
+
+
+
+ self.logger.error("Unable to collect data in 10 attempts")
+ return None, None, GitlabApiResult.NO_MORE_ATTEMPTS
+
+ def get_num_pages(self, url) -> Optional[int]:
+ """Get the number of pages of data that a url can paginate through.
+
+ Returns:
+ The number of pages a url can access
+ """
+
+ url = self._set_paginaton_query_params(url)
+
+ timeout: float = 5
+ num_attempts = 0
+ while num_attempts < 10:
+ r = self.hit_api(url=url, timeout=timeout, method="HEAD")
+
+ if r:
+ break
+
+ timeout = timeout * 1.2
+ num_attempts += 1
+ else:
+ raise RuntimeError("Unable to get the number of pages of data in 10 attempts")
+
+ if 'last' not in r.links.keys():
+ return 1
+
+ # get the last url from header
+ last_page_url = r.links['last']['url']
+
+ parsed_url = urlparse(last_page_url)
+ try:
+ num_pages = int(parse_qs(parsed_url.query)['page'][0])
+ except (KeyError, ValueError):
+ return None
+
+ return num_pages
+
+ def hit_api(self, url, timeout, method):
+ """Attempt to retrieve data at given url.
+
+ Args:
+ url: The url to retrieve the data from
+ timeout: time to wait until timeout
+ method: GET, POST, etc.
+
+ Returns
+ The response object from hitting the url and the data on the page
+ """
+
+ return hit_api(self.key_manager, url, self.logger, timeout, method=method)
+
+ def _set_paginaton_query_params(self, url):
+
+ remove_fields = ["per_page", "page"]
+ url = clean_url(url, remove_fields)
+
+ # we need to add query params directly to the url, instead of passing the param to the httpx.Client.request
+ # this is because gitlab will only append specified params to the links in the headers if they are a part
+ # of the url, and not the params with the request
+ params = {"per_page": 100}
+ url = add_query_params(url, params)
+
+ return url
+
+################################################################################
+
+# Url Helper Method to remove query parameters from the url
+def clean_url(url: str, keys: List[str]) -> str:
+ """Remove query params from url.
+
+ Args:
+ url: the url that is being modified
+ keys: the query params that are being removed
+
+ Returns:
+ A url with the params in keys removed
+ """
+ u = urlparse(url)
+ query = parse_qs(u.query, keep_blank_values=True)
+
+ for key in keys:
+ query.pop(key, None)
+
+ u = u._replace(query=urlencode(query, True))
+
+ return urlunparse(u)
+
+
+def add_query_params(url: str, additional_params: dict) -> str:
+ """Add query params to a url.
+
+ Args:
+ url: the url that is being modified
+ additional_params: key value pairs specifying the parameters to be added
+
+ Returns:
+ The url with the key value pairs in additional_params added as query params
+ """
+ url_components = urlparse(url)
+ original_params = parse_qs(url_components.query)
+ # Before Python 3.5 you could update original_params with
+ # additional_params, but here all the variables are immutable.
+ merged_params = {**original_params, **additional_params}
+ updated_query = urlencode(merged_params, doseq=True)
+ # _replace() is how you can create a new NamedTuple with a changed field
+ return url_components._replace(query=updated_query).geturl()
+
+
+def get_url_page_number(url: str) -> int:
+ """Parse the page number from the url.
+
+ Note:
+ If the url does not contain a page number the function returns 1
+
+ Args:
+ url: url to get the page number from
+
+ Returns:
+ The page number that the url contains
+ """
+ parsed_url = urlparse(url)
+ try:
+ # if page is not a url query param then this is page 1
+ page_number = int(parse_qs(parsed_url.query)['page'][0])
+
+ except KeyError:
+ return 1
+
+ return page_number
+
+################################################################################
+
+def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]:
+ """Ping the api and get the data back for the page.
+
+ Returns:
+ A httpx response that contains the data. None if a timeout occurs
+ """
+ # self.logger.info(f"Hitting endpoint with {method} request: {url}...\n")
+
+ with httpx.Client() as client:
+
+ try:
+ response = client.request(
+ method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True)
+
+ except TimeoutError:
+ logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n")
+ time.sleep(round(timeout))
+ return None
+ except httpx.TimeoutException:
+ logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n")
+ time.sleep(round(timeout))
+ return None
+ except httpx.NetworkError:
+ logger.info(f"Network Error. Sleeping {round(timeout)} seconds and trying again...\n")
+ time.sleep(round(timeout))
+ return None
+ except httpx.ProtocolError:
+ logger.info(f"Protocol Error. Sleeping {round(timeout*1.5)} seconds and trying again...\n")
+ time.sleep(round(timeout*1.5))
+ return None
+
+ return response
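Putting the handler together, the intended call pattern mirrors the issue and event tasks: construct it with a key auth object, then walk iter_pages until an empty or None page comes back. A usage sketch assuming a GitlabTaskManifest as in the task modules (the project URL is illustrative):

    import logging
    from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
    from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest

    logger = logging.getLogger(__name__)

    with GitlabTaskManifest(logger) as manifest:
        handler = GitlabApiHandler(manifest.key_auth, logger)

        url = "https://gitlab.com/api/v4/projects/gitlab-org%2fgitaly/issues"   # illustrative project
        collected = []
        for page_data, page in handler.iter_pages(url):
            if not page_data:                 # a None or empty page ends collection
                break
            logger.info(f"Page {page}: {len(page_data)} records")
            collected += page_data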
diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py
new file mode 100644
index 000000000..20bc1219c
--- /dev/null
+++ b/augur/tasks/gitlab/gitlab_api_key_handler.py
@@ -0,0 +1,176 @@
+"""
+Defines the handler logic needed to effectively fetch GitLab auth keys
+from either the redis cache or the database. Follows the same patterns as
+the github api key handler.
+"""
+import httpx
+import time
+import random
+
+from typing import Optional, List
+
+from augur.tasks.util.redis_list import RedisList
+from augur.application.db.session import DatabaseSession
+from augur.application.config import AugurConfig
+from sqlalchemy import func
+
+
+class NoValidKeysError(Exception):
+ """Defines an exception that is thrown when no gitlab keys are valid"""
+
+
+class GitlabApiKeyHandler():
+ """Handles Gitlab API key retrieval from the database and redis
+
+ Attributes:
+ session (DatabaseSession): Database connection
+ logger (logging.Logger): Handles all logs
+ oauth_redis_key (str): The key where the gitlab api keys are cached in redis
+ redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache
+ config_key (str): The api key that is stored in the users config table
+ keys (List[str]): List of keys retrieved from the database or cache
+ """
+
+ def __init__(self, session: DatabaseSession):
+
+ self.session = session
+ self.logger = session.logger
+ self.config = AugurConfig(self.logger, session)
+
+ self.oauth_redis_key = "gitlab_oauth_keys_list"
+
+ self.redis_key_list = RedisList(self.oauth_redis_key)
+
+ self.config_key = self.get_config_key()
+
+ self.keys = self.get_api_keys()
+
+ self.logger.info(f"Retrieved {len(self.keys)} gitlab api keys for use")
+
+ def get_random_key(self):
+ """Retrieves a random key from the list of keys
+
+ Returns:
+ A random gitlab api key
+ """
+
+ return random.choice(self.keys)
+
+ def get_config_key(self) -> str:
+ """Retrieves the users gitlab api key from their config table
+
+ Returns:
+ Gitlab API key from config table
+ """
+ return self.config.get_value("Keys", "gitlab_api_key")
+
+ def get_api_keys_from_database(self) -> List[str]:
+ """Retieves all gitlab api keys from database
+
+ Note:
+ It retrieves all the keys from the database except the one defined in the users config
+
+ Returns:
+ Gitlab api keys that are in the database
+ """
+ from augur.application.db.models import WorkerOauth
+
+ select = WorkerOauth.access_token
+ # randomizing the order at db time
+ #select.order_by(func.random())
+ where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'gitlab']
+
+ return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()]
+ #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()]
+
+
+ def get_api_keys(self) -> List[str]:
+ """Retrieves all valid Github API Keys
+
+ Note:
+ It checks to see if the keys are in the redis cache first.
+ It removes bad keys before returning.
+ If keys were taken from the database, it caches all the valid keys that were found
+
+ Returns:
+ Valid Gitlab api keys
+ """
+
+ redis_keys = list(self.redis_key_list)
+
+ if redis_keys:
+ return redis_keys
+
+ keys = []
+ attempts = 0
+ while attempts < 3:
+
+ try:
+ keys = self.get_api_keys_from_database()
+ break
+ except Exception as e:
+ self.logger.error(f"Ran into issue when fetching key from database:\n {e}\n")
+ self.logger.error("Sleeping for 5 seconds...")
+ time.sleep(5)
+ attempts += 1
+
+ if self.config_key is not None:
+ keys += [self.config_key]
+
+ if len(keys) == 0:
+ return []
+
+ valid_keys = []
+ with httpx.Client() as client:
+
+ for key in keys:
+
+ # removes key if it returns "Bad Credentials"
+ if self.is_bad_api_key(client, key) is False:
+ valid_keys.append(key)
+ else:
+ print(f"WARNING: The key '{key}' is not a valid key. Hint: If valid in past it may have expired")
+
+ # just in case the multiprocessing adds extra values to the list.
+ # we are clearing it before we push the values we got
+ self.redis_key_list.clear()
+
+ # add all the keys to redis
+ self.redis_key_list.extend(valid_keys)
+
+ if not valid_keys:
+ raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")
+
+
+ # shuffling the keys so not all processes get the same keys in the same order
+ #valid_now = valid_keys
+ #try:
+ #self.logger.info(f'valid keys before shuffle: {valid_keys}')
+ #valid_keys = random.sample(valid_keys, len(valid_keys))
+ #self.logger.info(f'valid keys AFTER shuffle: {valid_keys}')
+ #except Exception as e:
+ # self.logger.debug(f'{e}')
+ # valid_keys = valid_now
+ # pass
+
+ return valid_keys
+
+ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
+ """Determines if a Gitlab API key is bad
+
+ Args:
+ client: makes the http requests
+ oauth_key: gitlab api key that is being tested
+
+ Returns:
+ True if key is bad. False if the key is good
+ """
+
+ url = "https://gitlab.com/api/v4/user"
+
+ headers = {'Authorization': f'Bearer {oauth_key}'}
+
+ response = client.request(method="GET", url=url, headers=headers, timeout=180)
+ if response.status_code == 401:
+ return True
+
+ return False
\ No newline at end of file
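In use, the handler is built from a DatabaseSession and exposes the validated key list, a random-choice helper, and the is_bad_api_key probe it already runs at startup; the same pieces can re-validate the cached list later. A sketch, with the logger and engine obtained the way other augur modules do:

    import logging
    import httpx
    from augur.application.db.session import DatabaseSession
    from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
    from augur.tasks.init.celery_app import engine

    logger = logging.getLogger(__name__)

    with DatabaseSession(logger, engine) as session:
        handler = GitlabApiKeyHandler(session)

        key = handler.get_random_key()      # pick one key for the next request

        # re-run the startup validation against the current key list and refresh the redis cache
        with httpx.Client() as client:
            still_valid = [k for k in handler.keys if not handler.is_bad_api_key(client, k)]
        handler.redis_key_list.clear()
        handler.redis_key_list.extend(still_valid)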
diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py
new file mode 100644
index 000000000..64ba31dd1
--- /dev/null
+++ b/augur/tasks/gitlab/gitlab_random_key_auth.py
@@ -0,0 +1,26 @@
+"""Defines the GitlabRandomKeyAuth class"""
+
+from augur.tasks.util.random_key_auth import RandomKeyAuth
+from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
+from augur.application.db.session import DatabaseSession
+
+
+class GitlabRandomKeyAuth(RandomKeyAuth):
+ """Defines a gitlab specific RandomKeyAuth class so
+ gitlab collections can have a class that randomly selects an api key for each request
+ """
+
+ def __init__(self, session: DatabaseSession, logger):
+ """Creates a GitlabRandomKeyAuth object and initializes the RandomKeyAuth parent class"""
+
+
+ # gets the gitlab api keys from the database via the GitlabApiKeyHandler
+ gitlab_api_keys = GitlabApiKeyHandler(session).keys
+
+ if not gitlab_api_keys:
+ print("Failed to find github api keys. This is usually because your key has expired")
+
+ header_name = "Authorization"
+ key_format = "Bearer {0}"
+
+ super().__init__(gitlab_api_keys, header_name, session.logger, key_format)
\ No newline at end of file
diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py
new file mode 100644
index 000000000..58a6e6437
--- /dev/null
+++ b/augur/tasks/gitlab/gitlab_task_session.py
@@ -0,0 +1,55 @@
+"""
+Defines a GitLab-specific session and manifest object for use in GitLab tasks
+"""
+from logging import Logger
+
+from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
+from augur.application.db.session import DatabaseSession
+
+class GitlabTaskManifest:
+ """
+ Manifest object that represents the state and common elements of
+ the specified task. GitLab version for the GitLab tasks.
+
+ Attributes:
+ augur_db: sqlalchemy db object
+ key_auth: GitLab specific key auth retrieval collection
+ logger: logging object
+ platform_id: GitLab specific platform id (github is 1)
+ """
+
+ def __init__(self, logger):
+
+ from augur.tasks.init.celery_app import engine
+
+ self.augur_db = DatabaseSession(logger, engine)
+ self.key_auth = GitlabRandomKeyAuth(self.augur_db.session, logger)
+ self.logger = logger
+ self.platform_id = 2
+
+ def __enter__(self):
+
+ return self
+
+ def __exit__(self, exception_type, exception_value, exception_traceback):
+
+ self.augur_db.close()
+
+class GitlabTaskSession(DatabaseSession):
+ """ORM session used in gitlab tasks.
+ This class adds the platform_id and the gitlab key authentication class,
+ to the already existing DatabaseSession so there is a central location to access
+ api keys and a single platform_id reference
+
+ Attributes:
+ oauths (GitlabRandomKeyAuth): Class that handles randomly assigning gitlab api keys to httpx requests
+ platform_id (int): The id that refers to the Gitlab platform
+ """
+
+ def __init__(self, logger: Logger, engine=None):
+
+ super().__init__(logger, engine=engine)
+
+ self.oauths = GitlabRandomKeyAuth(self, logger)
+ self.platform_id = 2
+
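GitlabTaskSession is the GitLab counterpart of GithubTaskSession: a DatabaseSession that also carries the key auth object (oauths) and platform id, so one object serves both ORM queries and authenticated requests. A usage sketch, assuming DatabaseSession exposes the usual query interface as the key handler above relies on; the repo URL is illustrative, and the httpx call only demonstrates that oauths is an httpx-compatible auth object:

    import logging
    import httpx
    from augur.tasks.gitlab.gitlab_task_session import GitlabTaskSession
    from augur.application.db.models import Repo
    from augur.application.db.util import execute_session_query

    logger = logging.getLogger(__name__)

    with GitlabTaskSession(logger) as session:
        # ORM access works as with any DatabaseSession
        query = session.query(Repo).filter(Repo.repo_git == "https://gitlab.com/gitlab-org/gitaly")
        repo = execute_session_query(query, 'one')

        # session.oauths attaches a randomly chosen GitLab key to each request
        response = httpx.get("https://gitlab.com/api/v4/projects/gitlab-org%2fgitaly",
                             auth=session.oauths, timeout=30)
        logger.info(f"{repo.repo_git}: {response.status_code}")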
diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py
new file mode 100644
index 000000000..cf6e5e5da
--- /dev/null
+++ b/augur/tasks/gitlab/issues_task.py
@@ -0,0 +1,320 @@
+"""
+Defines the set of tasks used to retrieve GitLab issue data.
+"""
+import logging
+import traceback
+
+from augur.tasks.init.celery_app import celery_app as celery
+from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
+from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
+from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
+from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data, extract_needed_gitlab_issue_message_ref_data, extract_needed_gitlab_message_data
+from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
+from augur.application.db.models import Issue, IssueLabel, IssueAssignee, IssueMessageRef, Message, Repo
+from augur.application.db.util import execute_session_query
+
+platform_id = 2
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_gitlab_issues(repo_git : str) -> int:
+ """
+ Retrieve and parse gitlab issues for the desired repo
+
+ Arguments:
+ repo_git: the repo url string
+ """
+
+ logger = logging.getLogger(collect_gitlab_issues.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ try:
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ owner, repo = get_owner_repo(repo_git)
+
+ issue_data = retrieve_all_gitlab_issue_data(repo_git, logger, manifest.key_auth)
+
+ if issue_data:
+ issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db)
+
+ return issue_ids
+ else:
+ logger.info(f"{owner}/{repo} has no issues")
+ return []
+ except Exception as e:
+ logger.error(f"Could not collect gitlab issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}")
+ return -1
+
+
+
+def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> list:
+    """
+    Retrieve all issue data for the given repo from the GitLab api
+
+    Arguments:
+        repo_git: url of the relevant repo
+        logger: logging object
+        key_auth: key auth cache and rotator object
+    """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger.info(f"Collecting gitlab issues for {owner}/{repo}")
+
+ url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues?with_labels_details=True"
+ issues = GitlabApiHandler(key_auth, logger)
+
+ all_data = []
+ num_pages = issues.get_num_pages(url)
+ for page_data, page in issues.iter_pages(url):
+
+ if page_data is None:
+ return all_data
+
+ if len(page_data) == 0:
+ logger.debug(
+ f"{owner}/{repo}: Gitlab Issues Page {page} contains no data...returning")
+ logger.info(f"{owner}/{repo}: Issues Page {page} of {num_pages}")
+ return all_data
+
+ logger.info(f"{owner}/{repo}: Gitlab Issues Page {page} of {num_pages}")
+
+ all_data += page_data
+
+ return all_data
+
+def process_issues(issues, task_name, repo_id, logger, augur_db) -> list:
+    """
+    Parse the issue data and insert issues, labels, and assignees into the database
+
+ Arguments:
+ issues: List of dictionaries of issue data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ # get repo_id or have it passed
+ tool_source = "Gitlab Issue Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ issue_dicts = []
+ issue_ids = []
+ issue_mapping_data = {}
+ for issue in issues:
+
+ issue_ids.append(issue["iid"])
+
+ issue_dicts.append(
+ extract_needed_issue_data_from_gitlab_issue(issue, repo_id, tool_source, tool_version, data_source)
+ )
+
+ issue_labels = extract_needed_gitlab_issue_label_data(issue["labels"], repo_id,
+ tool_source, tool_version, data_source)
+
+ issue_assignees = extract_needed_gitlab_issue_assignee_data(issue["assignees"], repo_id,
+ tool_source, tool_version, data_source)
+
+ mapping_data_key = issue["id"]
+ issue_mapping_data[mapping_data_key] = {
+ "labels": issue_labels,
+ "assignees": issue_assignees,
+ }
+
+
+    if len(issue_dicts) == 0:
+        logger.info(f"{task_name}: No gitlab issues found while processing")
+        return []
+
+ logger.info(f"{task_name}: Inserting {len(issue_dicts)} gitlab issues")
+ issue_natural_keys = ["repo_id", "gh_issue_id"]
+ issue_string_columns = ["issue_title", "issue_body"]
+ issue_return_columns = ["gh_issue_id", "issue_id"]
+
+ issue_return_data = augur_db.insert_data(issue_dicts, Issue, issue_natural_keys, return_columns=issue_return_columns, string_fields=issue_string_columns)
+
+ issue_label_dicts = []
+ issue_assignee_dicts = []
+ for data in issue_return_data:
+
+ gh_issue_id = data["gh_issue_id"]
+ issue_id = data["issue_id"]
+
+        try:
+            other_issue_data = issue_mapping_data[gh_issue_id]
+        except KeyError as e:
+            logger.info(f"{task_name}: Could not find other gitlab issue data. This should never happen. Error: {e}")
+            continue
+
+
+        # add the issue id to the labels and assignees, then add them to a list of dicts that will be inserted soon
+ dict_key = "issue_id"
+ issue_label_dicts += add_key_value_pair_to_dicts(other_issue_data["labels"], dict_key, issue_id)
+ issue_assignee_dicts += add_key_value_pair_to_dicts(other_issue_data["assignees"], dict_key, issue_id)
+
+
+ logger.info(f"{task_name}: Inserting other gitlab issue data of lengths: Labels: {len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}")
+
+ # inserting issue labels
+ # we are using label_src_id and issue_id to determine if the label is already in the database.
+ issue_label_natural_keys = ['label_src_id', 'issue_id']
+ issue_label_string_fields = ["label_text", "label_description"]
+ augur_db.insert_data(issue_label_dicts, IssueLabel,
+ issue_label_natural_keys, string_fields=issue_label_string_fields)
+
+ # inserting issue assignees
+    # we are using issue_assignee_src_id and issue_id to determine if the assignee is already in the database.
+ # issue_assignee_natural_keys = ['issue_assignee_src_id', 'issue_id']
+ # augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys)
+
+ return issue_ids
+
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_gitlab_issue_comments(issue_ids, repo_git) -> int:
+ """
+ Retrieve and parse gitlab events for the desired repo
+
+ Arguments:
+ issue_ids: Set of issue ids to collect coments for
+ repo_git: repo url
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+    logger = logging.getLogger(collect_gitlab_issue_comments.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ comments = retrieve_all_gitlab_issue_comments(manifest.key_auth, logger, issue_ids, repo_git)
+
+ if comments:
+ logger.info(f"Length of comments: {len(comments)}")
+ process_gitlab_issue_messages(comments, f"{owner}/{repo}: Gitlab issue messages task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab issue comments")
+
+
+def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
+ """
+ Retrieve only the needed data for issue comments
+
+ Arguments:
+ key_auth: key auth cache and rotator object
+        logger: logging object
+        issue_ids: ids of issues to find comments for
+ repo_git: repo url
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ all_comments = {}
+ issue_count = len(issue_ids)
+ index = 1
+
+ comments = GitlabApiHandler(key_auth, logger)
+
+ for id in issue_ids:
+
+ logger.info(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}")
+
+ url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes"
+
+ for page_data, page in comments.iter_pages(url):
+
+ if page_data is None or len(page_data) == 0:
+ break
+
+ if id in all_comments:
+ all_comments[id].extend(page_data)
+ else:
+ all_comments[id] = page_data
+
+ index += 1
+
+ return all_comments
+
+
+def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
+ """
+ Retrieve only the needed data for issue messages from the api response
+
+ Arguments:
+ data: List of dictionaries of issue event data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Gitlab issue comments"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+    # create mapping from issue number to issue id of current issues
+ issue_number_to_id_map = {}
+ issues = augur_db.session.query(Issue).filter(Issue.repo_id == repo_id).all()
+ for issue in issues:
+ issue_number_to_id_map[issue.gh_issue_number] = issue.issue_id
+
+ message_dicts = []
+ message_ref_mapping_data = {}
+ for id, messages in data.items():
+
+ try:
+ issue_id = issue_number_to_id_map[id]
+ except KeyError:
+ logger.info(f"{task_name}: Could not find related issue")
+ logger.info(f"{task_name}: We were searching for issue number {id} in repo {repo_id}")
+ logger.info(f"{task_name}: Skipping")
+ continue
+
+ for message in messages:
+
+ issue_message_ref_data = extract_needed_gitlab_issue_message_ref_data(message, issue_id, repo_id, tool_source, tool_version, data_source)
+
+ message_ref_mapping_data[message["id"]] = {
+ "msg_ref_data": issue_message_ref_data
+ }
+
+ message_dicts.append(
+ extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source)
+ )
+
+
+ logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
+ message_natural_keys = ["platform_msg_id"]
+ message_return_columns = ["msg_id", "platform_msg_id"]
+ message_string_fields = ["msg_text"]
+ message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
+ return_columns=message_return_columns, string_fields=message_string_fields)
+
+ issue_message_ref_dicts = []
+ for data in message_return_data:
+
+ augur_msg_id = data["msg_id"]
+ platform_message_id = data["platform_msg_id"]
+
+ ref = message_ref_mapping_data[platform_message_id]
+ message_ref_data = ref["msg_ref_data"]
+ message_ref_data["msg_id"] = augur_msg_id
+
+ issue_message_ref_dicts.append(message_ref_data)
+
+ logger.info(f"{task_name}: Inserting {len(issue_message_ref_dicts)} gitlab issue messages ref rows")
+ issue_message_ref_natural_keys = ["issue_id", "issue_msg_ref_src_comment_id"]
+ augur_db.insert_data(issue_message_ref_dicts, IssueMessageRef, issue_message_ref_natural_keys)
+
+
diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py
new file mode 100644
index 000000000..ccf3c7e01
--- /dev/null
+++ b/augur/tasks/gitlab/merge_request_task.py
@@ -0,0 +1,560 @@
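+"""
+Defines the set of tasks used to retrieve GitLab merge request data.
+"""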
+import logging
+
+from augur.tasks.init.celery_app import celery_app as celery
+from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
+from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
+from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
+from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data
+from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
+from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message
+from augur.application.db.util import execute_session_query
+
+platform_id = 2
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_gitlab_merge_requests(repo_git: str) -> int:
+ """
+ Retrieve and parse gitlab MRs for the desired repo
+
+ Arguments:
+ repo_git: the repo url string
+ """
+
+
+ logger = logging.getLogger(collect_gitlab_merge_requests.__name__)
+
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ repo_id = augur_db.session.query(Repo).filter(
+ Repo.repo_git == repo_git).one().repo_id
+
+ owner, repo = get_owner_repo(repo_git)
+ mr_data = retrieve_all_mr_data(repo_git, logger, manifest.key_auth)
+
+ if mr_data:
+ mr_ids = process_merge_requests(mr_data, f"{owner}/{repo}: Mr task", repo_id, logger, augur_db)
+
+ return mr_ids
+ else:
+ logger.info(f"{owner}/{repo} has no merge requests")
+ return []
+
+
+def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> list:
+    """
+    Retrieve all merge request data for the given repo from the GitLab api
+
+    Arguments:
+        repo_git: url of the relevant repo
+        logger: logging object
+        key_auth: key auth cache and rotator object
+    """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger.info(f"Collecting pull requests for {owner}/{repo}")
+
+ url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests?with_labels_details=True"
+ mrs = GitlabApiHandler(key_auth, logger)
+
+ all_data = []
+ num_pages = mrs.get_num_pages(url)
+ for page_data, page in mrs.iter_pages(url):
+
+ if page_data is None:
+ return all_data
+
+ if len(page_data) == 0:
+ logger.debug(
+ f"{owner}/{repo} Mrs Page {page} contains no data...returning")
+ logger.info(f"{owner}/{repo} Mrs Page {page} of {num_pages}")
+ return all_data
+
+ logger.info(f"{owner}/{repo} Mrs Page {page} of {num_pages}")
+
+ all_data += page_data
+
+ return all_data
+
+
+def process_merge_requests(data, task_name, repo_id, logger, augur_db):
+ """
+    Parse the merge request data and insert MRs, labels, and assignees into the database
+
+ Arguments:
+ data: collection of mr data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+
+ Returns:
+ List of parsed MR ids.
+ """
+
+ tool_source = "Mr Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ merge_requests = []
+ mr_ids = []
+ mr_mapping_data = {}
+ for mr in data:
+
+ mr_ids.append(mr["iid"])
+
+ merge_requests.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version))
+
+ assignees = extract_needed_merge_request_assignee_data(mr["assignees"], repo_id, tool_source, tool_version, data_source)
+
+ labels = extract_needed_mr_label_data(mr["labels"], repo_id, tool_source, tool_version, data_source)
+
+ mapping_data_key = mr["id"]
+ mr_mapping_data[mapping_data_key] = {
+ "assignees": assignees,
+ "labels": labels
+ }
+
+ logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}")
+ pr_natural_keys = ["repo_id", "pr_src_id"]
+ pr_string_fields = ["pr_src_title", "pr_body"]
+ pr_return_columns = ["pull_request_id", "pr_src_id"]
+ pr_return_data = augur_db.insert_data(merge_requests, PullRequest, pr_natural_keys, return_columns=pr_return_columns, string_fields=pr_string_fields)
+
+
+ mr_assignee_dicts = []
+ mr_label_dicts = []
+ for data in pr_return_data:
+
+ mr_src_id = data["pr_src_id"]
+ pull_request_id = data["pull_request_id"]
+
+        try:
+            other_mr_data = mr_mapping_data[mr_src_id]
+        except KeyError as e:
+            logger.info(f"Could not find other pr data. This should never happen. Error: {e}")
+            continue
+
+ dict_key = "pull_request_id"
+ mr_assignee_dicts += add_key_value_pair_to_dicts(other_mr_data["assignees"], dict_key, pull_request_id)
+ mr_label_dicts += add_key_value_pair_to_dicts(other_mr_data["labels"], dict_key, pull_request_id)
+
+ logger.info(f"{task_name}: Inserting other pr data of lengths: Labels: {len(mr_label_dicts)} - Assignees: {len(mr_assignee_dicts)}")
+
+    # TODO: Setup unique key on assignees with a value of ('cntrb_id', 'pull_request_id') and add 'cntrb_id' to assignee data
+ # mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
+ # augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)
+
+ pr_label_natural_keys = ['pr_src_id', 'pull_request_id']
+ pr_label_string_fields = ["pr_src_description"]
+ augur_db.insert_data(mr_label_dicts, PullRequestLabel, pr_label_natural_keys, string_fields=pr_label_string_fields)
+
+ return mr_ids
+
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_merge_request_comments(mr_ids, repo_git) -> int:
+ """
+    Retrieve and parse gitlab merge request comments for the desired repo
+
+ Arguments:
+ mr_ids: ids of MRs to paginate comments for
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_merge_request_comments.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
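+        # note: "{id}" is re-inserted literally so retrieve_merge_request_data can
+        # fill in each merge request id later via url.format(id=id)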
+ url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes".format(owner=owner, repo=repo, id="{id}")
+ comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, manifest.key_auth, logger, response_type="list")
+
+ if comments:
+ logger.info(f"Length of merge request comments: {len(comments)}")
+ process_gitlab_mr_messages(comments, f"{owner}/{repo}: Gitlab mr messages task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request comments")
+
+
+def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
+ """
+    Retrieve only the needed data for mr messages from the api response
+
+ Arguments:
+ data: List of dictionaries of mr message data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Gitlab mr comments"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ message_dicts = []
+ message_ref_mapping_data = {}
+ for id, messages in data.items():
+
+ try:
+ pull_request_id = mr_number_to_id_map[id]
+ except KeyError:
+ logger.info(f"{task_name}: Could not find related mr")
+ logger.info(f"{task_name}: We were searching for mr number {id} in repo {repo_id}")
+ logger.info(f"{task_name}: Skipping")
+ continue
+
+ for message in messages:
+
+ mr_message_ref_data = extract_needed_gitlab_mr_message_ref_data(message, pull_request_id, repo_id, tool_source, tool_version, data_source)
+
+ message_ref_mapping_data[message["id"]] = {
+ "msg_ref_data": mr_message_ref_data
+ }
+
+ message_dicts.append(
+ extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source)
+ )
+
+
+ logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
+ message_natural_keys = ["platform_msg_id"]
+ message_return_columns = ["msg_id", "platform_msg_id"]
+ message_string_fields = ["msg_text"]
+ message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
+ return_columns=message_return_columns, string_fields=message_string_fields)
+
+ mr_message_ref_dicts = []
+ for data in message_return_data:
+
+ augur_msg_id = data["msg_id"]
+ platform_message_id = data["platform_msg_id"]
+
+ ref = message_ref_mapping_data[platform_message_id]
+ message_ref_data = ref["msg_ref_data"]
+ message_ref_data["msg_id"] = augur_msg_id
+
+ mr_message_ref_dicts.append(message_ref_data)
+
+ logger.info(f"{task_name}: Inserting {len(mr_message_ref_dicts)} mr messages ref rows")
+ mr_message_ref_natural_keys = ["pull_request_id", "pr_message_ref_src_comment_id"]
+ augur_db.insert_data(mr_message_ref_dicts, PullRequestMessageRef, mr_message_ref_natural_keys)
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_merge_request_metadata(mr_ids, repo_git) -> int:
+ """
+    Retrieve and parse gitlab merge request metadata for the desired repo
+
+ Arguments:
+ mr_ids: list of mr ids to find metadata for
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_merge_request_metadata.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}")
+ metadata_list = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, manifest.key_auth, logger, response_type="dict")
+
+ if metadata_list:
+ logger.info(f"Length of merge request metadata: {len(metadata_list)}")
+ process_mr_metadata(metadata_list, f"{owner}/{repo}: Mr metadata task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request metadata")
+
+def process_mr_metadata(data, task_name, repo_id, logger, augur_db):
+ """
+    Retrieve only the needed data for mr metadata from the api response
+
+ Arguments:
+ data: List of dictionaries of mr metadata
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Mr Metadata Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ all_metadata = []
+ for id, metadata in data.items():
+
+ pull_request_id = mr_number_to_id_map[id]
+
+ all_metadata.extend(extract_needed_mr_metadata(metadata, repo_id, pull_request_id, tool_source, tool_version, data_source))
+
+ logger.info(f"{task_name}: Inserting {len(all_metadata)} merge request metadata")
+ pr_metadata_natural_keys = ['pull_request_id', 'pr_head_or_base', 'pr_sha']
+ augur_db.insert_data(all_metadata, PullRequestMeta, pr_metadata_natural_keys)
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_merge_request_reviewers(mr_ids, repo_git) -> int:
+ """
+ Retrieve and parse mr reviewers for the desired repo
+
+ Arguments:
+ mr_ids: mrs to search for reviewers for
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_merge_request_reviewers.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}")
+ reviewers = retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, manifest.key_auth, logger, response_type="dict")
+
+ if reviewers:
+ logger.info(f"Length of merge request reviewers: {len(reviewers)}")
+ process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request reviewers")
+
+def process_mr_reviewers(data, task_name, repo_id, logger, augur_db):
+ """
+    Retrieve only the needed data for mr reviewer data from the api response
+
+    Arguments:
+        data: List of dictionaries of mr reviewer data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Mr Reviewer Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ logger.info(f"Running {task_name}...")
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ all_reviewers = []
+ for id, values in data.items():
+
+ pull_request_id = mr_number_to_id_map[id]
+
+ reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, tool_source, tool_version, data_source)
+
+ all_reviewers += reviewers
+
+ # TODO: Need to add unique key with pull_request_id and cntrb_id to insert gitlab reviewers
+ # pr_reviewer_natural_keys = ["pull_request_id", "cntrb_id"]
+ # augur_db.insert_data(all_reviewers, PullRequestReviewer, pr_reviewer_natural_keys)
+
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_merge_request_commits(mr_ids, repo_git) -> int:
+ """
+ Retrieve and parse mr commits for the desired repo
+
+ Arguments:
+ mr_ids: ids of mrs to get commits for
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_merge_request_commits.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}")
+ commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, manifest.key_auth, logger, response_type="list")
+
+ if commits:
+ logger.info(f"Length of merge request commits: {len(commits)}")
+ process_mr_commits(commits, f"{owner}/{repo}: Mr commit task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request commits")
+
+
+def process_mr_commits(data, task_name, repo_id, logger, augur_db):
+ """
+ Retrieve only the needed data for mr commits from the api response
+
+ Arguments:
+ data: List of dictionaries of mr commit data
+ task_name: name of the task as well as the repo being processed
+ repo_id: augur id of the repo
+ logger: logging object
+ augur_db: sqlalchemy db object
+ """
+
+ tool_source = "Mr Commit Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ all_commits = []
+ for id, values in data.items():
+
+ pull_request_id = mr_number_to_id_map[id]
+
+ for commit in values:
+
+ all_commits.append(extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source))
+
+
+ logger.info(f"{task_name}: Inserting {len(all_commits)} merge request commits")
+ pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"]
+ augur_db.insert_data(all_commits,PullRequestCommit,pr_commits_natural_keys)
+
+
+
+@celery.task(base=AugurCoreRepoCollectionTask)
+def collect_merge_request_files(mr_ids, repo_git) -> int:
+ """
+    Retrieve and parse gitlab merge request files for the desired repo
+
+ Arguments:
+ mr_ids: the ids of mrs to get files for.
+ repo_git: the repo url string
+ """
+
+ owner, repo = get_owner_repo(repo_git)
+
+ logger = logging.getLogger(collect_merge_request_files.__name__)
+ with GitlabTaskManifest(logger) as manifest:
+
+ augur_db = manifest.augur_db
+
+ query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
+ repo_obj = execute_session_query(query, 'one')
+ repo_id = repo_obj.repo_id
+
+ url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}")
+ files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, manifest.key_auth, logger, response_type="dict")
+
+ if files:
+ logger.info(f"Length of merge request files: {len(files)}")
+ process_mr_files(files, f"{owner}/{repo}: Mr files task", repo_id, logger, augur_db)
+ else:
+ logger.info(f"{owner}/{repo} has no gitlab merge request files")
+
+def process_mr_files(data, task_name, repo_id, logger, augur_db):
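+    """
+    Retrieve only the needed data for mr file data from the api response
+
+    Arguments:
+        data: mapping of mr number to the changes payload returned by the api
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """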
+
+ tool_source = "Mr files Task"
+ tool_version = "2.0"
+ data_source = "Gitlab API"
+
+ # create mapping from mr number to pull request id of current mrs
+ mr_number_to_id_map = {}
+ mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()
+ for mr in mrs:
+ mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
+
+ all_files = []
+ for id, gitlab_file_data in data.items():
+
+ pull_request_id = mr_number_to_id_map[id]
+
+ all_files.extend(extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source))
+
+ logger.info(f"{task_name}: Inserting {len(all_files)} merge request files")
+ pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"]
+ augur_db.insert_data(all_files, PullRequestFile, pr_file_natural_keys)
+
+
+def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type):
+ """
+ Retrieve specific mr data from the GitLab api.
+
+ Arguments:
+ ids: mr ids to paginate info for
+ url: endpoint to paginate or hit
+ name: name of data to collect
+ owner: owner of the repo
+ repo: repo name
+ key_auth: key auth cache and rotator object
+        logger: logging object
+ response_type: type of data to get from the api
+ """
+
+ all_data = {}
+ mr_count = len(ids)
+ index = 1
+
+ api_handler = GitlabApiHandler(key_auth, logger)
+ for id in ids:
+
+ print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {mr_count}")
+ formatted_url = url.format(id=id)
+
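+        # endpoints hit with response_type="dict" (metadata, approvals, changes) return a
+        # single object per mr, while "list" endpoints (notes, commits) are paginated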
+ if response_type == "dict":
+ page_data, _, _ = api_handler.retrieve_data(formatted_url)
+ if page_data:
+ all_data[id] = page_data
+
+ elif response_type == "list":
+
+ for page_data, _ in api_handler.iter_pages(formatted_url):
+
+ if page_data is None or len(page_data) == 0:
+ break
+
+ if id in all_data:
+ all_data[id].extend(page_data)
+ else:
+ all_data[id] = page_data
+ else:
+ raise Exception(f"Unexpected response type: {response_type}")
+
+ index += 1
+
+ return all_data
diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py
index 706541d1c..274305449 100644
--- a/augur/tasks/init/celery_app.py
+++ b/augur/tasks/init/celery_app.py
@@ -20,16 +20,7 @@
from augur.application.db.engine import get_database_string
from augur.tasks.init import get_redis_conn_values, get_rabbitmq_conn_string
from augur.application.db.models import CollectionStatus, Repo
-
-class CollectionState(Enum):
- SUCCESS = "Success"
- PENDING = "Pending"
- ERROR = "Error"
- COLLECTING = "Collecting"
- INITIALIZING = "Initializing"
- UPDATE = "Update"
- FAILED_CLONE = "Failed Clone"
-
+from augur.tasks.util.collection_state import CollectionState
logger = logging.getLogger(__name__)
@@ -50,6 +41,10 @@ class CollectionState(Enum):
'augur.tasks.github.pull_requests.commits_model.tasks',
'augur.tasks.github.traffic.tasks']
+gitlab_tasks = ['augur.tasks.gitlab.merge_request_task',
+ 'augur.tasks.gitlab.issues_task',
+ 'augur.tasks.gitlab.events_task']
+
git_tasks = ['augur.tasks.git.facade_tasks',
'augur.tasks.git.dependency_tasks.tasks',
'augur.tasks.git.dependency_libyear_tasks.tasks',
@@ -66,7 +61,7 @@ class CollectionState(Enum):
frontend_tasks = ['augur.tasks.frontend']
-tasks = start_tasks + github_tasks + git_tasks + materialized_view_tasks + frontend_tasks
+tasks = start_tasks + github_tasks + gitlab_tasks + git_tasks + materialized_view_tasks + frontend_tasks
if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1":
tasks += data_analysis_tasks
@@ -81,7 +76,7 @@ class CollectionState(Enum):
#Classes for tasks that take a repo_git as an argument.
class AugurCoreRepoCollectionTask(celery.Task):
- def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_hook='core'):
+ def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_hook='core',after_fail=CollectionState.ERROR.value):
from augur.tasks.init.celery_app import engine
logger = AugurLogger(logger_name).get_logger()
@@ -100,7 +95,7 @@ def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_h
prevStatus = getattr(repoStatus, f"{collection_hook}_status")
if prevStatus == CollectionState.COLLECTING.value or prevStatus == CollectionState.INITIALIZING.value:
- setattr(repoStatus, f"{collection_hook}_status", CollectionState.ERROR.value)
+ setattr(repoStatus, f"{collection_hook}_status", after_fail)
setattr(repoStatus, f"{collection_hook}_task_id", None)
session.commit()
@@ -125,6 +120,7 @@ def on_failure(self,exc,task_id,args,kwargs,einfo):
repo_git = args[0]
self.augur_handle_task_failure(exc,task_id,repo_git, "ml_task_failure", collection_hook='ml')
+
#task_cls='augur.tasks.init.celery_app:AugurCoreRepoCollectionTask'
celery_app = Celery('tasks', broker=BROKER_URL, backend=BACKEND_URL, include=tasks)
@@ -205,7 +201,7 @@ def setup_periodic_tasks(sender, **kwargs):
"""
from celery.schedules import crontab
from augur.tasks.start_tasks import augur_collection_monitor, augur_collection_update_weights
- from augur.tasks.start_tasks import non_repo_domain_tasks
+ from augur.tasks.start_tasks import non_repo_domain_tasks, retry_errored_repos
from augur.tasks.git.facade_tasks import clone_repos
from augur.tasks.db.refresh_materialized_views import refresh_materialized_views
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model
@@ -230,6 +226,9 @@ def setup_periodic_tasks(sender, **kwargs):
logger.info(f"Scheduling update of collection weights on midnight each day")
sender.add_periodic_task(crontab(hour=0, minute=0),augur_collection_update_weights.s())
+ logger.info(f"Setting 404 repos to be marked for retry on midnight each day")
+ sender.add_periodic_task(crontab(hour=0, minute=0),retry_errored_repos.s())
+
logger.info(f"Scheduling contributor breadth every 30 days")
thirty_days_in_seconds = 30*24*60*60
sender.add_periodic_task(thirty_days_in_seconds, contributor_breadth_model.s())
diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py
index 54068d30a..a9ba7e163 100644
--- a/augur/tasks/start_tasks.py
+++ b/augur/tasks/start_tasks.py
@@ -24,15 +24,18 @@
from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits
from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics
from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data
+from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files
+from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments
+from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events
from augur.tasks.git.facade_tasks import *
from augur.tasks.db.refresh_materialized_views import *
# from augur.tasks.data_analysis import *
from augur.tasks.init.celery_app import celery_app as celery
from augur.application.db.session import DatabaseSession
from logging import Logger
-from enum import Enum
from augur.tasks.util.redis_list import RedisList
from augur.application.db.models import CollectionStatus, Repo
+from augur.tasks.util.collection_state import CollectionState
from augur.tasks.util.collection_util import *
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor
@@ -93,6 +96,27 @@ def primary_repo_collect_phase(repo_git):
return repo_task_group
+def primary_repo_collect_phase_gitlab(repo_git):
+
+ logger = logging.getLogger(primary_repo_collect_phase_gitlab.__name__)
+
+ jobs = group(
+ chain(collect_gitlab_merge_requests.si(repo_git), group(
+ #collect_merge_request_comments.s(repo_git),
+ #collect_merge_request_reviewers.s(repo_git),
+ collect_merge_request_metadata.s(repo_git),
+ collect_merge_request_commits.s(repo_git),
+ collect_merge_request_files.s(repo_git),
+ collect_gitlab_merge_request_events.si(repo_git),
+ )),
+ chain(collect_gitlab_issues.si(repo_git), group(
+ #collect_gitlab_issue_comments.s(repo_git),
+ collect_gitlab_issue_events.si(repo_git),
+ )),
+ )
+
+ return jobs
+
#This phase creates the message for secondary collection tasks.
#These are less important and have their own worker.
@@ -102,8 +126,8 @@ def secondary_repo_collect_phase(repo_git):
repo_task_group = group(
process_pull_request_files.si(repo_git),
process_pull_request_commits.si(repo_git),
- process_ossf_dependency_metrics.si(repo_git),
- chain(collect_pull_request_reviews.si(repo_git), collect_pull_request_review_comments.si(repo_git))
+ chain(collect_pull_request_reviews.si(repo_git), collect_pull_request_review_comments.si(repo_git)),
+ process_ossf_dependency_metrics.si(repo_git)
)
return repo_task_group
@@ -146,20 +170,23 @@ def non_repo_domain_tasks():
def build_primary_repo_collect_request(session,enabled_phase_names, days_until_collect_again = 1):
#Add all required tasks to a list and pass it to the CollectionRequest
primary_enabled_phases = []
+ primary_gitlab_enabled_phases = []
#Primary jobs
if prelim_phase.__name__ in enabled_phase_names:
primary_enabled_phases.append(prelim_phase)
primary_enabled_phases.append(primary_repo_collect_phase)
+ primary_gitlab_enabled_phases.append(primary_repo_collect_phase_gitlab)
#task success is scheduled no matter what the config says.
def core_task_success_util_gen(repo_git):
return core_task_success_util.si(repo_git)
primary_enabled_phases.append(core_task_success_util_gen)
+ primary_gitlab_enabled_phases.append(core_task_success_util_gen)
- primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=7)
+ primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=7, gitlab_phases=primary_gitlab_enabled_phases)
primary_request.get_valid_repos(session)
return primary_request
@@ -301,9 +328,41 @@ def augur_collection_update_weights():
session.commit()
#git_update_commit_count_weight(repo_git)
+@celery.task
+def retry_errored_repos():
+ """
+ Periodic task to reset repositories that have errored and try again.
+ """
+ from augur.tasks.init.celery_app import engine
+    logger = logging.getLogger(retry_errored_repos.__name__)
+
+    #TODO: Isaac needs to normalize the statuses to be abstract in the
+ #collection_status table once augur dev is less unstable.
+ with DatabaseSession(logger,engine) as session:
+        query = s.sql.text(f"""UPDATE repo SET secondary_status = '{CollectionState.PENDING.value}'"""
+            f""" WHERE secondary_status = '{CollectionState.ERROR.value}' ;"""
+            f"""UPDATE repo SET core_status = '{CollectionState.PENDING.value}'"""
+            f""" WHERE core_status = '{CollectionState.ERROR.value}' ;"""
+            f"""UPDATE repo SET facade_status = '{CollectionState.PENDING.value}'"""
+            f""" WHERE facade_status = '{CollectionState.ERROR.value}' ;"""
+            f"""UPDATE repo SET ml_status = '{CollectionState.PENDING.value}'"""
+            f""" WHERE ml_status = '{CollectionState.ERROR.value}' ;"""
+ )
+
+ session.execute_sql(query)
+
+
+
#Retry this task for every issue so that repos that were added manually get the chance to be added to the collection_status table.
@celery.task(autoretry_for=(Exception,), retry_backoff=True, retry_backoff_max=300, retry_jitter=True, max_retries=None)
def create_collection_status_records():
+ """
+ Automatic task that runs and checks for repos that haven't been given a collection_status
+    record corresponding to the state of their collection at the moment.
+
+ A special celery task that automatically retries itself and has no max retries.
+ """
+
from augur.tasks.init.celery_app import engine
logger = logging.getLogger(create_collection_status_records.__name__)
diff --git a/augur/tasks/util/collection_state.py b/augur/tasks/util/collection_state.py
new file mode 100644
index 000000000..b5b8f0d26
--- /dev/null
+++ b/augur/tasks/util/collection_state.py
@@ -0,0 +1,30 @@
+
+from enum import Enum
+
+class CollectionState(Enum):
+ """
+ Enum of possible states a repository's collection
+ can have whether it is core, secondary, facade, etc.
+
+ Attributes:
+
+ SUCCESS: State of success for the jobs in that collection hook
+ PENDING: Means the repo has not had collection run at all
+ ERROR: The collection hook has crashed
+ COLLECTING: The collection hook is running
+ INITIALIZING: Only for facade, indicates the repo is being cloned via git
+ UPDATE: Only for facade, indicates the repo has been cloned
+ FAILED_CLONE: Only for facade, indicates the clone has failed (usually 404)
+ STANDBY: Indicates the repo has been paused
+ IGNORE: Repo has encountered an error and we will not try again (usually 404)
+ """
+
+ SUCCESS = "Success"
+ PENDING = "Pending"
+ ERROR = "Error"
+ COLLECTING = "Collecting"
+ INITIALIZING = "Initializing"
+ UPDATE = "Update"
+ FAILED_CLONE = "Failed Clone"
+ STANDBY = "Standby"
+ IGNORE = "Ignore"
diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py
index 4d5b663a2..89ae5f3d5 100644
--- a/augur/tasks/util/collection_util.py
+++ b/augur/tasks/util/collection_util.py
@@ -24,18 +24,9 @@
from augur.tasks.github.util.github_task_session import GithubTaskManifest
from augur.application.db.session import DatabaseSession
from augur.tasks.util.worker_util import calculate_date_weight_from_timestamps
+from augur.tasks.util.collection_state import CollectionState
-# class syntax
-class CollectionState(Enum):
- SUCCESS = "Success"
- PENDING = "Pending"
- ERROR = "Error"
- COLLECTING = "Collecting"
- INITIALIZING = "Initializing"
- UPDATE = "Update"
- FAILED_CLONE = "Failed Clone"
-
def get_list_of_all_users(session):
#Get a list of all users.
query = s.sql.text("""
@@ -132,9 +123,10 @@ def get_required_conditions_for_ml_repos(allow_collected_before = False, days_un
class CollectionRequest:
- def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1):
+ def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab_phases=None):
self.name = name
self.phases = phases
+ self.gitlab_phases = gitlab_phases
self.max_repo = max_repo
self.days_until_collect_again = days_until_collect_again
self.new_status = CollectionState.PENDING.value
@@ -587,27 +579,44 @@ def send_messages(self):
for col_hook in self.collection_hooks:
self.logger.info(f"Starting collection on {len(col_hook.repo_list)} {col_hook.name} repos")
-
+
for repo_git in col_hook.repo_list:
- #repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one()
- #repo_id = repo.repo_id
-
- augur_collection_sequence = []
- for job in col_hook.phases:
- #Add the phase to the sequence in order as a celery task.
- #The preliminary task creates the larger task chain
- augur_collection_sequence.append(job(repo_git))
-
- #augur_collection_sequence.append(core_task_success_util.si(repo_git))
- #Link all phases in a chain and send to celery
- augur_collection_chain = chain(*augur_collection_sequence)
- task_id = augur_collection_chain.apply_async().task_id
-
- self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}")
-
- #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
- yield repo_git, task_id, col_hook.name
+ repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one()
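+            # github repos go through the standard phases; everything else is routed
+            # to the gitlab phases when the collection hook defines them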
+ if "github" in repo.repo_git:
+ augur_collection_sequence = []
+ for job in col_hook.phases:
+ #Add the phase to the sequence in order as a celery task.
+ #The preliminary task creates the larger task chain
+ augur_collection_sequence.append(job(repo_git))
+
+ #augur_collection_sequence.append(core_task_success_util.si(repo_git))
+ #Link all phases in a chain and send to celery
+ augur_collection_chain = chain(*augur_collection_sequence)
+ task_id = augur_collection_chain.apply_async().task_id
+
+ self.logger.info(f"Setting github repo {col_hook.name} status to collecting for repo: {repo_git}")
+
+ #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
+ yield repo_git, task_id, col_hook.name
+ else:
+ if col_hook.gitlab_phases is not None:
+
+ augur_collection_sequence = []
+ for job in col_hook.gitlab_phases:
+ #Add the phase to the sequence in order as a celery task.
+ #The preliminary task creates the larger task chain
+ augur_collection_sequence.append(job(repo_git))
+
+ #augur_collection_sequence.append(core_task_success_util.si(repo_git))
+ #Link all phases in a chain and send to celery
+ augur_collection_chain = chain(*augur_collection_sequence)
+ task_id = augur_collection_chain.apply_async().task_id
+
+ self.logger.info(f"Setting gitlab repo {col_hook.name} status to collecting for repo: {repo_git}")
+
+ #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
+ yield repo_git, task_id, col_hook.name
#def start_block_of_repos(logger,session,repo_git_identifiers,phases,repos_type,hook="core"):
#
diff --git a/augur/tasks/util/worker_util.py b/augur/tasks/util/worker_util.py
index 6380ed22b..84c177724 100644
--- a/augur/tasks/util/worker_util.py
+++ b/augur/tasks/util/worker_util.py
@@ -138,7 +138,7 @@ def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None):
try:
required_output = json.loads(output)
except json.decoder.JSONDecodeError as e:
- session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
+ logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
raise e
return required_output
diff --git a/augur/templates/login.j2 b/augur/templates/login.j2
index c71d02d50..faaab620e 100644
--- a/augur/templates/login.j2
+++ b/augur/templates/login.j2
@@ -108,7 +108,7 @@