Merge pull request #2891 from chaoss/docker-dev-fix
Docker dev fix: Fixes to commit comments and events
sgoggins authored Aug 15, 2024
2 parents 17038d4 + eb4fa84 commit b778fcd
Showing 20 changed files with 13,586 additions and 1,658 deletions.
7 changes: 7 additions & 0 deletions augur/api/routes/pull_request_reports.py
@@ -21,6 +21,12 @@
from bokeh.models.glyphs import Rect
from bokeh.transform import dodge, factor_cmap, transform

# from selenium.webdriver import Firefox, FirefoxOptions
# options = FirefoxOptions()
# options.headless = True
# webdriver = Firefox(options=options)
#export_png(item, path, webdriver=webdriver)

warnings.filterwarnings('ignore')

from augur.api.routes import AUGUR_API_VERSION
@@ -604,6 +610,7 @@ def average_commits_per_PR():
# opts = FirefoxOptions()
# opts.add_argument("--headless")
# driver = webdriver.Firefox(firefox_options=opts)
# filename = export_png(grid, timeout=180, webdriver=webdriver)
filename = export_png(grid, timeout=180)

return send_file(filename)
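The commented-out lines above show the path not taken: constructing a headless Firefox driver and handing it to Bokeh, rather than letting `export_png` manage its own webdriver as the merged line does. A minimal sketch of that explicit-driver approach, assuming Bokeh's `export_png`, Selenium, and geckodriver are available; the figure here is only a stand-in for the report's `grid`:

```python
# Sketch only: the merged code calls export_png(grid, timeout=180) and lets
# Bokeh manage its own webdriver; this shows the explicit-driver alternative.
from bokeh.io import export_png
from bokeh.plotting import figure
from selenium.webdriver import Firefox, FirefoxOptions

options = FirefoxOptions()
options.add_argument("--headless")                 # run Firefox without a display
driver = Firefox(options=options)

p = figure(title="stand-in for the report grid")   # hypothetical figure
p.line([1, 2, 3], [1, 4, 9])

filename = export_png(p, filename="report.png", timeout=180, webdriver=driver)
driver.quit()
```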
2 changes: 1 addition & 1 deletion augur/application/cli/backend.py
@@ -185,7 +185,7 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45 : Reduced because it can be lower
core_num_processes = determine_worker_processes(.40, 50)
core_num_processes = determine_worker_processes(.40, 90)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
2 changes: 1 addition & 1 deletion augur/application/cli/collection.py
@@ -125,7 +125,7 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45: Reduced because not needed
core_num_processes = determine_worker_processes(.40, 50)
core_num_processes = determine_worker_processes(.40, 90)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
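Both CLI files raise the core-worker cap from 50 to 90 while keeping the 0.40 ratio. A hedged sketch of the ratio-and-cap arithmetic the callers rely on; the real `determine_worker_processes` derives its count from an estimated process budget, and the budget below is a made-up stand-in:

```python
def determine_worker_processes(ratio, maximum, estimated_budget=200):
    """Illustrative stand-in: take `ratio` of the estimated process budget,
    cap it at `maximum`, and never return fewer than 1 worker."""
    return max(1, min(maximum, int(estimated_budget * ratio)))

# With the merged values and a hypothetical budget of 200 processes:
print(determine_worker_processes(.40, 90))   # 80 (previously capped at 50)
```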
4 changes: 2 additions & 2 deletions augur/application/cli/tasks.py
@@ -36,8 +36,8 @@ def start():
secondary_worker_process = None

scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=90 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"

scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
core_worker_process = subprocess.Popen(core_worker.split(" "))
@@ -4,7 +4,7 @@
from datetime import datetime

from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.github.util.github_data_access import GithubDataAccess
from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException
from augur.application.db.models import ContributorRepo
from augur.application.db.lib import bulk_insert_dicts
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
@@ -100,17 +100,22 @@ def contributor_breadth_model(self) -> None:


cntrb_events = []
for event in github_data_access.paginate_resource(repo_cntrb_url):
try:
for event in github_data_access.paginate_resource(repo_cntrb_url):

cntrb_events.append(event)
cntrb_events.append(event)

event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
if event_age < newest_event_in_db:
logger.info("Found cntrb events we already have...skipping the rest")
break
event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
if event_age < newest_event_in_db:
logger.info("Found cntrb events we already have...skipping the rest")
break

if len(cntrb_events) == 0:
logger.info("There are no cntrb events, or new events for this user.\n")
if len(cntrb_events) == 0:
logger.info("There are no cntrb events, or new events for this user.\n")
continue

except UrlNotFoundException as e:
logger.warning(e)
continue

events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source)
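The hunk above wraps the whole pagination loop, so a contributor whose events feed returns a 404 is logged and skipped rather than failing the task. A condensed sketch of that pattern; apart from `UrlNotFoundException` (imported exactly as in the diff), the names below are stand-ins:

```python
from datetime import datetime

from augur.tasks.github.util.github_data_access import UrlNotFoundException

def collect_new_events(github_data_access, repo_cntrb_url, newest_event_in_db, logger):
    """Return new events for one contributor, or None if there are none or the URL 404s."""
    cntrb_events = []
    try:
        for event in github_data_access.paginate_resource(repo_cntrb_url):
            cntrb_events.append(event)
            event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
            if event_age < newest_event_in_db:
                logger.info("Found cntrb events we already have...skipping the rest")
                break
    except UrlNotFoundException as e:    # raised on a 404 for this contributor
        logger.warning(e)
        return None
    return cntrb_events or None
```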
@@ -1,6 +1,6 @@
import requests
import logging
import traceback
import logging
import traceback

logger = logging.getLogger(__name__)

@@ -9,87 +9,81 @@ def get_NPM_data(package):
r = requests.get(url)
if r.status_code < 400:
return r.json()
logger.warning(f"Failed to fetch data for package {package}. HTTP Status: {r.status_code}")
return {}


def clean_version(version):
version = [v for v in version if v.isdigit() or v == '.']
return ''.join(version)

def split_version(version):
#Split version string into list separated by .
#assign elements of list to respective variables.
version_list = list(version.split('.'))
patch = version_list.pop(-1)
minor = version_list.pop(-1)
major = version_list[0]

return major,minor,patch


return major, minor, patch

def get_latest_patch(version, data):
if 'versions' not in data:
logger.error(f"'versions' key not found in the NPM data for version {version}. Data: {data}")
raise KeyError("'versions' key not found")

versions = data['versions']
try:
index = list(versions.keys()).index(version)
except ValueError as e:
logger.error(f"Version {version} not found in the 'versions' list. Error: {e}")
raise e

major,minor,patch = split_version(version)
major, minor, patch = split_version(version)
consider_version = version
for v in list(versions.keys())[index:]:
if v.split('.')[0]==major:
if v.split('.')[1]== minor:
if v.split('.')[2]>patch:
if v.split('.')[0] == major:
if v.split('.')[1] == minor:
if v.split('.')[2] > patch:
consider_version = v
return consider_version


def get_lastest_minor(version, data):
try:
versions = data['versions']
except Exception as e:
logger.info(
''.join(traceback.format_exception(None, e, e.__traceback__)))
# raise e

if 'versions' not in data:
logger.error(f"'versions' key not found in the NPM data. Data: {data}")
raise KeyError("'versions' key not found")

versions = data['versions']
try:
index = list(versions.keys()).index(version)
except ValueError as e:
logger.info(f'error is {e} on the NPM. Some kind of value error. Probably a VALUES error for Node, #AmIRight?')
logger.info(f"Version {version} not found in the 'versions' list. Error: {e}")
raise e

major,minor,patch = split_version(version)

major, minor, patch = split_version(version)
consider_version = get_latest_patch(version, data)
for v in list(versions.keys())[index:]:
if v.split('.')[0]==major:
if v.split('.')[1]>minor:
consider_version = v
return consider_version

if v.split('.')[0] == major:
if v.split('.')[1] > minor:
consider_version = v
return consider_version

def get_npm_release_date(data, version):
release_time = data['time'][version]
release_time = data['time'].get(version)
if release_time:
return release_time
logger.warning(f"Release time not found for version {version}")
return None


def get_npm_latest_version(data):
return data['dist-tags']['latest']
return data['dist-tags'].get('latest', 'unknown')

#add code here
def get_npm_current_version(data, requirement):
if requirement[0]=='~':
if requirement[0] == '~':
try:
return get_latest_patch(clean_version(requirement), data)
except ValueError:
return None
elif requirement[0]=='^':
elif requirement[0] == '^':
try:
return get_lastest_minor(clean_version(requirement), data)
except ValueError:
return None
else:
return requirement
return requirement
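To make the helpers' behaviour concrete, a small worked example on synthetic registry data, assuming the functions above are in scope; the dictionary only mimics the fields these helpers read, and the version comparisons are string-based, exactly as in the code above:

```python
data = {
    "versions": {"1.2.0": {}, "1.2.1": {}, "1.3.0": {}, "2.0.0": {}},
    "dist-tags": {"latest": "2.0.0"},
    "time": {"1.2.1": "2023-05-01T00:00:00.000Z"},
}

print(split_version("1.2.0"))               # ('1', '2', '0')
print(get_latest_patch("1.2.0", data))      # '1.2.1' -- newest patch within 1.2.x
print(get_lastest_minor("1.2.0", data))     # '1.3.0' -- newest minor within 1.x
print(get_npm_latest_version(data))         # '2.0.0'
print(get_npm_release_date(data, "1.2.1"))  # '2023-05-01T00:00:00.000Z'
```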
25 changes: 24 additions & 1 deletion augur/tasks/git/dependency_tasks/tasks.py
@@ -3,6 +3,7 @@
from augur.tasks.git.dependency_tasks.core import *
from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurSecondaryRepoCollectionTask
from augur.tasks.util.metadata_exception import MetadataException


@celery.task(base=AugurFacadeRepoCollectionTask)
@@ -20,4 +21,26 @@ def process_ossf_dependency_metrics(self, repo_git):

logger = logging.getLogger(process_ossf_dependency_metrics.__name__)

generate_scorecard(logger, repo_git)
try:
generate_scorecard(logger, repo_git)
except Exception as e:
logger.warning(f'Exception generating scorecard: {e}')
tracer = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
logger.warning(f'Full stack trace of OpenSSF scorecard error: {tracer}')
raise MetadataException(f"An error occurred while generating the scorecard: {str(e)}")

"""
This try/except block is an attempt to get more information about this occasional error:
```bash
Traceback (most recent call last):
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/pool.py", line 366, in workloop
put((READY, (job, i, result, inqW_fd)))
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/queues.py", line 366, in put
self.send_payload(ForkingPickler.dumps(obj))
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/reduction.py", line 56, in dumps
cls(buf, protocol).dump(obj)
billiard.pool.MaybeEncodingError: Error sending result: ''(1, <ExceptionInfo: MetadataException("\'checks\' | Additional metadata: required_output: {}")>, None)''. Reason: ''PicklingError("Can\'t pickle <class \'augur.tasks.util.metadata_exception.MetadataException\'>: it\'s not the same object as augur.tasks.util.metadata_exception.MetadataException")''.
```
"""
@@ -179,10 +179,13 @@ def generate_commit_record(repos_id,commit,filename,
#db_local.commit()
execute_sql(store_working_commit)

# commit_message = check_output(
# f"git --git-dir {repo_loc} log --format=%B -n 1 {commit}".split()
# ).strip()

commit_message = check_output(
f"git --git-dir {repo_loc} log --format=%B -n 1 {commit}".split()
).strip()

).decode('utf-8').strip()

msg_record = {
'repo_id' : repo_id,
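The functional change in this hunk is the added `.decode('utf-8')`: `subprocess.check_output` returns bytes, and the `msg_record` built below expects a str. A quick illustration, runnable inside any git repository:

```python
from subprocess import check_output

raw = check_output("git log --format=%B -n 1 HEAD".split())
print(type(raw))                         # <class 'bytes'>

commit_message = raw.decode('utf-8').strip()
print(type(commit_message))              # <class 'str'>
```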
1 change: 1 addition & 0 deletions augur/tasks/github/contributors.py
@@ -1,5 +1,6 @@
import time
import logging
import traceback

from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
2 changes: 1 addition & 1 deletion augur/tasks/github/detect_move/core.py
@@ -115,7 +115,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c


session.commit()
raise Exception("ERROR: Repo has moved! Resetting Collection!")
raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!")


if attempts >= 10:
16 changes: 11 additions & 5 deletions augur/tasks/github/events.py
@@ -50,14 +50,14 @@ def collect_events(repo_git: str):

def bulk_events_collection_endpoint_contains_all_data(key_auth, logger, owner, repo):

url = f"https://api.github.com/repos/{owner}/{repo}/issues/events"
url = f"https://api.github.com/repos/{owner}/{repo}/issues/events?per_page=100"

github_data_access = GithubDataAccess(key_auth, logger)

page_count = github_data_access.get_resource_page_count(url)

if page_count > 300:
raise Exception(f"Either github raised the paginator page limit for things like events and messages, or is_pagination_limited_by_max_github_pages is being used on a resource that does not have a page limit. Url: {url}")
raise Warning(f"Page Count is {page_count}. Either github raised the paginator page limit for things like events and messages, or is_pagination_limited_by_max_github_pages is being used on a resource that does not have a page limit. Url: {url}")

return page_count != 300

@@ -89,7 +89,7 @@ def collect_pr_and_issues_events_by_number(repo_id, repo_git: str, logger, key_a
query = text(f"""
(select pr_src_number as number from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc)
UNION
(select gh_issues_number as number from issues WHERE repo_id={repo_id} order by created_at desc);
(select gh_issue_number as number from issues WHERE repo_id={repo_id} order by created_at desc);
""")

result = connection.execute(query).fetchall()
@@ -130,9 +130,15 @@ def process_events(events, task_name, repo_id, logger):
for event in events:

event, contributor = process_github_event_contributors(logger, event, tool_source, tool_version, data_source)

#logger.info(f'This is the event pack: {event}')
# event_mapping_data is the pr or issue data needed to relate the event to an issue or pr
event_mapping_data = event["issue"]

if 'issue' in event:
if event["issue"] is not None:
event_mapping_data = event["issue"]
else:
event_mapping_data = None
logger.warning(f'issue is not a value in event JSON: {event}')

if event_mapping_data is None:
not_mapable_event_count += 1
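The `process_events` change stops assuming every event payload carries an "issue" object. A condensed sketch of the guard; the function name is a stand-in:

```python
import logging

logger = logging.getLogger(__name__)

def extract_event_mapping_data(event: dict):
    """Return the issue this event maps to, or None when the payload has no usable "issue"."""
    if "issue" in event and event["issue"] is not None:
        return event["issue"]
    logger.warning(f"issue is not a value in event JSON: {event}")
    return None
```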
22 changes: 19 additions & 3 deletions augur/tasks/github/repo_info/core.py
@@ -14,12 +14,28 @@

def query_committers_count(key_auth, logger, owner, repo):

data = {}
logger.info('Querying committers count\n')
url = f'https://api.github.com/repos/{owner}/{repo}/contributors?per_page=100'

github_data_access = GithubDataAccess(key_auth, logger)
## If the repository is empty there are zero committers, and the API returns nothing at all. Response
## header of 200 along with an empty JSON.
try:
github_data_access = GithubDataAccess(key_auth, logger)
try:
data = github_data_access.get_resource_count(url)
except Exception as e:
logger.warning(f"JSON Decode error: {e} indicating there are no committers or the repository is empty or archived.")
data = 0
pass
if not data:
logger.warning("The API Returned an empty JSON object.")
else:
logger.warning("Committer count data returned in JSON")
except ValueError:
logger.warning("The API did not return valid JSON for committer count. This usually occurs on empty or archived repositories.")
data=0

return github_data_access.get_resource_count(url)
return data

def get_repo_data(logger, url, response):
data = {}
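Condensed, the intent of the `query_committers_count` change is: an empty or archived repository returns an empty body for the contributors endpoint, so report zero committers instead of letting the decode error escape. A simplified sketch under that assumption; the real function also logs which branch it took:

```python
def query_committers_count(github_data_access, logger, owner, repo):
    url = f"https://api.github.com/repos/{owner}/{repo}/contributors?per_page=100"
    try:
        return github_data_access.get_resource_count(url)
    except Exception as e:   # e.g. a JSON decode error on an empty response body
        logger.warning(f"Could not count committers for {owner}/{repo}: {e}; "
                       "the repository is likely empty or archived.")
        return 0
```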
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -2,7 +2,7 @@
version: '3'
services:
augur-db:
image: postgres:14
image: postgres:16
restart: unless-stopped
environment:
- "POSTGRES_DB=augur"