Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

PR metric implementations & PR worker bug fixes #334

Merged
merged 6 commits into from
Jul 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 250 additions & 10 deletions augur/datasources/augur_db/augur_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,182 @@ def code_changes_lines(self, repo_group_id, repo_id=None, period='day', begin_da
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='reviews')
def reviews(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
""" Returns a timeseris of new reviews or pull requests opened

:param repo_group_id: The repository's repo_group_id
:param repo_id: The repository's repo_id, defaults to None
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
:param end_date: Specifies the end date, defaults to datetime.now()
:return: DataFrame of new reviews/period
"""
if not begin_date:
begin_date = '1970-1-1'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

if not repo_id:
reviews_SQL = s.sql.text("""
SELECT
pull_requests.repo_id,
repo_name,
DATE_TRUNC(:period, pull_requests.pr_created_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id IN
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
AND pull_requests.pr_created_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
GROUP BY pull_requests.repo_id, repo_name, date
ORDER BY pull_requests.repo_id, date
""")

results = pd.read_sql(reviews_SQL, self.db,
params={'period': period, 'repo_group_id': repo_group_id,
'begin_date': begin_date, 'end_date': end_date })
return results

else:
reviews_SQL = s.sql.text("""
SELECT
repo_name,
DATE_TRUNC(:period, pull_requests.pr_created_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id = :repo_id
AND pull_requests.pr_created_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD HH24:MI:SS')
AND to_timestamp(:end_date, 'YYYY-MM-DD HH24:MI:SS')
GROUP BY date, repo_name
ORDER BY date
""")

results = pd.read_sql(reviews_SQL, self.db,
params={'period': period, 'repo_id': repo_id,
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='reviews-accepted')
def reviews_accepted(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
"""Returns a timeseries of number of reviews or pull requests accepted.

:param repo_group_id: The repository's repo_group_id
:param repo_id: The repository's repo_id, defaults to None
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
:param end_date: Specifies the end date, defaults to datetime.now()
:return: DataFrame of accepted reviews/period
"""
if not begin_date:
begin_date = '1970-1-1'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

if not repo_id:
reviews_accepted_SQL = s.sql.text("""
SELECT
pull_requests.repo_id,
repo.repo_name,
DATE_TRUNC(:period, pull_requests.pr_merged_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id IN
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
AND pr_merged_at IS NOT NULL
AND pr_merged_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
GROUP BY pull_requests.repo_id, repo_name, date
ORDER BY pull_requests.repo_id, date
""")

results = pd.read_sql(reviews_accepted_SQL, self.db,
params={'period': period, 'repo_group_id': repo_group_id,
'begin_date': begin_date, 'end_date': end_date})
return results
else:
reviews_accepted_SQL = s.sql.text("""
SELECT
repo.repo_name,
DATE_TRUNC(:period, pull_requests.pr_merged_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id = :repo_id
AND pr_merged_at IS NOT NULL
AND pr_merged_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
GROUP BY date, repo.repo_name
ORDER BY date
""")

results = pd.read_sql(reviews_accepted_SQL, self.db,
params={'period': period, 'repo_id': repo_id,
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='reviews-declined')
def reviews_declined(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
""" Returns a time series of reivews declined

:param repo_group_id: The repository's repo_group_id
:param repo_id: The repository's repo_id, defaults to None
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
:param end_date: Specifies the end date, defaults to datetime.now()
:return: DataFrame of declined reviews/period
"""
if not begin_date:
begin_date = '1970-1-1'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

if not repo_id:
reviews_declined_SQL = s.sql.text("""
SELECT
pull_requests.repo_id,
repo.repo_name,
DATE_TRUNC(:period, pull_requests.pr_closed_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id IN
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
AND pr_src_state = 'closed' AND pr_merged_at IS NULL
AND pr_closed_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
GROUP BY pull_requests.repo_id, repo_name, date
ORDER BY pull_requests.repo_id, date
""")

results = pd.read_sql(reviews_declined_SQL, self.db,
params={'period': period, 'repo_group_id': repo_group_id,
'begin_date': begin_date, 'end_date': end_date })
return results
else:
reviews_declined_SQL = s.sql.text("""
SELECT
repo.repo_name,
DATE_TRUNC(:period, pull_requests.pr_closed_at) AS date,
COUNT(pr_src_id) AS pull_requests
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id = :repo_id
AND pr_src_state = 'closed' AND pr_merged_at IS NULL
AND pr_closed_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
GROUP BY date, repo.repo_name
ORDER BY date
""")

results = pd.read_sql(reviews_declined_SQL, self.db,
params={'period': period, 'repo_id': repo_id,
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='issues-new')
def issues_new(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
"""Returns a timeseries of new issues opened.
Expand Down Expand Up @@ -855,6 +1031,70 @@ def issues_closed(self, repo_group_id, repo_id=None, period='day', begin_date=No
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='review-duration')
def review_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
""" Returns the duartion of each accepted review.

:param repo_group_id: The repository's repo_group_id
:param repo_id: The repository's repo_id, defaults to None
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
:param end_date: Specifies the end date, defaults to datetime.now()
:return: DataFrame of pull request id with the corresponding duration
"""
if not begin_date:
begin_date = '1970-1-1'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

if not repo_id:
review_duration_SQL = s.sql.text("""
SELECT
pull_requests.repo_id,
repo.repo_name,
pull_requests.pull_request_id,
pull_requests.pr_created_at AS created_at,
pull_requests.pr_merged_at AS merged_at,
(pr_merged_at - pr_created_at) AS duration
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id IN
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
AND pr_merged_at IS NOT NULL
AND pr_created_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
ORDER BY pull_requests.repo_id, pull_requests.pull_request_id
""")

results = pd.read_sql(review_duration_SQL, self.db,
params={'repo_group_id': repo_group_id,
'begin_date': begin_date,
'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results
else:
review_duration_SQL = s.sql.text("""
SELECT
repo_name,
pull_request_id,
pr_created_at AS created_at,
pr_merged_at AS merged_at,
(pr_merged_at - pr_created_at) AS duration
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
WHERE pull_requests.repo_id = :repo_id
AND pr_merged_at IS NOT NULL
AND pr_created_at
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
AND to_timestamp(:end_date, 'YYYY-MM-DD')
ORDER BY pull_requests.repo_id, pull_request_id
""")

results = pd.read_sql(review_duration_SQL, self.db,
params={'repo_id': repo_id,
'begin_date': begin_date,
'end_date': end_date})
results['duration'] = results['duration'].astype(str)
return results

@annotate(tag='issue-duration')
def issue_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
"""Returns the duration of each issue.
Expand Down Expand Up @@ -1388,7 +1628,7 @@ def license_declared(self, repo_group_id, repo_id=None):
repo_name = pd.read_sql(s.sql.text('SELECT repo_name FROM repo WHERE repo_group_id = :repo_group_id'), self.spdx_db, params={'repo_group_id', repo_group_id})
if repo_name.empty:
return pd.DataFrame()

repo_name_list = repo_name['repo_name'].tolist()
license_declared_SQL = s.sql.text("""
SELECT packages.name as repo_name, licenses.short_name, COUNT(files_licenses.file_id) as count
Expand All @@ -1399,11 +1639,11 @@ def license_declared(self, repo_group_id, repo_id=None):
GROUP BY licenses.short_name, repo_name
ORDER BY count DESC
""")

results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})

return results

@annotate(tag='license-coverage')
def license_coverage(self, repo_group_id, repo_id=None):
"""Returns the declared license
Expand Down Expand Up @@ -1475,11 +1715,11 @@ def license_coverage(self, repo_group_id, repo_id=None):
)b
WHERE a.name = b.name
""")

results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})

return results

@annotate(tag='license_count')
def license_count(self, repo_group_id, repo_id=None):
"""Returns the declared license
Expand Down Expand Up @@ -1579,13 +1819,13 @@ def license_count(self, repo_group_id, repo_id=None):
ORDER BY license_declared_file DESC
) c
WHERE a.name = b.name
AND b.name = c.name
AND b.name = c.name
""")

results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})

return results


@annotate(tag='issues-maintainer-response-duration')
def issues_maintainer_response_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
Expand Down Expand Up @@ -1661,7 +1901,7 @@ def issues_maintainer_response_duration(self, repo_group_id, repo_id=None, begin

@annotate(tag='committers')
def committers(self, repo_group_id, repo_id=None, begin_date=None, end_date=None, period='day'):

if not begin_date:
begin_date = '1970-1-1 00:00:01'
if not end_date:
Expand Down Expand Up @@ -1706,7 +1946,7 @@ def committers(self, repo_group_id, repo_id=None, begin_date=None, end_date=None

results = pd.read_sql(committersSQL, self.db, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period})

return results
return results


#####################################
Expand Down
Loading