Skip to content

Commit 7c4639b

Browse files
authored
Merge pull request #334 from parthsharma2/dev
PR metric implementations & PR worker bug fixes
2 parents 4493830 + 7f097ea commit 7c4639b

File tree

3 files changed

+539
-25
lines changed

3 files changed

+539
-25
lines changed

augur/datasources/augur_db/augur_db.py

+250-10
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,182 @@ def code_changes_lines(self, repo_group_id, repo_id=None, period='day', begin_da
692692
'begin_date': begin_date, 'end_date': end_date})
693693
return results
694694

695+
@annotate(tag='reviews')
696+
def reviews(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
697+
""" Returns a timeseris of new reviews or pull requests opened
698+
699+
:param repo_group_id: The repository's repo_group_id
700+
:param repo_id: The repository's repo_id, defaults to None
701+
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
702+
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
703+
:param end_date: Specifies the end date, defaults to datetime.now()
704+
:return: DataFrame of new reviews/period
705+
"""
706+
if not begin_date:
707+
begin_date = '1970-1-1'
708+
if not end_date:
709+
end_date = datetime.datetime.now().strftime('%Y-%m-%d')
710+
711+
if not repo_id:
712+
reviews_SQL = s.sql.text("""
713+
SELECT
714+
pull_requests.repo_id,
715+
repo_name,
716+
DATE_TRUNC(:period, pull_requests.pr_created_at) AS date,
717+
COUNT(pr_src_id) AS pull_requests
718+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
719+
WHERE pull_requests.repo_id IN
720+
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
721+
AND pull_requests.pr_created_at
722+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
723+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
724+
GROUP BY pull_requests.repo_id, repo_name, date
725+
ORDER BY pull_requests.repo_id, date
726+
""")
727+
728+
results = pd.read_sql(reviews_SQL, self.db,
729+
params={'period': period, 'repo_group_id': repo_group_id,
730+
'begin_date': begin_date, 'end_date': end_date })
731+
return results
732+
733+
else:
734+
reviews_SQL = s.sql.text("""
735+
SELECT
736+
repo_name,
737+
DATE_TRUNC(:period, pull_requests.pr_created_at) AS date,
738+
COUNT(pr_src_id) AS pull_requests
739+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
740+
WHERE pull_requests.repo_id = :repo_id
741+
AND pull_requests.pr_created_at
742+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD HH24:MI:SS')
743+
AND to_timestamp(:end_date, 'YYYY-MM-DD HH24:MI:SS')
744+
GROUP BY date, repo_name
745+
ORDER BY date
746+
""")
747+
748+
results = pd.read_sql(reviews_SQL, self.db,
749+
params={'period': period, 'repo_id': repo_id,
750+
'begin_date': begin_date, 'end_date': end_date})
751+
return results
752+
753+
@annotate(tag='reviews-accepted')
754+
def reviews_accepted(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
755+
"""Returns a timeseries of number of reviews or pull requests accepted.
756+
757+
:param repo_group_id: The repository's repo_group_id
758+
:param repo_id: The repository's repo_id, defaults to None
759+
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
760+
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
761+
:param end_date: Specifies the end date, defaults to datetime.now()
762+
:return: DataFrame of accepted reviews/period
763+
"""
764+
if not begin_date:
765+
begin_date = '1970-1-1'
766+
if not end_date:
767+
end_date = datetime.datetime.now().strftime('%Y-%m-%d')
768+
769+
if not repo_id:
770+
reviews_accepted_SQL = s.sql.text("""
771+
SELECT
772+
pull_requests.repo_id,
773+
repo.repo_name,
774+
DATE_TRUNC(:period, pull_requests.pr_merged_at) AS date,
775+
COUNT(pr_src_id) AS pull_requests
776+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
777+
WHERE pull_requests.repo_id IN
778+
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
779+
AND pr_merged_at IS NOT NULL
780+
AND pr_merged_at
781+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
782+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
783+
GROUP BY pull_requests.repo_id, repo_name, date
784+
ORDER BY pull_requests.repo_id, date
785+
""")
786+
787+
results = pd.read_sql(reviews_accepted_SQL, self.db,
788+
params={'period': period, 'repo_group_id': repo_group_id,
789+
'begin_date': begin_date, 'end_date': end_date})
790+
return results
791+
else:
792+
reviews_accepted_SQL = s.sql.text("""
793+
SELECT
794+
repo.repo_name,
795+
DATE_TRUNC(:period, pull_requests.pr_merged_at) AS date,
796+
COUNT(pr_src_id) AS pull_requests
797+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
798+
WHERE pull_requests.repo_id = :repo_id
799+
AND pr_merged_at IS NOT NULL
800+
AND pr_merged_at
801+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
802+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
803+
GROUP BY date, repo.repo_name
804+
ORDER BY date
805+
""")
806+
807+
results = pd.read_sql(reviews_accepted_SQL, self.db,
808+
params={'period': period, 'repo_id': repo_id,
809+
'begin_date': begin_date, 'end_date': end_date})
810+
return results
811+
812+
@annotate(tag='reviews-declined')
813+
def reviews_declined(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
814+
""" Returns a time series of reivews declined
815+
816+
:param repo_group_id: The repository's repo_group_id
817+
:param repo_id: The repository's repo_id, defaults to None
818+
:param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
819+
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
820+
:param end_date: Specifies the end date, defaults to datetime.now()
821+
:return: DataFrame of declined reviews/period
822+
"""
823+
if not begin_date:
824+
begin_date = '1970-1-1'
825+
if not end_date:
826+
end_date = datetime.datetime.now().strftime('%Y-%m-%d')
827+
828+
if not repo_id:
829+
reviews_declined_SQL = s.sql.text("""
830+
SELECT
831+
pull_requests.repo_id,
832+
repo.repo_name,
833+
DATE_TRUNC(:period, pull_requests.pr_closed_at) AS date,
834+
COUNT(pr_src_id) AS pull_requests
835+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
836+
WHERE pull_requests.repo_id IN
837+
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
838+
AND pr_src_state = 'closed' AND pr_merged_at IS NULL
839+
AND pr_closed_at
840+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
841+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
842+
GROUP BY pull_requests.repo_id, repo_name, date
843+
ORDER BY pull_requests.repo_id, date
844+
""")
845+
846+
results = pd.read_sql(reviews_declined_SQL, self.db,
847+
params={'period': period, 'repo_group_id': repo_group_id,
848+
'begin_date': begin_date, 'end_date': end_date })
849+
return results
850+
else:
851+
reviews_declined_SQL = s.sql.text("""
852+
SELECT
853+
repo.repo_name,
854+
DATE_TRUNC(:period, pull_requests.pr_closed_at) AS date,
855+
COUNT(pr_src_id) AS pull_requests
856+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
857+
WHERE pull_requests.repo_id = :repo_id
858+
AND pr_src_state = 'closed' AND pr_merged_at IS NULL
859+
AND pr_closed_at
860+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
861+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
862+
GROUP BY date, repo.repo_name
863+
ORDER BY date
864+
""")
865+
866+
results = pd.read_sql(reviews_declined_SQL, self.db,
867+
params={'period': period, 'repo_id': repo_id,
868+
'begin_date': begin_date, 'end_date': end_date})
869+
return results
870+
695871
@annotate(tag='issues-new')
696872
def issues_new(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
697873
"""Returns a timeseries of new issues opened.
@@ -855,6 +1031,70 @@ def issues_closed(self, repo_group_id, repo_id=None, period='day', begin_date=No
8551031
'begin_date': begin_date, 'end_date': end_date})
8561032
return results
8571033

1034+
@annotate(tag='review-duration')
1035+
def review_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
1036+
""" Returns the duartion of each accepted review.
1037+
1038+
:param repo_group_id: The repository's repo_group_id
1039+
:param repo_id: The repository's repo_id, defaults to None
1040+
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
1041+
:param end_date: Specifies the end date, defaults to datetime.now()
1042+
:return: DataFrame of pull request id with the corresponding duration
1043+
"""
1044+
if not begin_date:
1045+
begin_date = '1970-1-1'
1046+
if not end_date:
1047+
end_date = datetime.datetime.now().strftime('%Y-%m-%d')
1048+
1049+
if not repo_id:
1050+
review_duration_SQL = s.sql.text("""
1051+
SELECT
1052+
pull_requests.repo_id,
1053+
repo.repo_name,
1054+
pull_requests.pull_request_id,
1055+
pull_requests.pr_created_at AS created_at,
1056+
pull_requests.pr_merged_at AS merged_at,
1057+
(pr_merged_at - pr_created_at) AS duration
1058+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
1059+
WHERE pull_requests.repo_id IN
1060+
(SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id)
1061+
AND pr_merged_at IS NOT NULL
1062+
AND pr_created_at
1063+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
1064+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
1065+
ORDER BY pull_requests.repo_id, pull_requests.pull_request_id
1066+
""")
1067+
1068+
results = pd.read_sql(review_duration_SQL, self.db,
1069+
params={'repo_group_id': repo_group_id,
1070+
'begin_date': begin_date,
1071+
'end_date': end_date})
1072+
results['duration'] = results['duration'].astype(str)
1073+
return results
1074+
else:
1075+
review_duration_SQL = s.sql.text("""
1076+
SELECT
1077+
repo_name,
1078+
pull_request_id,
1079+
pr_created_at AS created_at,
1080+
pr_merged_at AS merged_at,
1081+
(pr_merged_at - pr_created_at) AS duration
1082+
FROM pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id
1083+
WHERE pull_requests.repo_id = :repo_id
1084+
AND pr_merged_at IS NOT NULL
1085+
AND pr_created_at
1086+
BETWEEN to_timestamp(:begin_date, 'YYYY-MM-DD')
1087+
AND to_timestamp(:end_date, 'YYYY-MM-DD')
1088+
ORDER BY pull_requests.repo_id, pull_request_id
1089+
""")
1090+
1091+
results = pd.read_sql(review_duration_SQL, self.db,
1092+
params={'repo_id': repo_id,
1093+
'begin_date': begin_date,
1094+
'end_date': end_date})
1095+
results['duration'] = results['duration'].astype(str)
1096+
return results
1097+
8581098
@annotate(tag='issue-duration')
8591099
def issue_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
8601100
"""Returns the duration of each issue.
@@ -1388,7 +1628,7 @@ def license_declared(self, repo_group_id, repo_id=None):
13881628
repo_name = pd.read_sql(s.sql.text('SELECT repo_name FROM repo WHERE repo_group_id = :repo_group_id'), self.spdx_db, params={'repo_group_id', repo_group_id})
13891629
if repo_name.empty:
13901630
return pd.DataFrame()
1391-
1631+
13921632
repo_name_list = repo_name['repo_name'].tolist()
13931633
license_declared_SQL = s.sql.text("""
13941634
SELECT packages.name as repo_name, licenses.short_name, COUNT(files_licenses.file_id) as count
@@ -1399,11 +1639,11 @@ def license_declared(self, repo_group_id, repo_id=None):
13991639
GROUP BY licenses.short_name, repo_name
14001640
ORDER BY count DESC
14011641
""")
1402-
1642+
14031643
results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})
14041644

14051645
return results
1406-
1646+
14071647
@annotate(tag='license-coverage')
14081648
def license_coverage(self, repo_group_id, repo_id=None):
14091649
"""Returns the declared license
@@ -1475,11 +1715,11 @@ def license_coverage(self, repo_group_id, repo_id=None):
14751715
)b
14761716
WHERE a.name = b.name
14771717
""")
1478-
1718+
14791719
results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})
14801720

14811721
return results
1482-
1722+
14831723
@annotate(tag='license_count')
14841724
def license_count(self, repo_group_id, repo_id=None):
14851725
"""Returns the declared license
@@ -1579,13 +1819,13 @@ def license_count(self, repo_group_id, repo_id=None):
15791819
ORDER BY license_declared_file DESC
15801820
) c
15811821
WHERE a.name = b.name
1582-
AND b.name = c.name
1822+
AND b.name = c.name
15831823
""")
1584-
1824+
15851825
results = pd.read_sql(license_declared_SQL, self.spdx_db, params={'repo_name_list': repo_name_list})
15861826

15871827
return results
1588-
1828+
15891829

15901830
@annotate(tag='issues-maintainer-response-duration')
15911831
def issues_maintainer_response_duration(self, repo_group_id, repo_id=None, begin_date=None, end_date=None):
@@ -1661,7 +1901,7 @@ def issues_maintainer_response_duration(self, repo_group_id, repo_id=None, begin
16611901

16621902
@annotate(tag='committers')
16631903
def committers(self, repo_group_id, repo_id=None, begin_date=None, end_date=None, period='day'):
1664-
1904+
16651905
if not begin_date:
16661906
begin_date = '1970-1-1 00:00:01'
16671907
if not end_date:
@@ -1706,7 +1946,7 @@ def committers(self, repo_group_id, repo_id=None, begin_date=None, end_date=None
17061946

17071947
results = pd.read_sql(committersSQL, self.db, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period})
17081948

1709-
return results
1949+
return results
17101950

17111951

17121952
#####################################

0 commit comments

Comments
 (0)