From 056356f7cf0163fcbf2d6a2c8071c1df6c3ec53f Mon Sep 17 00:00:00 2001 From: RussellDash332 Date: Mon, 22 Jan 2024 18:21:21 +0700 Subject: [PATCH] Extend ranklist to support query for top 100 --- README.md | 1 + autokattis/api/__init__.py | 221 ++++++++++++++++++++++--------------- setup.py | 2 +- test/main.py | 5 + test/nus.py | 5 + 5 files changed, 146 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index c429465..ac165e9 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ kt.suggest() # what's the next problem for me? ```py kt.ranklist() # people around you +kt.ranklist(top_100=True) # show top 100 kt.ranklist(country='Singapore') # country leaderboard kt.ranklist(country='SGP') # use alpha-3 code instead kt.ranklist(university='National University of Singapore') # university leaderboard diff --git a/autokattis/api/__init__.py b/autokattis/api/__init__.py index 627b4c9..e25c63c 100644 --- a/autokattis/api/__init__.py +++ b/autokattis/api/__init__.py @@ -506,54 +506,18 @@ def suggest(self): return self.Result(data) @lru_cache - def ranklist(self, country=None, university=None): + def ranklist(self, top_100=False, country=None, university=None): ''' Retrieves the current ranklist. - Country or university can be specified, but not both. + Query for top 100 takes precedence over query for country or university. + Otherwise, country or university can be specified, but not both. Default: ranklist of people around you. ''' - assert country == None or university == None, 'Both of country and university cannot be given at the same time!' - - if country == university == None: - soup = self.get_homepage() - try: - table = soup.find_all('table', class_='table2 report_grid-problems_table')[1] - except: - return self.Result([]) - if not table: - return self.Result([]) - data = [] - for row in table.tbody.find_all('tr'): - columns = row.find_all('td') - rank, name, pts, *_ = [column.text.strip() for column in columns] - rank = int(rank) if rank.isdigit() else None - pts = float(re.findall(r'[\d\.]+', pts)[0]) - findall = columns[1].find_all('a') - - new_data = { - 'rank': rank, - 'name': name, - 'points': pts, - 'country': None, - 'university': None - } - - for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]: - assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present' - if 'users' in urlsplit: - new_data['username'] = urlsplit[-1] # guaranteed to exist - elif 'universities' in urlsplit: - new_data['university_code'] = urlsplit[-1] - new_data['university'] = title - elif 'countries' in urlsplit: - new_data['country_code'] = urlsplit[-1] - new_data['country'] = title - data.append(new_data) - elif country != None: - country_code = guess_id(country, COUNTRIES) - response = self.get(f'{self.BASE_URL}/countries/{country_code}') + data = [] + if top_100: + response = self.get(f'{self.BASE_URL}/ranklist') soup = bs(response.content, features='lxml') try: table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users') @@ -561,7 +525,6 @@ def ranklist(self, country=None, university=None): return self.Result([]) if not table: return self.Result([]) - data = [] headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')] for row in table.tbody.find_all('tr'): columns = row.find_all('td') @@ -593,59 +556,143 @@ def ranklist(self, country=None, university=None): 'name': name, 'username': username, 'points': pts, - 'country_code': country_code, - 'country': COUNTRIES[country_code], + 'country_code': country_code if country else None, + 'country': country, 'subdivision_code': subdivision_code if subdivision else None, 'subdivision': subdivision if subdivision else None, 'university_code': university_code if university else None, 'university': university if university else None }) else: - university_code = guess_id(university, UNIVERSITIES) - response = self.get(f'{self.BASE_URL}/universities/{university_code}') - soup = bs(response.content, features='lxml') - table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users') - if not table: - return self.Result([]) - data = [] - headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')] - for row in table.tbody.find_all('tr'): - columns = row.find_all('td') - columns_text = [column.text.strip() for column in columns] - columns_url = [column.find_all('a') for column in columns] + assert country == None or university == None, 'Both of country and university cannot be given at the same time!' + + if country == university == None: + soup = self.get_homepage() + try: + table = soup.find_all('table', class_='table2 report_grid-problems_table')[1] + except: + return self.Result([]) + if not table: + return self.Result([]) + for row in table.tbody.find_all('tr'): + columns = row.find_all('td') + rank, name, pts, *_ = [column.text.strip() for column in columns] + rank = int(rank) if rank.isdigit() else None + pts = float(re.findall(r'[\d\.]+', pts)[0]) + findall = columns[1].find_all('a') - rank = int(columns_text[0]) - name = columns_text[1] - pts = float(columns_text[-1]) - name_urls = columns_url[1] - username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist + new_data = { + 'rank': rank, + 'name': name, + 'points': pts, + 'country': None, + 'university': None + } + + for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]: + assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present' + if 'users' in urlsplit: + new_data['username'] = urlsplit[-1] # guaranteed to exist + elif 'universities' in urlsplit: + new_data['university_code'] = urlsplit[-1] + new_data['university'] = title + elif 'countries' in urlsplit: + new_data['country_code'] = urlsplit[-1] + new_data['country'] = title + data.append(new_data) + elif country != None: + country_code = guess_id(country, COUNTRIES) + response = self.get(f'{self.BASE_URL}/countries/{country_code}') + soup = bs(response.content, features='lxml') + try: + table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users') + except: + return self.Result([]) + if not table: + return self.Result([]) + headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')] + for row in table.tbody.find_all('tr'): + columns = row.find_all('td') + columns_text = [column.text.strip() for column in columns] + columns_url = [column.find_all('a') for column in columns] + + rank = int(columns_text[0]) + name = columns_text[1] + pts = float(columns_text[-1]) + name_urls = columns_url[1] + username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist + + if 'Subdivision' in headers: + subdivision = columns_text[2] + subdivision_urls = columns_url[2] + subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None + else: + subdivision = None - if 'Country' in headers: - country = columns_text[2] - country_urls = columns_url[2] - country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None - else: - country = None + if 'University' in headers: + university = columns_text[-2] + university_urls = columns_url[-2] + university_code = university_urls[0].get('href').split('/')[-1] if university_urls else None + else: + university = None - if 'Subdivision' in headers: - subdivision = columns_text[-2] - subdivision_urls = columns_url[-2] - subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None - else: - subdivision = None + data.append({ + 'rank': rank, + 'name': name, + 'username': username, + 'points': pts, + 'country_code': country_code, + 'country': COUNTRIES[country_code], + 'subdivision_code': subdivision_code if subdivision else None, + 'subdivision': subdivision if subdivision else None, + 'university_code': university_code if university else None, + 'university': university if university else None + }) + else: + university_code = guess_id(university, UNIVERSITIES) + response = self.get(f'{self.BASE_URL}/universities/{university_code}') + soup = bs(response.content, features='lxml') + table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users') + if not table: + return self.Result([]) + headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')] + for row in table.tbody.find_all('tr'): + columns = row.find_all('td') + columns_text = [column.text.strip() for column in columns] + columns_url = [column.find_all('a') for column in columns] + + rank = int(columns_text[0]) + name = columns_text[1] + pts = float(columns_text[-1]) + name_urls = columns_url[1] + username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist + + if 'Country' in headers: + country = columns_text[2] + country_urls = columns_url[2] + country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None + else: + country = None - data.append({ - 'rank': rank, - 'name': name, - 'username': username, - 'points': pts, - 'country_code': country_code if country else None, - 'country': country if country else None, - 'subdivision_code': subdivision_code if subdivision else None, - 'subdivision': subdivision if subdivision else None, - 'university_code': university_code, - 'university': UNIVERSITIES[university_code] - }) + if 'Subdivision' in headers: + subdivision = columns_text[-2] + subdivision_urls = columns_url[-2] + subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None + else: + subdivision = None + + data.append({ + 'rank': rank, + 'name': name, + 'username': username, + 'points': pts, + 'country_code': country_code if country else None, + 'country': country if country else None, + 'subdivision_code': subdivision_code if subdivision else None, + 'subdivision': subdivision if subdivision else None, + 'university_code': university_code, + 'university': UNIVERSITIES[university_code] + }) return self.Result(data) @lru_cache @@ -820,7 +867,7 @@ def assignments(self, offering_id, course_id=None): 'problems': ','.join(pids) }) name, status = truncate(asg.text.strip()).split('\n') - status = status.replace('(', '').replace(')', '') + status = status.replace('(', '').replace(')', '').strip() link = self.get_base_url() + asg.find('a').get('href') aid = link.split('/')[-1] pids = [] diff --git a/setup.py b/setup.py index bd9be8e..b92b210 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup_args = dict( name='autokattis', - version='1.6', + version='1.6.1', description='Updated Kattis API wrapper', long_description_content_type="text/markdown", long_description=README, diff --git a/test/main.py b/test/main.py index 057018a..d8579f0 100644 --- a/test/main.py +++ b/test/main.py @@ -85,6 +85,11 @@ print(df:=ret.to_df()) df.to_csv('test_ranklist_default.csv', index=False) +print('=== TEST RANKLIST (TOP 100) ===') +ret = kt.ranklist(top_100=True) # show top 100 +print(df:=ret.to_df()) +df.to_csv('test_ranklist_top100.csv', index=False) + print('=== TEST RANKLIST (COUNTRY) ===') ret = kt.ranklist(country='Singapore') # country leaderboard print(df:=ret.to_df()) diff --git a/test/nus.py b/test/nus.py index 1d15f0a..39d0bfe 100644 --- a/test/nus.py +++ b/test/nus.py @@ -85,6 +85,11 @@ print(df:=ret.to_df()) df.to_csv('test_nus_ranklist_default.csv', index=False) +print('=== TEST RANKLIST (TOP 100) ===') +ret = kt.ranklist(top_100=True) # show top 100 +print(df:=ret.to_df()) +df.to_csv('test_nus_ranklist_top100.csv', index=False) + print('=== TEST RANKLIST (COUNTRY) ===') ret = kt.ranklist(country='Singapore') # country leaderboard print(df:=ret.to_df())