Extend ranklist to support query for top 100
RussellDash332 committed Jan 22, 2024
1 parent 0108f2e commit 056356f
Showing 5 changed files with 146 additions and 88 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -67,6 +67,7 @@ kt.suggest() # what's the next problem for me?

```py
kt.ranklist() # people around you
kt.ranklist(top_100=True) # show top 100
kt.ranklist(country='Singapore') # country leaderboard
kt.ranklist(country='SGP') # use alpha-3 code instead
kt.ranklist(university='National University of Singapore') # university leaderboard
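The updated docstring in `autokattis/api/__init__.py` (below) spells out the precedence: a top-100 query overrides the `country`/`university` filters, which in turn remain mutually exclusive. A minimal usage sketch, assuming an authenticated wrapper instance `kt` as in `test/main.py` (the constructor itself is not part of this diff):

```py
top = kt.ranklist(top_100=True)       # global top 100
nearby = kt.ranklist()                # default: people around you
sg = kt.ranklist(country='SGP')       # country leaderboard via alpha-3 code

# top_100 takes precedence, so the country filter here is effectively ignored
also_top = kt.ranklist(top_100=True, country='Singapore')

# results can be converted to a DataFrame, as the tests below do
print(top.to_df().head())
```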
221 changes: 134 additions & 87 deletions autokattis/api/__init__.py
@@ -506,62 +506,25 @@ def suggest(self):
return self.Result(data)

@lru_cache
def ranklist(self, country=None, university=None):
def ranklist(self, top_100=False, country=None, university=None):
'''
Retrieves the current ranklist.
Country or university can be specified, but not both.
Query for top 100 takes precedence over query for country or university.
Otherwise, country or university can be specified, but not both.
Default: ranklist of people around you.
'''

assert country == None or university == None, 'Both of country and university cannot be given at the same time!'

if country == university == None:
soup = self.get_homepage()
try:
table = soup.find_all('table', class_='table2 report_grid-problems_table')[1]
except:
return self.Result([])
if not table:
return self.Result([])
data = []
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
rank, name, pts, *_ = [column.text.strip() for column in columns]
rank = int(rank) if rank.isdigit() else None
pts = float(re.findall(r'[\d\.]+', pts)[0])
findall = columns[1].find_all('a')

new_data = {
'rank': rank,
'name': name,
'points': pts,
'country': None,
'university': None
}

for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]:
assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present'
if 'users' in urlsplit:
new_data['username'] = urlsplit[-1] # guaranteed to exist
elif 'universities' in urlsplit:
new_data['university_code'] = urlsplit[-1]
new_data['university'] = title
elif 'countries' in urlsplit:
new_data['country_code'] = urlsplit[-1]
new_data['country'] = title
data.append(new_data)
elif country != None:
country_code = guess_id(country, COUNTRIES)
response = self.get(f'{self.BASE_URL}/countries/{country_code}')
data = []
if top_100:
response = self.get(f'{self.BASE_URL}/ranklist')
soup = bs(response.content, features='lxml')
try:
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
except:
return self.Result([])
if not table:
return self.Result([])
data = []
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
@@ -593,59 +556,143 @@ def ranklist(self, country=None, university=None):
'name': name,
'username': username,
'points': pts,
'country_code': country_code,
'country': COUNTRIES[country_code],
'country_code': country_code if country else None,
'country': country,
'subdivision_code': subdivision_code if subdivision else None,
'subdivision': subdivision if subdivision else None,
'university_code': university_code if university else None,
'university': university if university else None
})
else:
university_code = guess_id(university, UNIVERSITIES)
response = self.get(f'{self.BASE_URL}/universities/{university_code}')
soup = bs(response.content, features='lxml')
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
if not table:
return self.Result([])
data = []
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
columns_text = [column.text.strip() for column in columns]
columns_url = [column.find_all('a') for column in columns]
assert country == None or university == None, 'Both of country and university cannot be given at the same time!'

if country == university == None:
soup = self.get_homepage()
try:
table = soup.find_all('table', class_='table2 report_grid-problems_table')[1]
except:
return self.Result([])
if not table:
return self.Result([])
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
rank, name, pts, *_ = [column.text.strip() for column in columns]
rank = int(rank) if rank.isdigit() else None
pts = float(re.findall(r'[\d\.]+', pts)[0])
findall = columns[1].find_all('a')

rank = int(columns_text[0])
name = columns_text[1]
pts = float(columns_text[-1])
name_urls = columns_url[1]
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist
new_data = {
'rank': rank,
'name': name,
'points': pts,
'country': None,
'university': None
}

for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]:
assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present'
if 'users' in urlsplit:
new_data['username'] = urlsplit[-1] # guaranteed to exist
elif 'universities' in urlsplit:
new_data['university_code'] = urlsplit[-1]
new_data['university'] = title
elif 'countries' in urlsplit:
new_data['country_code'] = urlsplit[-1]
new_data['country'] = title
data.append(new_data)
elif country != None:
country_code = guess_id(country, COUNTRIES)
response = self.get(f'{self.BASE_URL}/countries/{country_code}')
soup = bs(response.content, features='lxml')
try:
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
except:
return self.Result([])
if not table:
return self.Result([])
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
columns_text = [column.text.strip() for column in columns]
columns_url = [column.find_all('a') for column in columns]

rank = int(columns_text[0])
name = columns_text[1]
pts = float(columns_text[-1])
name_urls = columns_url[1]
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist

if 'Subdivision' in headers:
subdivision = columns_text[2]
subdivision_urls = columns_url[2]
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
else:
subdivision = None

if 'Country' in headers:
country = columns_text[2]
country_urls = columns_url[2]
country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None
else:
country = None
if 'University' in headers:
university = columns_text[-2]
university_urls = columns_url[-2]
university_code = university_urls[0].get('href').split('/')[-1] if university_urls else None
else:
university = None

if 'Subdivision' in headers:
subdivision = columns_text[-2]
subdivision_urls = columns_url[-2]
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
else:
subdivision = None
data.append({
'rank': rank,
'name': name,
'username': username,
'points': pts,
'country_code': country_code,
'country': COUNTRIES[country_code],
'subdivision_code': subdivision_code if subdivision else None,
'subdivision': subdivision if subdivision else None,
'university_code': university_code if university else None,
'university': university if university else None
})
else:
university_code = guess_id(university, UNIVERSITIES)
response = self.get(f'{self.BASE_URL}/universities/{university_code}')
soup = bs(response.content, features='lxml')
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
if not table:
return self.Result([])
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
for row in table.tbody.find_all('tr'):
columns = row.find_all('td')
columns_text = [column.text.strip() for column in columns]
columns_url = [column.find_all('a') for column in columns]

rank = int(columns_text[0])
name = columns_text[1]
pts = float(columns_text[-1])
name_urls = columns_url[1]
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist

if 'Country' in headers:
country = columns_text[2]
country_urls = columns_url[2]
country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None
else:
country = None

data.append({
'rank': rank,
'name': name,
'username': username,
'points': pts,
'country_code': country_code if country else None,
'country': country if country else None,
'subdivision_code': subdivision_code if subdivision else None,
'subdivision': subdivision if subdivision else None,
'university_code': university_code,
'university': UNIVERSITIES[university_code]
})
if 'Subdivision' in headers:
subdivision = columns_text[-2]
subdivision_urls = columns_url[-2]
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
else:
subdivision = None

data.append({
'rank': rank,
'name': name,
'username': username,
'points': pts,
'country_code': country_code if country else None,
'country': country if country else None,
'subdivision_code': subdivision_code if subdivision else None,
'subdivision': subdivision if subdivision else None,
'university_code': university_code,
'university': UNIVERSITIES[university_code]
})
return self.Result(data)

@lru_cache
@@ -820,7 +867,7 @@ def assignments(self, offering_id, course_id=None):
'problems': ','.join(pids)
})
name, status = truncate(asg.text.strip()).split('\n')
status = status.replace('(', '').replace(')', '')
status = status.replace('(', '').replace(')', '').strip()
link = self.get_base_url() + asg.find('a').get('href')
aid = link.split('/')[-1]
pids = []
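All three branches of the reworked `ranklist` follow the same scraping pattern: fetch the page, locate the `top_users` table, derive the column layout from its `<th>` headers, then read each `<tbody>` row. The following self-contained sketch illustrates that pattern on toy HTML; it is a simplified stand-in, not the library's exact code, and the real markup is fetched from `{self.BASE_URL}/ranklist`, `/countries/<code>`, or `/universities/<code>` as shown in the diff above.

```py
import re
from bs4 import BeautifulSoup

# Toy markup standing in for Kattis' ranklist table (id='top_users' on the real pages).
HTML = '''
<table id="top_users">
  <thead><tr><th>Rank</th><th>Name</th><th>Country</th><th>Score</th></tr></thead>
  <tbody><tr>
    <td>1</td>
    <td><a href="/users/alice" title="Alice">Alice</a></td>
    <td><a href="/countries/SGP" title="Singapore">Singapore</a></td>
    <td>3141.5</td>
  </tr></tbody>
</table>
'''

soup = BeautifulSoup(HTML, features='lxml')
table = soup.find('table', id='top_users')

# Column layout is inferred from the header row, as in the diff above.
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]

data = []
for row in table.tbody.find_all('tr'):
    columns = row.find_all('td')
    columns_text = [c.text.strip() for c in columns]
    columns_url = [c.find_all('a') for c in columns]
    entry = {
        'rank': int(columns_text[0]),
        'name': columns_text[1],
        'username': columns_url[1][0].get('href').split('/')[-1],
        'points': float(columns_text[-1]),
    }
    if 'Country' in headers:  # optional columns depend on which leaderboard was requested
        country_urls = columns_url[headers.index('Country')]
        entry['country_code'] = country_urls[0].get('href').split('/')[-1] if country_urls else None
    data.append(entry)

print(data)  # [{'rank': 1, 'name': 'Alice', 'username': 'alice', 'points': 3141.5, 'country_code': 'SGP'}]
```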
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setup_args = dict(
name='autokattis',
version='1.6',
version='1.6.1',
description='Updated Kattis API wrapper',
long_description_content_type="text/markdown",
long_description=README,
5 changes: 5 additions & 0 deletions test/main.py
@@ -85,6 +85,11 @@
print(df:=ret.to_df())
df.to_csv('test_ranklist_default.csv', index=False)

print('=== TEST RANKLIST (TOP 100) ===')
ret = kt.ranklist(top_100=True) # show top 100
print(df:=ret.to_df())
df.to_csv('test_ranklist_top100.csv', index=False)

print('=== TEST RANKLIST (COUNTRY) ===')
ret = kt.ranklist(country='Singapore') # country leaderboard
print(df:=ret.to_df())
5 changes: 5 additions & 0 deletions test/nus.py
@@ -85,6 +85,11 @@
print(df:=ret.to_df())
df.to_csv('test_nus_ranklist_default.csv', index=False)

print('=== TEST RANKLIST (TOP 100) ===')
ret = kt.ranklist(top_100=True) # show top 100
print(df:=ret.to_df())
df.to_csv('test_nus_ranklist_top100.csv', index=False)

print('=== TEST RANKLIST (COUNTRY) ===')
ret = kt.ranklist(country='Singapore') # country leaderboard
print(df:=ret.to_df())
