-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
Fix github api pagination #247
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
from concurrent.futures import ThreadPoolExecutor | ||
from typing import Any, Dict, List, Optional, Tuple | ||
|
||
import aiohttp | ||
import chardet | ||
import requests | ||
from fastapi import HTTPException | ||
|
@@ -187,23 +188,71 @@ async def get_repos_for_user(self, user_id: str): | |
|
||
auth = AppAuth(app_id=app_id, private_key=private_key) | ||
jwt = auth.create_jwt() | ||
installations_url = "https://api.github.com/app/installations" | ||
|
||
# Fetch all installations with pagination | ||
all_installations = [] | ||
base_url = "https://api.github.com/app/installations" | ||
headers = { | ||
"Accept": "application/vnd.github+json", | ||
"Authorization": f"Bearer {jwt}", | ||
"X-GitHub-Api-Version": "2022-11-28", | ||
} | ||
|
||
response = requests.get(installations_url, headers=headers) | ||
|
||
if response.status_code != 200: | ||
logger.error(f"Failed to get installations. Response: {response.text}") | ||
raise HTTPException( | ||
status_code=response.status_code, | ||
detail=f"Failed to get installations: {response.text}", | ||
) | ||
|
||
all_installations = response.json() | ||
async with aiohttp.ClientSession() as session: | ||
# Get first page to determine total pages | ||
async with session.get(base_url, headers=headers) as response: | ||
if response.status != 200: | ||
error_text = await response.text() | ||
logger.error(f"Failed to get installations. Response: {error_text}") | ||
raise HTTPException( | ||
status_code=response.status, | ||
detail=f"Failed to get installations: {error_text}", | ||
) | ||
|
||
# Extract last page number from Link header | ||
last_page = 1 | ||
if "Link" in response.headers: | ||
link_header = response.headers["Link"] | ||
links = requests.utils.parse_header_links(link_header) | ||
for link in links: | ||
if link["rel"] == "last": | ||
last_page = int(link["url"].split("page=")[-1]) | ||
break | ||
|
||
# Add first page's data | ||
first_page_data = await response.json() | ||
all_installations.extend(first_page_data) | ||
|
||
logger.info(f"Total pages to fetch: {last_page}") | ||
|
||
# Generate remaining page URLs (skip page 1 since we already have it) | ||
page_urls = [f"{base_url}?page={page}" for page in range(2, last_page + 1)] | ||
|
||
# Process URLs in batches of 10 | ||
async def fetch_page(url): | ||
try: | ||
async with session.get(url, headers=headers) as response: | ||
if response.status == 200: | ||
installations = await response.json() | ||
logger.info(f"Fetched {len(installations)} installations from {url}") | ||
return installations | ||
else: | ||
error_text = await response.text() | ||
logger.error(f"Failed to fetch page {url}. Response: {error_text}") | ||
return [] | ||
except Exception as e: | ||
logger.error(f"Error fetching page {url}: {str(e)}") | ||
return [] | ||
|
||
# Process URLs in batches of 10 | ||
for i in range(0, len(page_urls), 10): | ||
batch = page_urls[i:i + 10] | ||
batch_tasks = [fetch_page(url) for url in batch] | ||
batch_results = await asyncio.gather(*batch_tasks) | ||
for installations in batch_results: | ||
all_installations.extend(installations) | ||
|
||
logger.info(f"Final number of installations collected: {len(all_installations)}") | ||
|
||
# Filter installations: user's personal installation + org installations where user is a member | ||
user_installations = [] | ||
|
@@ -217,20 +266,35 @@ async def get_repos_for_user(self, user_id: str): | |
elif account_type == "Organization" and account_login in org_logins: | ||
user_installations.append(installation) | ||
|
||
|
||
repos = [] | ||
for installation in user_installations: | ||
app_auth = auth.get_installation_auth(installation["id"]) | ||
github = Github(auth=app_auth) | ||
repos_url = installation["repositories_url"] | ||
repos_response = requests.get( | ||
repos_url, headers={"Authorization": f"Bearer {app_auth.token}"} | ||
) | ||
if repos_response.status_code == 200: | ||
repos.extend(repos_response.json().get("repositories", [])) | ||
else: | ||
logger.error( | ||
f"Failed to fetch repositories for installation ID {installation['id']}. Response: {repos_response.text}" | ||
|
||
# Handle pagination for repositories as well | ||
next_repos_url = repos_url | ||
while next_repos_url: | ||
repos_response = requests.get( | ||
next_repos_url, | ||
headers={"Authorization": f"Bearer {app_auth.token}"} | ||
) | ||
|
||
if repos_response.status_code == 200: | ||
repos.extend(repos_response.json().get("repositories", [])) | ||
|
||
# Check for next page in Link header | ||
next_repos_url = None | ||
if "links" in repos_response.__dict__: | ||
for link in repos_response.links.values(): | ||
if link.get("rel") == "next": | ||
next_repos_url = link.get("url") | ||
break | ||
else: | ||
Comment on lines
+275
to
+294
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Consider using aiohttp for repository pagination to maintain consistency. Example approach: - repos_response = requests.get(
- next_repos_url,
- headers={"Authorization": f"Bearer {app_auth.token}"}
- )
+ async with session.get(
+ next_repos_url,
+ headers={"Authorization": f"Bearer {app_auth.token}"}
+ ) as response:
+ if response.status == 200:
+ data = await response.json()
+ repos.extend(data.get("repositories", []))
+ # Continue processing
|
||
logger.error( | ||
f"Failed to fetch repositories for installation ID {installation['id']}. Response: {repos_response.text}" | ||
) | ||
|
||
# Remove duplicate repositories if any | ||
unique_repos = {repo["id"]: repo for repo in repos}.values() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove unused variable.
'github' is assigned to but never used, which might confuse readers and violate lint checks.
- github = Github(auth=app_auth) repos_url = installation["repositories_url"]
📝 Committable suggestion
🧰 Tools
🪛 Ruff (0.8.2)
273-273: Local variable
github
is assigned to but never usedRemove assignment to unused variable
github
(F841)