Skip to content

Commit

Permalink
Merge pull request #247 from potpie-ai/pagination
Browse files Browse the repository at this point in the history
Fix github api pagination
  • Loading branch information
dhirenmathur authored Feb 8, 2025
2 parents 0360f58 + 6c22fcd commit a07007c
Showing 1 changed file with 83 additions and 19 deletions.
102 changes: 83 additions & 19 deletions app/modules/code_provider/github/github_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional, Tuple

import aiohttp
import chardet
import requests
from fastapi import HTTPException
Expand Down Expand Up @@ -187,23 +188,71 @@ async def get_repos_for_user(self, user_id: str):

auth = AppAuth(app_id=app_id, private_key=private_key)
jwt = auth.create_jwt()
installations_url = "https://api.github.com/app/installations"

# Fetch all installations with pagination
all_installations = []
base_url = "https://api.github.com/app/installations"
headers = {
"Accept": "application/vnd.github+json",
"Authorization": f"Bearer {jwt}",
"X-GitHub-Api-Version": "2022-11-28",
}

response = requests.get(installations_url, headers=headers)

if response.status_code != 200:
logger.error(f"Failed to get installations. Response: {response.text}")
raise HTTPException(
status_code=response.status_code,
detail=f"Failed to get installations: {response.text}",
)

all_installations = response.json()
async with aiohttp.ClientSession() as session:
# Get first page to determine total pages
async with session.get(base_url, headers=headers) as response:
if response.status != 200:
error_text = await response.text()
logger.error(f"Failed to get installations. Response: {error_text}")
raise HTTPException(
status_code=response.status,
detail=f"Failed to get installations: {error_text}",
)

# Extract last page number from Link header
last_page = 1
if "Link" in response.headers:
link_header = response.headers["Link"]
links = requests.utils.parse_header_links(link_header)
for link in links:
if link["rel"] == "last":
last_page = int(link["url"].split("page=")[-1])
break

# Add first page's data
first_page_data = await response.json()
all_installations.extend(first_page_data)

logger.info(f"Total pages to fetch: {last_page}")

# Generate remaining page URLs (skip page 1 since we already have it)
page_urls = [f"{base_url}?page={page}" for page in range(2, last_page + 1)]

# Process URLs in batches of 10
async def fetch_page(url):
try:
async with session.get(url, headers=headers) as response:
if response.status == 200:
installations = await response.json()
logger.info(f"Fetched {len(installations)} installations from {url}")
return installations
else:
error_text = await response.text()
logger.error(f"Failed to fetch page {url}. Response: {error_text}")
return []
except Exception as e:
logger.error(f"Error fetching page {url}: {str(e)}")
return []

# Process URLs in batches of 10
for i in range(0, len(page_urls), 10):
batch = page_urls[i:i + 10]
batch_tasks = [fetch_page(url) for url in batch]
batch_results = await asyncio.gather(*batch_tasks)
for installations in batch_results:
all_installations.extend(installations)

logger.info(f"Final number of installations collected: {len(all_installations)}")

# Filter installations: user's personal installation + org installations where user is a member
user_installations = []
Expand All @@ -217,20 +266,35 @@ async def get_repos_for_user(self, user_id: str):
elif account_type == "Organization" and account_login in org_logins:
user_installations.append(installation)


repos = []
for installation in user_installations:
app_auth = auth.get_installation_auth(installation["id"])
github = Github(auth=app_auth)
repos_url = installation["repositories_url"]
repos_response = requests.get(
repos_url, headers={"Authorization": f"Bearer {app_auth.token}"}
)
if repos_response.status_code == 200:
repos.extend(repos_response.json().get("repositories", []))
else:
logger.error(
f"Failed to fetch repositories for installation ID {installation['id']}. Response: {repos_response.text}"

# Handle pagination for repositories as well
next_repos_url = repos_url
while next_repos_url:
repos_response = requests.get(
next_repos_url,
headers={"Authorization": f"Bearer {app_auth.token}"}
)

if repos_response.status_code == 200:
repos.extend(repos_response.json().get("repositories", []))

# Check for next page in Link header
next_repos_url = None
if "links" in repos_response.__dict__:
for link in repos_response.links.values():
if link.get("rel") == "next":
next_repos_url = link.get("url")
break
else:
logger.error(
f"Failed to fetch repositories for installation ID {installation['id']}. Response: {repos_response.text}"
)

# Remove duplicate repositories if any
unique_repos = {repo["id"]: repo for repo in repos}.values()
Expand Down

0 comments on commit a07007c

Please # to comment.