import re
from datetime import date, datetime, timezone
from typing import Any
from urllib.parse import quote

import httpx
# GitHub GraphQL API endpoint; fetch_github_activity POSTs its contributions query here.
GITHUB_GRAPHQL_URL = "https://api.github.com/graphql"
class GitHubSourceError(RuntimeError):
    """Raised when a GitHub request in this module fails or reports errors."""
def _extract_attr(tag: str, attr: str) -> str | None:
match = re.search(rf'{attr}="([^"]+)"', tag)
return match.group(1) if match else None
def _parse_public_contributions_html(html: str, from_date: date, to_date: date) -> dict[str, int]:
    """Extract per-day contribution counts from a contributions HTML page.

    The markup is expected to contain one ``<td>`` per day carrying the
    ``ContributionCalendar-day`` class together with ``data-date`` and ``id``
    attributes, plus one ``<tool-tip for="<cell id>">`` element per cell whose
    text reads like ``"3 contributions on ..."`` or ``"No contributions on ..."``.

    Args:
        html: Raw HTML of the contributions page.
        from_date: First day (inclusive) to keep.
        to_date: Last day (inclusive) to keep.

    Returns:
        Mapping of ISO date string (``YYYY-MM-DD``) to contribution count.
        Day cells without a usable tooltip are reported with a count of 0.
    """
    # Pass 1: map each day-cell id to the count stated in its tooltip.
    tooltip_by_id: dict[str, int] = {}
    tooltip_pattern = r'<tool-tip\b[^>]*\sfor="([^"]+)"[^>]*>(.*?)</tool-tip>'
    for tooltip_match in re.finditer(tooltip_pattern, html, flags=re.S):
        cell_id = tooltip_match.group(1)
        # Drop any nested markup so only the human-readable sentence remains.
        tooltip_text = re.sub(r"<[^>]+>", "", tooltip_match.group(2)).strip()
        count_match = re.search(r"(\d[\d,]*)\s+contribution", tooltip_text, flags=re.I)
        if not count_match:
            if "No contributions" in tooltip_text:
                tooltip_by_id[cell_id] = 0
            continue
        # Counts may carry thousands separators, e.g. "1,234 contributions".
        tooltip_by_id[cell_id] = int(count_match.group(1).replace(",", ""))

    # Pass 2: walk the day cells and keep those inside the requested range.
    # ISO dates sort lexicographically, so plain string comparison is enough.
    first_key = from_date.isoformat()
    last_key = to_date.isoformat()
    normalized: dict[str, int] = {}
    for td_match in re.finditer(r"<td\b[^>]*ContributionCalendar-day[^>]*>", html):
        tag = td_match.group(0)
        date_key = _extract_attr(tag, "data-date")
        cell_id = _extract_attr(tag, "id")
        if not date_key or not cell_id:
            continue
        if first_key <= date_key <= last_key:
            normalized[date_key] = tooltip_by_id.get(cell_id, 0)
    return normalized
async def _fetch_github_activity_public(
    username: str,
    from_date: date,
    to_date: date,
    timeout_seconds: float,
) -> dict[str, int]:
    """Fetch contribution counts from the public contributions HTML page.

    Args:
        username: GitHub login whose contributions page is requested.
        from_date: First day (inclusive) of the requested range.
        to_date: Last day (inclusive) of the requested range.
        timeout_seconds: Timeout passed to the HTTP client.

    Returns:
        Mapping of ISO date string to contribution count, as produced by
        ``_parse_public_contributions_html``.

    Raises:
        GitHubSourceError: If the response status code is 400 or above.
    """
    # Percent-encode the login so characters such as "/", "?" or "#" cannot
    # change the path or query of the request URL.
    safe_username = quote(username, safe="")
    endpoint = (
        f"https://github.com/users/{safe_username}/contributions"
        f"?from={from_date.isoformat()}&to={to_date.isoformat()}"
    )
    headers = {
        "Accept": "text/html",
        "User-Agent": "git-activity-merge/0.1",
    }
    async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
        response = await client.get(endpoint, headers=headers)
    if response.status_code >= 400:
        raise GitHubSourceError(
            f"GitHub public contributions request failed with status {response.status_code}"
        )
    return _parse_public_contributions_html(response.text, from_date, to_date)
def _normalize_graphql_calendar(payload: dict[str, Any]) -> dict[str, int]:
    """Flatten a GraphQL contributions payload into ``{date: count}``.

    Every level is read with an ``or`` fallback rather than a ``dict.get``
    default: a default only covers an absent key, while a JSON ``null`` at any
    level would otherwise raise ``AttributeError``/``TypeError``.
    """
    user = (payload.get("data") or {}).get("user")
    if not user:
        return {}
    collection = user.get("contributionsCollection") or {}
    calendar = collection.get("contributionCalendar") or {}
    normalized: dict[str, int] = {}
    for week in calendar.get("weeks") or []:
        for day in week.get("contributionDays") or []:
            date_key = str(day.get("date") or "")
            if not date_key:
                continue
            normalized[date_key] = int(day.get("contributionCount") or 0)
    return normalized


async def fetch_github_activity(
    username: str,
    token: str | None,
    from_date: date,
    to_date: date,
    timeout_seconds: float = 20.0,
) -> dict[str, int]:
    """Fetch per-day contribution counts for a GitHub user.

    Without a token the request is delegated to
    ``_fetch_github_activity_public``; with a token the GraphQL API at
    ``GITHUB_GRAPHQL_URL`` is queried.

    Args:
        username: GitHub login to look up.
        token: API token sent as a bearer credential; ``None`` or an empty
            string selects the unauthenticated path.
        from_date: First day of the range; sent as 00:00:00 UTC.
        to_date: Last day of the range; sent as 23:59:59.999999 UTC.
        timeout_seconds: Timeout passed to the HTTP client.

    Returns:
        Mapping of date string to contribution count. Empty when the GraphQL
        response carries no ``user`` object.

    Raises:
        GitHubSourceError: If the HTTP status is 400 or above, or the GraphQL
            payload contains a non-empty ``errors`` entry.
    """
    if not token:
        return await _fetch_github_activity_public(
            username=username,
            from_date=from_date,
            to_date=to_date,
            timeout_seconds=timeout_seconds,
        )

    query = """
    query($login: String!, $from: DateTime!, $to: DateTime!) {
      user(login: $login) {
        contributionsCollection(from: $from, to: $to) {
          contributionCalendar {
            weeks {
              contributionDays {
                date
                contributionCount
              }
            }
          }
        }
      }
    }
    """
    variables: dict[str, Any] = {
        "login": username,
        "from": datetime.combine(from_date, datetime.min.time(), tzinfo=timezone.utc).isoformat(),
        "to": datetime.combine(to_date, datetime.max.time(), tzinfo=timezone.utc).isoformat(),
    }
    # A token is guaranteed here: the falsy case returned above.
    headers: dict[str, str] = {
        "Accept": "application/json",
        "Authorization": f"bearer {token}",
    }
    async with httpx.AsyncClient(timeout=timeout_seconds) as client:
        response = await client.post(
            GITHUB_GRAPHQL_URL,
            headers=headers,
            json={"query": query, "variables": variables},
        )
    if response.status_code >= 400:
        raise GitHubSourceError(f"GitHub GraphQL request failed with status {response.status_code}")
    payload = response.json()
    if payload.get("errors"):
        raise GitHubSourceError("GitHub GraphQL response included errors")
    return _normalize_graphql_calendar(payload)