Skip to content

Commit

Permalink
Merge pull request #1 from AF-Warsame/copilot/fix-f985b0bd-ab02-43f9-…
Browse files Browse the repository at this point in the history
…8c87-310fa6d06ab9
  • Loading branch information
AF-Warsame authored and GitHub committed Sep 27, 2025
2 parents 922fbc1 + e9bbad2 commit 18b1746
Show file tree
Hide file tree
Showing 27 changed files with 732 additions and 38 deletions.
Binary file removed .coverage
Binary file not shown.
50 changes: 50 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Testing
.coverage
.pytest_cache/
.tox/
htmlcov/

# Virtual environments
venv/
env/
ENV/

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
*.log

# Temporary files
tmp/
temp/
Binary file added __pycache__/run.cpython-312.pyc
Binary file not shown.
File renamed without changes.
13 changes: 13 additions & 0 deletions src/acmecli.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
pyproject.toml
src/acmecli/__init__.py
src/acmecli/cache.py
src/acmecli/cli.py
src/acmecli/github_handler.py
src/acmecli/hf_handler.py
src/acmecli/reporter.py
src/acmecli/scoring.py
src/acmecli/types.py
Expand All @@ -10,6 +13,16 @@ src/acmecli.egg-info/dependency_links.txt
src/acmecli.egg-info/top_level.txt
src/acmecli/metrics/__init__.py
src/acmecli/metrics/base.py
src/acmecli/metrics/busfactor_metric.py
src/acmecli/metrics/code_quality_metric.py
src/acmecli/metrics/dataset_code_metric.py
src/acmecli/metrics/dataset_quality_metric.py
src/acmecli/metrics/hf_downloads_metric.py
src/acmecli/metrics/license_metric.py
src/acmecli/metrics/performance_metric.py
src/acmecli/metrics/rampup_metric.py
src/acmecli/metrics/size_metric.py
tests/test_hf_handler.py
tests/test_metric_heuristics.py
tests/test_metrics_contract.py
tests/test_reporter_schema.py
Binary file modified src/acmecli/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file modified src/acmecli/__pycache__/types.cpython-312.pyc
Binary file not shown.
23 changes: 23 additions & 0 deletions src/acmecli/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from typing import Dict, Optional


class InMemoryCache:
"""Simple in-memory cache implementation."""

def __init__(self):
self._cache: Dict[str, bytes] = {}
self._etags: Dict[str, str] = {}

def get(self, key: str) -> bytes | None:
"""Get cached data by key."""
return self._cache.get(key)

def set(self, key: str, data: bytes, etag: str | None = None) -> None:
"""Set cached data with optional etag."""
self._cache[key] = data
if etag:
self._etags[key] = etag

def get_etag(self, key: str) -> str | None:
"""Get etag for cached data."""
return self._etags.get(key)
62 changes: 44 additions & 18 deletions src/acmecli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,37 +41,63 @@ def process_url(url: str, github_handler, hf_handler, cache):
else:
return None

if not meta:
return None

results = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
future_to_metric = {
executor.submit(m.score, meta): m.name for m in REGISTRY
}
for future in concurrent.futures.as_completed(future_to_metric):
metric_name = future_to_metric[future]
mv = future.result()
results[metric_name] = mv
try:
mv = future.result()
results[metric_name] = mv
except Exception as e:
logging.error(f"Error computing metric {metric_name}: {e}")
# Create a default MetricValue for failed metrics
from .types import MetricValue
results[metric_name] = MetricValue(metric_name, 0.0, 0)

net_score, net_score_latency = compute_net_score(results)

# Helper function to safely get metric values
def get_metric_value(name, default=0.0):
metric = results.get(name)
return metric.value if metric else default

def get_metric_latency(name, default=0):
metric = results.get(name)
return metric.latency_ms if metric else default

# Handle size_score specially since it returns a dict
size_result = results.get('size_score')
size_score_value = size_result.value if size_result else {
'raspberry_pi': 0.0, 'jetson_nano': 0.0, 'desktop_pc': 0.0, 'aws_server': 0.0
}

return ReportRow(
name=repo_name,
category="MODEL",
net_score=net_score,
net_score_latency=net_score_latency,
ramp_up_time=results['ramp_up_time'].value,
ramp_up_time_latency=results['ramp_up_time'].latency_ms,
bus_factor=results['bus_factor'].value,
bus_factor_latency=results['bus_factor'].latency_ms,
performance_claims=results['performance_claims'].value,
performance_claims_latency=results['performance_claims'].latency_ms,
license=results['license'].value,
license_latency=results['license'].latency_ms,
size_score=results['size_score'].value,
size_score_latency=results['size_score'].latency_ms,
dataset_and_code_score=results['dataset_and_code_score'].value,
dataset_and_code_score_latency=results['dataset_and_code_score'].latency_ms,
dataset_quality=results['dataset_quality'].value,
dataset_quality_latency=results['dataset_quality'].latency_ms,
code_quality=results['code_quality'].value,
code_quality_latency=results['code_quality'].latency_ms,
ramp_up_time=get_metric_value('ramp_up_time'),
ramp_up_time_latency=get_metric_latency('ramp_up_time'),
bus_factor=get_metric_value('bus_factor'),
bus_factor_latency=get_metric_latency('bus_factor'),
performance_claims=get_metric_value('performance_claims'),
performance_claims_latency=get_metric_latency('performance_claims'),
license=get_metric_value('license'),
license_latency=get_metric_latency('license'),
size_score=size_score_value,
size_score_latency=get_metric_latency('size_score'),
dataset_and_code_score=get_metric_value('dataset_and_code_score'),
dataset_and_code_score_latency=get_metric_latency('dataset_and_code_score'),
dataset_quality=get_metric_value('dataset_quality'),
dataset_quality_latency=get_metric_latency('dataset_quality'),
code_quality=get_metric_value('code_quality'),
code_quality_latency=get_metric_latency('code_quality'),
)

def main(argv: list[str]) -> int:
Expand Down
95 changes: 95 additions & 0 deletions src/acmecli/github_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import requests
import logging
from typing import Dict, Any


class GitHubHandler:
    """Handler for GitHub repository metadata fetching.

    All requests go through one ``requests.Session`` and use a timeout so
    that a stalled connection cannot block the caller indefinitely.
    """

    _API_ROOT = "https://api.github.com"
    # Exact host names accepted as GitHub repository URLs.
    _GITHUB_HOSTS = frozenset({"github.com", "www.github.com"})
    # Only the first N entries of the contributors response are kept.
    _MAX_CONTRIBUTORS = 10

    def __init__(self, timeout: float = 10.0):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.timeout = timeout
        self.session = requests.Session()
        # Set user agent for GitHub API
        self.session.headers.update({
            'User-Agent': 'ACME-CLI/1.0',
            'Accept': 'application/vnd.github.v3+json'
        })

    @classmethod
    def _parse_owner_repo(cls, url):
        """Return ``(owner, repo)`` parsed from a GitHub URL, or ``None``."""
        from urllib.parse import urlsplit

        try:
            parsed = urlsplit(url.strip())
            host = (parsed.hostname or '').lower()
        except (AttributeError, ValueError):
            # Non-string input or a malformed netloc (e.g. a bad IPv6 literal).
            return None
        # Compare the host exactly; a substring test would also accept
        # look-alike hosts such as "evilgithub.com".
        if host not in cls._GITHUB_HOSTS:
            return None

        segments = [part for part in parsed.path.split('/') if part]
        if len(segments) < 2:
            return None
        owner = segments[0]
        # Clone URLs end in ".git"; the API expects the bare repository name.
        repo = segments[1].removesuffix('.git')
        if not repo:
            return None
        return owner, repo

    def _fetch_contributors(self, owner: str, repo: str, url: str) -> Dict[str, int]:
        """Best-effort fetch of ``{login: contributions}``; ``{}`` on failure."""
        try:
            response = self.session.get(
                f"{self._API_ROOT}/repos/{owner}/{repo}/contributors",
                timeout=self.timeout,
            )
            if response.status_code != 200:
                return {}
            return {
                contrib.get('login', 'unknown'): contrib.get('contributions', 0)
                for contrib in response.json()[:self._MAX_CONTRIBUTORS]
            }
        except Exception as e:
            # Deliberately broad: contributors are optional extra data.
            logging.warning(f"Failed to fetch contributors for {url}: {e}")
            return {}

    def _fetch_readme(self, owner: str, repo: str, url: str) -> str:
        """Best-effort fetch of the decoded README text; ``''`` on failure."""
        import base64

        try:
            response = self.session.get(
                f"{self._API_ROOT}/repos/{owner}/{repo}/readme",
                timeout=self.timeout,
            )
            if response.status_code != 200:
                return ''
            encoded = response.json().get('content') or ''
            # Replace undecodable bytes instead of discarding the whole README.
            return base64.b64decode(encoded).decode('utf-8', errors='replace')
        except Exception as e:
            # Deliberately broad: the README is optional extra data.
            logging.warning(f"Failed to fetch README for {url}: {e}")
            return ''

    def fetch_meta(self, url: str) -> Dict[str, Any]:
        """Fetch repository metadata from GitHub API.

        Args:
            url: Repository URL such as ``https://github.com/owner/repo``.
                Extra path segments and a trailing ``.git`` are tolerated.

        Returns:
            A dict of repository fields plus ``contributors`` and
            ``readme_text``. An empty dict is returned when the URL is not a
            GitHub repository URL or the main repository request fails;
            errors are logged rather than raised.
        """
        owner_repo = self._parse_owner_repo(url)
        if owner_repo is None:
            logging.error(f"Invalid GitHub URL format: {url}")
            return {}
        owner, repo = owner_repo

        try:
            response = self.session.get(
                f"{self._API_ROOT}/repos/{owner}/{repo}",
                timeout=self.timeout,
            )
            response.raise_for_status()

            repo_data = response.json()
            license_info = repo_data.get('license') or {}

            # ``or`` fallbacks: a JSON null yields None even when a .get()
            # default is given, so normalise those fields to ''.
            meta = {
                'name': repo_data.get('name', ''),
                'full_name': repo_data.get('full_name', ''),
                'description': repo_data.get('description') or '',
                'stars': repo_data.get('stargazers_count', 0),
                'forks': repo_data.get('forks_count', 0),
                'watchers': repo_data.get('watchers_count', 0),
                'size': repo_data.get('size', 0),  # in KB
                'language': repo_data.get('language') or '',
                'topics': repo_data.get('topics', []),
                'license': license_info.get('spdx_id') or '',
                'created_at': repo_data.get('created_at', ''),
                'updated_at': repo_data.get('updated_at', ''),
                'pushed_at': repo_data.get('pushed_at', ''),
                'default_branch': repo_data.get('default_branch', 'main'),
                'open_issues_count': repo_data.get('open_issues_count', 0),
                'has_wiki': repo_data.get('has_wiki', False),
                'has_pages': repo_data.get('has_pages', False),
                'archived': repo_data.get('archived', False),
                'disabled': repo_data.get('disabled', False),
            }

            # Optional extras; each helper falls back to an empty value.
            meta['contributors'] = self._fetch_contributors(owner, repo, url)
            meta['readme_text'] = self._fetch_readme(owner, repo, url)

            return meta

        except requests.RequestException as e:
            logging.error(f"HTTP error fetching metadata for {url}: {e}")
            return {}
        except Exception as e:
            logging.error(f"Failed to fetch metadata for {url}: {e}")
            return {}
Binary file modified src/acmecli/metrics/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file modified src/acmecli/metrics/__pycache__/base.cpython-312.pyc
Binary file not shown.
Binary file modified src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc
Binary file not shown.
64 changes: 64 additions & 0 deletions src/acmecli/metrics/busfactor_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import time
from ..types import MetricValue
from .base import register


class BusFactorMetric:
    """Bus-factor heuristic: a higher score means less key-person risk."""
    name = "bus_factor"

    def score(self, meta: dict) -> MetricValue:
        """Score *meta* in the range 0.0-1.0 and report the elapsed time.

        The score is a capped sum of bonuses for contributor count, how
        evenly contributions are spread, the look of the owner name, and
        the fork count. Reads the ``contributors``, ``full_name`` and
        ``forks`` keys of *meta*; missing keys contribute nothing.
        """
        started = time.perf_counter()
        total = 0.0

        contribs = meta.get('contributors', {})
        if contribs:
            commit_sum = sum(contribs.values())
            head_count = len(contribs)

            # More contributors -> larger bonus; first matching tier wins.
            for floor, bonus in ((10, 0.4), (5, 0.3), (3, 0.2), (2, 0.1)):
                if head_count >= floor:
                    total += bonus
                    break

            if commit_sum > 0:
                # Share of all contributions held by the top contributor;
                # a smaller share earns a larger bonus.
                dominant_share = max(contribs.values()) / commit_sum
                for ceiling, bonus in ((0.3, 0.3), (0.5, 0.2), (0.7, 0.1)):
                    if dominant_share < ceiling:
                        total += bonus
                        break

        # Owner-name heuristic: names longer than three characters that are
        # not all-lowercase are treated as organization-backed.
        repo_path = meta.get('full_name', '')
        if '/' in repo_path:
            account = repo_path.split('/')[0]
            if not (len(account) <= 3 or account.islower()):
                total += 0.1

        # Forks are counted as a sign of community involvement.
        fork_count = meta.get('forks', 0)
        for floor, bonus in ((50, 0.2), (10, 0.1)):
            if fork_count > floor:
                total += bonus
                break

        capped = min(1.0, total)
        elapsed_ms = int((time.perf_counter() - started) * 1000)
        return MetricValue(self.name, capped, elapsed_ms)


register(BusFactorMetric())
Loading

0 comments on commit 18b1746

Please sign in to comment.