diff --git a/.coverage b/.coverage deleted file mode 100644 index ac17062..0000000 Binary files a/.coverage and /dev/null differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..34a7ca4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Testing +.coverage +.pytest_cache/ +.tox/ +htmlcov/ + +# Virtual environments +venv/ +env/ +ENV/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log + +# Temporary files +tmp/ +temp/ \ No newline at end of file diff --git a/__pycache__/run.cpython-312.pyc b/__pycache__/run.cpython-312.pyc new file mode 100644 index 0000000..6249259 Binary files /dev/null and b/__pycache__/run.cpython-312.pyc differ diff --git a/requirments.txt b/requirements.txt similarity index 100% rename from requirments.txt rename to requirements.txt diff --git a/src/acmecli.egg-info/SOURCES.txt b/src/acmecli.egg-info/SOURCES.txt index 7ab9ca9..0dc8322 100644 --- a/src/acmecli.egg-info/SOURCES.txt +++ b/src/acmecli.egg-info/SOURCES.txt @@ -1,6 +1,9 @@ pyproject.toml src/acmecli/__init__.py +src/acmecli/cache.py src/acmecli/cli.py +src/acmecli/github_handler.py +src/acmecli/hf_handler.py src/acmecli/reporter.py src/acmecli/scoring.py src/acmecli/types.py @@ -10,6 +13,16 @@ src/acmecli.egg-info/dependency_links.txt src/acmecli.egg-info/top_level.txt src/acmecli/metrics/__init__.py src/acmecli/metrics/base.py +src/acmecli/metrics/busfactor_metric.py +src/acmecli/metrics/code_quality_metric.py +src/acmecli/metrics/dataset_code_metric.py +src/acmecli/metrics/dataset_quality_metric.py +src/acmecli/metrics/hf_downloads_metric.py src/acmecli/metrics/license_metric.py +src/acmecli/metrics/performance_metric.py +src/acmecli/metrics/rampup_metric.py +src/acmecli/metrics/size_metric.py +tests/test_hf_handler.py +tests/test_metric_heuristics.py tests/test_metrics_contract.py tests/test_reporter_schema.py \ No newline at end of file diff --git a/src/acmecli/__pycache__/__init__.cpython-312.pyc b/src/acmecli/__pycache__/__init__.cpython-312.pyc index b376949..8c5a7a2 100644 Binary files a/src/acmecli/__pycache__/__init__.cpython-312.pyc and b/src/acmecli/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/acmecli/__pycache__/hf_handler.cpython-312.pyc b/src/acmecli/__pycache__/hf_handler.cpython-312.pyc new file mode 100644 index 0000000..00331c7 Binary files /dev/null and b/src/acmecli/__pycache__/hf_handler.cpython-312.pyc differ diff --git a/src/acmecli/__pycache__/types.cpython-312.pyc b/src/acmecli/__pycache__/types.cpython-312.pyc index 3fc03c2..669d12b 100644 Binary files a/src/acmecli/__pycache__/types.cpython-312.pyc and b/src/acmecli/__pycache__/types.cpython-312.pyc differ diff --git a/src/acmecli/cache.py b/src/acmecli/cache.py new file mode 100644 index 0000000..db48b21 --- /dev/null +++ b/src/acmecli/cache.py @@ -0,0 +1,23 @@ +from typing import Dict, Optional + + +class InMemoryCache: + """Simple in-memory cache implementation.""" + + def __init__(self): + self._cache: Dict[str, bytes] = {} + self._etags: Dict[str, str] = {} + + def get(self, key: str) -> bytes | None: + """Get cached data by key.""" + return self._cache.get(key) + + def set(self, key: str, data: bytes, etag: str | None = None) -> None: + """Set cached data with optional etag.""" + self._cache[key] = data + if etag: + self._etags[key] = etag + 
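+    # Illustrative usage (editor's sketch, not part of the original change; the
+    # key and etag values below are arbitrary examples):
+    #
+    #     cache = InMemoryCache()
+    #     cache.set("https://api.github.com/repos/owner/repo", b"{}", etag='W/"abc"')
+    #     cache.get("https://api.github.com/repos/owner/repo")       # -> b"{}"
+    #     cache.get_etag("https://api.github.com/repos/owner/repo")  # -> 'W/"abc"'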
+ def get_etag(self, key: str) -> str | None: + """Get etag for cached data.""" + return self._etags.get(key) \ No newline at end of file diff --git a/src/acmecli/cli.py b/src/acmecli/cli.py index 649bd6b..e525815 100644 --- a/src/acmecli/cli.py +++ b/src/acmecli/cli.py @@ -41,6 +41,9 @@ def process_url(url: str, github_handler, hf_handler, cache): else: return None + if not meta: + return None + results = {} with concurrent.futures.ThreadPoolExecutor() as executor: future_to_metric = { @@ -48,30 +51,53 @@ def process_url(url: str, github_handler, hf_handler, cache): } for future in concurrent.futures.as_completed(future_to_metric): metric_name = future_to_metric[future] - mv = future.result() - results[metric_name] = mv + try: + mv = future.result() + results[metric_name] = mv + except Exception as e: + logging.error(f"Error computing metric {metric_name}: {e}") + # Create a default MetricValue for failed metrics + from .types import MetricValue + results[metric_name] = MetricValue(metric_name, 0.0, 0) + net_score, net_score_latency = compute_net_score(results) + + # Helper function to safely get metric values + def get_metric_value(name, default=0.0): + metric = results.get(name) + return metric.value if metric else default + + def get_metric_latency(name, default=0): + metric = results.get(name) + return metric.latency_ms if metric else default + + # Handle size_score specially since it returns a dict + size_result = results.get('size_score') + size_score_value = size_result.value if size_result else { + 'raspberry_pi': 0.0, 'jetson_nano': 0.0, 'desktop_pc': 0.0, 'aws_server': 0.0 + } + return ReportRow( name=repo_name, category="MODEL", net_score=net_score, net_score_latency=net_score_latency, - ramp_up_time=results['ramp_up_time'].value, - ramp_up_time_latency=results['ramp_up_time'].latency_ms, - bus_factor=results['bus_factor'].value, - bus_factor_latency=results['bus_factor'].latency_ms, - performance_claims=results['performance_claims'].value, - performance_claims_latency=results['performance_claims'].latency_ms, - license=results['license'].value, - license_latency=results['license'].latency_ms, - size_score=results['size_score'].value, - size_score_latency=results['size_score'].latency_ms, - dataset_and_code_score=results['dataset_and_code_score'].value, - dataset_and_code_score_latency=results['dataset_and_code_score'].latency_ms, - dataset_quality=results['dataset_quality'].value, - dataset_quality_latency=results['dataset_quality'].latency_ms, - code_quality=results['code_quality'].value, - code_quality_latency=results['code_quality'].latency_ms, + ramp_up_time=get_metric_value('ramp_up_time'), + ramp_up_time_latency=get_metric_latency('ramp_up_time'), + bus_factor=get_metric_value('bus_factor'), + bus_factor_latency=get_metric_latency('bus_factor'), + performance_claims=get_metric_value('performance_claims'), + performance_claims_latency=get_metric_latency('performance_claims'), + license=get_metric_value('license'), + license_latency=get_metric_latency('license'), + size_score=size_score_value, + size_score_latency=get_metric_latency('size_score'), + dataset_and_code_score=get_metric_value('dataset_and_code_score'), + dataset_and_code_score_latency=get_metric_latency('dataset_and_code_score'), + dataset_quality=get_metric_value('dataset_quality'), + dataset_quality_latency=get_metric_latency('dataset_quality'), + code_quality=get_metric_value('code_quality'), + code_quality_latency=get_metric_latency('code_quality'), ) def main(argv: list[str]) -> int: diff --git 
a/src/acmecli/github_handler.py b/src/acmecli/github_handler.py new file mode 100644 index 0000000..0f35b87 --- /dev/null +++ b/src/acmecli/github_handler.py @@ -0,0 +1,95 @@ +import requests +import logging +from typing import Dict, Any + + +class GitHubHandler: + """Handler for GitHub repository metadata fetching.""" + + def __init__(self): + self.session = requests.Session() + # Set user agent for GitHub API + self.session.headers.update({ + 'User-Agent': 'ACME-CLI/1.0', + 'Accept': 'application/vnd.github.v3+json' + }) + + def fetch_meta(self, url: str) -> Dict[str, Any]: + """Fetch repository metadata from GitHub API.""" + try: + # Parse GitHub URL: https://github.com/owner/repo + parts = url.rstrip('/').split('/') + if len(parts) < 5 or 'github.com' not in parts[2]: + logging.error(f"Invalid GitHub URL format: {url}") + return {} + + owner, repo = parts[3], parts[4] + api_url = f"https://api.github.com/repos/{owner}/{repo}" + + response = self.session.get(api_url) + response.raise_for_status() + + repo_data = response.json() + + # Fetch additional metadata + meta = { + 'name': repo_data.get('name', ''), + 'full_name': repo_data.get('full_name', ''), + 'description': repo_data.get('description', ''), + 'stars': repo_data.get('stargazers_count', 0), + 'forks': repo_data.get('forks_count', 0), + 'watchers': repo_data.get('watchers_count', 0), + 'size': repo_data.get('size', 0), # in KB + 'language': repo_data.get('language', ''), + 'topics': repo_data.get('topics', []), + 'license': repo_data.get('license', {}).get('spdx_id', '') if repo_data.get('license') else '', + 'created_at': repo_data.get('created_at', ''), + 'updated_at': repo_data.get('updated_at', ''), + 'pushed_at': repo_data.get('pushed_at', ''), + 'default_branch': repo_data.get('default_branch', 'main'), + 'open_issues_count': repo_data.get('open_issues_count', 0), + 'has_wiki': repo_data.get('has_wiki', False), + 'has_pages': repo_data.get('has_pages', False), + 'archived': repo_data.get('archived', False), + 'disabled': repo_data.get('disabled', False), + } + + # Try to fetch contributors data + try: + contributors_url = f"https://api.github.com/repos/{owner}/{repo}/contributors" + contrib_response = self.session.get(contributors_url) + if contrib_response.status_code == 200: + contributors = contrib_response.json() + meta['contributors'] = { + contrib.get('login', 'unknown'): contrib.get('contributions', 0) + for contrib in contributors[:10] # Limit to top 10 + } + else: + meta['contributors'] = {} + except Exception as e: + logging.warning(f"Failed to fetch contributors for {url}: {e}") + meta['contributors'] = {} + + # Try to fetch README + try: + readme_url = f"https://api.github.com/repos/{owner}/{repo}/readme" + readme_response = self.session.get(readme_url) + if readme_response.status_code == 200: + readme_data = readme_response.json() + import base64 + readme_content = base64.b64decode(readme_data.get('content', '')).decode('utf-8') + meta['readme_text'] = readme_content + else: + meta['readme_text'] = '' + except Exception as e: + logging.warning(f"Failed to fetch README for {url}: {e}") + meta['readme_text'] = '' + + return meta + + except requests.RequestException as e: + logging.error(f"HTTP error fetching metadata for {url}: {e}") + return {} + except Exception as e: + logging.error(f"Failed to fetch metadata for {url}: {e}") + return {} \ No newline at end of file diff --git a/src/acmecli/metrics/__pycache__/__init__.cpython-312.pyc b/src/acmecli/metrics/__pycache__/__init__.cpython-312.pyc index 
f29c0dc..f0325a7 100644 Binary files a/src/acmecli/metrics/__pycache__/__init__.cpython-312.pyc and b/src/acmecli/metrics/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/acmecli/metrics/__pycache__/base.cpython-312.pyc b/src/acmecli/metrics/__pycache__/base.cpython-312.pyc index db346cb..5760bd8 100644 Binary files a/src/acmecli/metrics/__pycache__/base.cpython-312.pyc and b/src/acmecli/metrics/__pycache__/base.cpython-312.pyc differ diff --git a/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc b/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc index 1cf53e3..e38ef5c 100644 Binary files a/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc and b/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc differ diff --git a/src/acmecli/metrics/busfactor_metric.py b/src/acmecli/metrics/busfactor_metric.py new file mode 100644 index 0000000..5d6f142 --- /dev/null +++ b/src/acmecli/metrics/busfactor_metric.py @@ -0,0 +1,64 @@ +import time +from ..types import MetricValue +from .base import register + + +class BusFactorMetric: + """Metric to assess bus factor - higher score means less risk from key person dependency.""" + name = "bus_factor" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for bus factor (higher = safer, more distributed) + score = 0.0 + + contributors = meta.get('contributors', {}) + if contributors: + total_contributions = sum(contributors.values()) + contributor_count = len(contributors) + + if contributor_count >= 10: + score += 0.4 + elif contributor_count >= 5: + score += 0.3 + elif contributor_count >= 3: + score += 0.2 + elif contributor_count >= 2: + score += 0.1 + + # Check contribution distribution + if total_contributions > 0: + # Find the top contributor's share + max_contributions = max(contributors.values()) if contributors else 0 + top_contributor_share = max_contributions / total_contributions + + # Lower share of top contributor = better bus factor + if top_contributor_share < 0.3: + score += 0.3 + elif top_contributor_share < 0.5: + score += 0.2 + elif top_contributor_share < 0.7: + score += 0.1 + + # Organization/company backing (GitHub org vs individual) + full_name = meta.get('full_name', '') + if '/' in full_name: + owner = full_name.split('/')[0] + # Heuristic: longer names often indicate organizations + if len(owner) > 3 and not owner.islower(): + score += 0.1 + + # Forks indicate community involvement + forks = meta.get('forks', 0) + if forks > 50: + score += 0.2 + elif forks > 10: + score += 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(BusFactorMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/code_quality_metric.py b/src/acmecli/metrics/code_quality_metric.py new file mode 100644 index 0000000..c628377 --- /dev/null +++ b/src/acmecli/metrics/code_quality_metric.py @@ -0,0 +1,78 @@ +import time +from ..types import MetricValue +from .base import register + + +class CodeQualityMetric: + """Metric to assess code style, maintainability, and engineering practices.""" + name = "code_quality" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for code quality assessment + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for testing mentions + testing_keywords = ['test', 'testing', 'pytest', 'unittest', 'coverage', 'ci', 'continuous integration'] + if 
any(keyword in readme_text for keyword in testing_keywords):
+                score += 0.3
+
+            # Look for documentation practices
+            doc_keywords = ['documentation', 'docs', 'api', 'docstring', 'readme', 'wiki']
+            if any(keyword in readme_text for keyword in doc_keywords):
+                score += 0.2
+
+            # Look for code style and linting
+            style_keywords = ['lint', 'flake8', 'pylint', 'black', 'isort', 'pre-commit', 'style guide']
+            if any(keyword in readme_text for keyword in style_keywords):
+                score += 0.2
+
+            # Look for dependency management
+            dep_keywords = ['requirements.txt', 'setup.py', 'pyproject.toml', 'pipfile', 'conda', 'environment']
+            if any(keyword in readme_text for keyword in dep_keywords):
+                score += 0.1
+
+            # Look for version control best practices
+            vc_keywords = ['tag', 'release', 'version', 'changelog', 'semantic versioning']
+            if any(keyword in readme_text for keyword in vc_keywords):
+                score += 0.1
+
+        # Check for popular programming language (better tooling/community)
+        language = meta.get('language', '').lower()
+        popular_languages = ['python', 'javascript', 'java', 'c++', 'typescript', 'go', 'rust']
+        if language in popular_languages:
+            score += 0.1
+
+        # Check for recent activity (maintained code is generally better)
+        if meta.get('pushed_at'):
+            from datetime import datetime, timezone
+            try:
+                pushed_date = datetime.fromisoformat(meta['pushed_at'].replace('Z', '+00:00'))
+                now = datetime.now(timezone.utc)
+                days_since_push = (now - pushed_date).days
+                if days_since_push < 30:
+                    score += 0.2
+                elif days_since_push < 90:
+                    score += 0.1
+            except Exception:
+                pass
+
+        # Check open issues ratio (fewer issues relative to activity often indicates quality)
+        open_issues = meta.get('open_issues_count', 0)
+        stars = meta.get('stars', 0)
+        if stars > 0:
+            issue_ratio = open_issues / max(stars, 1)
+            if issue_ratio < 0.1:
+                score += 0.1
+            elif issue_ratio < 0.2:
+                score += 0.05
+
+        value = min(1.0, score)
+        latency_ms = int((time.perf_counter() - t0) * 1000)
+        return MetricValue(self.name, value, latency_ms)
+
+
+register(CodeQualityMetric())
\ No newline at end of file
diff --git a/src/acmecli/metrics/dataset_code_metric.py b/src/acmecli/metrics/dataset_code_metric.py
new file mode 100644
index 0000000..6961681
--- /dev/null
+++ b/src/acmecli/metrics/dataset_code_metric.py
@@ -0,0 +1,58 @@
+import time
+from ..types import MetricValue
+from .base import register
+
+
+class DatasetAndCodeMetric:
+    """Metric to assess availability of training dataset and code documentation."""
+    name = "dataset_and_code_score"
+
+    def score(self, meta: dict) -> MetricValue:
+        t0 = time.perf_counter()
+
+        # Heuristics for dataset and code availability
+        score = 0.0
+
+        readme_text = meta.get('readme_text', '').lower()
+        if readme_text:
+            # Look for dataset-related information
+            dataset_keywords = ['dataset', 'data', 'training data', 'corpus', 'benchmark']
+            if any(keyword in readme_text for keyword in dataset_keywords):
+                score += 0.3
+
+            # Look for specific well-known datasets
+            known_datasets = ['imagenet', 'coco', 'openimages', 'wikipedia', 'common crawl',
+                              'glue', 'squad', 'wmt', 'pile', 'c4', 'openwebtext']
+            if any(dataset in readme_text for dataset in known_datasets):
+                score += 0.2
+
+            # Look for code availability indicators
+            code_keywords = ['code', 'implementation', 'source', 'repository', 'github', 'script']
+            if any(keyword in readme_text for keyword in code_keywords):
+                score += 0.2
+
+            # Look for example usage or demo code
+            example_keywords = ['example', 'demo', 'tutorial', 'usage', 'quickstart', 'getting started']
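+            # Note: these checks are plain substring matches and each bonus is applied
+            # once per group; e.g. a README line like "See the quickstart notebook for
+            # a demo" adds 0.2 below even though it matches two keywords.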
+ if any(keyword in readme_text for keyword in example_keywords): + score += 0.2 + + # Look for links to external resources + if 'http' in readme_text or 'www' in readme_text: + score += 0.1 + + # Check if repository has multiple programming languages (indicates comprehensive codebase) + language = meta.get('language', '') + if language: + score += 0.1 + + # Check repository size - larger repos often have more comprehensive code/data + size_kb = meta.get('size', 0) + if size_kb > 10000: # > 10MB suggests substantial content + score += 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(DatasetAndCodeMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/dataset_quality_metric.py b/src/acmecli/metrics/dataset_quality_metric.py new file mode 100644 index 0000000..0108b51 --- /dev/null +++ b/src/acmecli/metrics/dataset_quality_metric.py @@ -0,0 +1,59 @@ +import time +from ..types import MetricValue +from .base import register + + +class DatasetQualityMetric: + """Metric to assess the quality of training/evaluation datasets used.""" + name = "dataset_quality" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for dataset quality assessment + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for high-quality, well-known datasets + premium_datasets = ['imagenet', 'coco', 'openimages', 'wmt', 'squad', 'glue', 'superglue'] + if any(dataset in readme_text for dataset in premium_datasets): + score += 0.4 + + # Look for dataset size indicators (larger often means better) + size_indicators = ['million', 'billion', 'large-scale', 'comprehensive', 'extensive'] + if any(indicator in readme_text for indicator in size_indicators): + score += 0.2 + + # Look for data curation and cleaning mentions + quality_keywords = ['curated', 'cleaned', 'filtered', 'validated', 'annotated', 'labeled'] + if any(keyword in readme_text for keyword in quality_keywords): + score += 0.2 + + # Look for diversity and bias considerations + diversity_keywords = ['diverse', 'balanced', 'bias', 'fairness', 'representative'] + if any(keyword in readme_text for keyword in diversity_keywords): + score += 0.1 + + # Look for evaluation methodology + eval_keywords = ['evaluation', 'benchmark', 'metric', 'validation', 'test set'] + if any(keyword in readme_text for keyword in eval_keywords): + score += 0.1 + + # Check for academic/research backing (often indicates quality) + if readme_text and any(keyword in readme_text for keyword in ['paper', 'research', 'university', 'arxiv']): + score += 0.1 + + # Check repository maturity (stars, forks indicate community validation) + stars = meta.get('stars', 0) + if stars > 500: + score += 0.1 + elif stars > 100: + score += 0.05 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(DatasetQualityMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/hf_downloads_metric.py b/src/acmecli/metrics/hf_downloads_metric.py index 494ed65..600abfd 100644 --- a/src/acmecli/metrics/hf_downloads_metric.py +++ b/src/acmecli/metrics/hf_downloads_metric.py @@ -1,15 +1,18 @@ import time +from ..types import MetricValue from .base import register + class HFDownloadsMetric: name = "hf_downloads" - def score(self, meta: dict): + def score(self, meta: dict) -> MetricValue: t0 = time.perf_counter() downloads = 
meta.get("downloads", 0) # Heuristic: normalize to [0,1] (e.g. >10000 is 1.0, <100 is 0.1) value = min(1.0, downloads / 10000) if downloads else 0.0 latency_ms = int((time.perf_counter() - t0) * 1000) - return type("MetricValue", (), {"name": self.name, "value": value, "latency_ms": latency_ms})() + return MetricValue(self.name, value, latency_ms) + register(HFDownloadsMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/license_metric.py b/src/acmecli/metrics/license_metric.py index 838708a..28b5752 100644 --- a/src/acmecli/metrics/license_metric.py +++ b/src/acmecli/metrics/license_metric.py @@ -1,18 +1,55 @@ import time -from ..types import Metric, Signals, TargetSpec, SourceHandler, Cache, MetricValue +from ..types import MetricValue from .base import register + class LicenseMetric: + """Metric to assess license clarity and permissiveness for LGPLv2.1 compatibility.""" name = "license" - def collect(self, spec: TargetSpec, handler: SourceHandler, cache: Cache) -> Signals: - # Week 1 stub: no network; just return empty signals - return {} - - def score(self, signals: Signals) -> MetricValue: + def score(self, meta: dict) -> MetricValue: t0 = time.perf_counter() - value = 0.0 # TODO: map license presence/compatibility → [0,1] + + # Heuristics for license compatibility with LGPLv2.1 + score = 0.0 + license_name = meta.get('license', '').lower() + readme_text = meta.get('readme_text', '').lower() + + # Check for explicit license in metadata + if license_name: + # LGPLv2.1-compatible licenses (high scores) + compatible_licenses = [ + 'mit', 'bsd', 'apache', 'lgpl', 'mpl', 'cc0', 'unlicense', 'public domain' + ] + if any(lic in license_name for lic in compatible_licenses): + score += 0.8 + # Potentially compatible (medium scores) + elif any(lic in license_name for lic in ['gpl-2', 'lgpl-2']): + score += 0.6 + # Less compatible (lower scores) + elif 'gpl-3' in license_name: + score += 0.3 + else: + score += 0.2 # Some license is better than none + + # Check README for license information + if readme_text: + license_keywords = ['license', 'licensing', 'copyright', 'terms', 'legal'] + if any(keyword in readme_text for keyword in license_keywords): + score += 0.1 + + # Look for specific license mentions in README + readme_compatible = ['mit', 'bsd', 'apache', 'lgpl', 'mozilla public license'] + if any(lic in readme_text for lic in readme_compatible): + score += 0.1 + + # Penalty for no license information at all + if not license_name and 'license' not in readme_text: + score = max(0.0, score - 0.3) + + value = min(1.0, score) latency_ms = int((time.perf_counter() - t0) * 1000) return MetricValue(self.name, value, latency_ms) + register(LicenseMetric()) diff --git a/src/acmecli/metrics/performance_metric.py b/src/acmecli/metrics/performance_metric.py new file mode 100644 index 0000000..f4a93a0 --- /dev/null +++ b/src/acmecli/metrics/performance_metric.py @@ -0,0 +1,52 @@ +import time +import re +from ..types import MetricValue +from .base import register + + +class PerformanceClaimsMetric: + """Metric to assess evidence of performance claims through benchmarks and evaluations.""" + name = "performance_claims" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for performance claims evidence + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for benchmark-related keywords + benchmark_keywords = ['benchmark', 'evaluation', 'eval', 'performance', 'accuracy', 'f1', 'bleu', 'rouge'] + if any(keyword 
in readme_text for keyword in benchmark_keywords):
+                score += 0.3
+
+            # Look for specific metrics or numbers indicating performance
+            if re.search(r'\d+\.?\d*%', readme_text) or re.search(r'score.*\d+', readme_text):
+                score += 0.2
+
+            # Look for comparison with other models
+            comparison_keywords = ['compared to', 'vs', 'versus', 'outperform', 'better than', 'state-of-the-art', 'sota']
+            if any(keyword in readme_text for keyword in comparison_keywords):
+                score += 0.2
+
+            # Look for well-known evaluation datasets and metrics
+            eval_datasets = ['glue', 'superglue', 'squad', 'coco', 'imagenet', 'wmt', 'bleu', 'rouge']
+            if any(dataset in readme_text for dataset in eval_datasets):
+                score += 0.2
+
+            # Look for published papers or citations
+            paper_keywords = ['paper', 'arxiv', 'citation', 'published', 'acl', 'nips', 'icml', 'iclr', 'emnlp']
+            if any(keyword in readme_text for keyword in paper_keywords):
+                score += 0.1
+
+        # Check for a GitHub Pages site (often used to publish docs and benchmark results)
+        if meta.get('has_pages', False):
+            score += 0.1
+
+        value = min(1.0, score)
+        latency_ms = int((time.perf_counter() - t0) * 1000)
+        return MetricValue(self.name, value, latency_ms)
+
+
+register(PerformanceClaimsMetric())
\ No newline at end of file
diff --git a/src/acmecli/metrics/rampup_metric.py b/src/acmecli/metrics/rampup_metric.py
new file mode 100644
index 0000000..6dc7a12
--- /dev/null
+++ b/src/acmecli/metrics/rampup_metric.py
@@ -0,0 +1,54 @@
+import time
+from ..types import MetricValue
+from .base import register
+
+
+class RampUpMetric:
+    """Metric to assess ease of ramp-up based on documentation and examples."""
+    name = "ramp_up_time"
+
+    def score(self, meta: dict) -> MetricValue:
+        t0 = time.perf_counter()
+
+        # Heuristics for ramp-up time (higher = easier to ramp up)
+        score = 0.0
+
+        # Check for README content
+        readme_text = meta.get('readme_text', '').lower()
+        if readme_text:
+            score += 0.3
+            # Look for common documentation sections
+            if any(keyword in readme_text for keyword in ['install', 'usage', 'example', 'quickstart', 'getting started']):
+                score += 0.2
+            if any(keyword in readme_text for keyword in ['api', 'documentation', 'docs']):
+                score += 0.1
+
+        # Check for presence of wiki
+        if meta.get('has_wiki', False):
+            score += 0.1
+
+        # Check for active maintenance (recent updates)
+        if meta.get('pushed_at'):
+            from datetime import datetime, timezone
+            try:
+                pushed_date = datetime.fromisoformat(meta['pushed_at'].replace('Z', '+00:00'))
+                now = datetime.now(timezone.utc)
+                days_since_push = (now - pushed_date).days
+                if days_since_push < 30:
+                    score += 0.2
+                elif days_since_push < 90:
+                    score += 0.1
+            except Exception:
+                pass
+
+        # Check for stars (indication of community adoption)
+        stars = meta.get('stars', 0)
+        if stars > 100:
+            score += 0.1
+
+        value = min(1.0, score)
+        latency_ms = int((time.perf_counter() - t0) * 1000)
+        return MetricValue(self.name, value, latency_ms)
+
+
+register(RampUpMetric())
\ No newline at end of file
diff --git a/src/acmecli/metrics/size_metric.py b/src/acmecli/metrics/size_metric.py
new file mode 100644
index 0000000..c5c9208
--- /dev/null
+++ b/src/acmecli/metrics/size_metric.py
@@ -0,0 +1,65 @@
+import time
+from typing import Dict
+from ..types import MetricValue
+from .base import register
+
+
+class SizeMetric:
+    """Metric to assess model size compatibility with different hardware platforms."""
+    name = "size_score"
+
+    def score(self, meta: dict) -> MetricValue:
+        t0 = time.perf_counter()
+
+        # Get repository size in KB
+        repo_size_kb = 
meta.get('size', 0) + + # Heuristic size thresholds for different hardware (in KB) + # Based on typical model sizes and hardware constraints + thresholds = { + 'raspberry_pi': 100_000, # ~100MB - very constrained + 'jetson_nano': 1_000_000, # ~1GB - moderate constraints + 'desktop_pc': 10_000_000, # ~10GB - good resources + 'aws_server': 50_000_000 # ~50GB - high resources + } + + # Calculate compatibility scores for each platform + scores = {} + for platform, threshold in thresholds.items(): + if repo_size_kb == 0: + # Unknown size - give moderate score + scores[platform] = 0.5 + elif repo_size_kb <= threshold * 0.1: + # Very small - excellent compatibility + scores[platform] = 1.0 + elif repo_size_kb <= threshold * 0.5: + # Small - good compatibility + scores[platform] = 0.8 + elif repo_size_kb <= threshold: + # At threshold - moderate compatibility + scores[platform] = 0.6 + elif repo_size_kb <= threshold * 2: + # Over threshold - poor compatibility + scores[platform] = 0.3 + else: + # Way over threshold - very poor compatibility + scores[platform] = 0.1 + + # Check README for size-related information + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for explicit size mentions + if any(keyword in readme_text for keyword in ['lightweight', 'small', 'compact', 'efficient']): + # Boost all scores slightly for models claiming to be lightweight + for platform in scores: + scores[platform] = min(1.0, scores[platform] + 0.1) + elif any(keyword in readme_text for keyword in ['large', 'heavy', 'resource-intensive']): + # Reduce scores for models explicitly stating they are large + for platform in scores: + scores[platform] = max(0.0, scores[platform] - 0.1) + + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, scores, latency_ms) + + +register(SizeMetric()) \ No newline at end of file diff --git a/src/acmecli/scoring.py b/src/acmecli/scoring.py index 2603046..b020d9e 100644 --- a/src/acmecli/scoring.py +++ b/src/acmecli/scoring.py @@ -1,19 +1,35 @@ import time + def compute_net_score(results: dict): - # Weighted sum (example weights, including HF metric if present) + """Compute weighted net score from individual metric results.""" + # Weighted sum (weights should add up to 1.0) weights = { - 'license': 0.18, - 'ramp_up_time': 0.13, - 'bus_factor': 0.10, - 'performance_claims': 0.10, - 'size_score': 0.10, - 'dataset_and_code_score': 0.10, - 'dataset_quality': 0.13, - 'code_quality': 0.10, - 'hf_downloads': 0.06 + 'license': 0.20, # High weight for license compatibility + 'ramp_up_time': 0.15, # Important for ease of adoption + 'bus_factor': 0.12, # Risk management + 'performance_claims': 0.12, # Evidence of quality + 'size_score': 0.10, # Deployability concerns + 'dataset_and_code_score': 0.10, # Availability of resources + 'dataset_quality': 0.11, # Quality of training data + 'code_quality': 0.10, # Engineering practices } + t0 = time.perf_counter() - net_score = sum(results[k].value * weights[k] for k in weights if k in results) + net_score = 0.0 + + for metric_name, weight in weights.items(): + if metric_name in results: + metric_result = results[metric_name] + metric_value = metric_result.value + + # Handle size_score specially since it's a dict + if metric_name == 'size_score' and isinstance(metric_value, dict): + # Average across all platform scores + platform_scores = list(metric_value.values()) + metric_value = sum(platform_scores) / len(platform_scores) if platform_scores else 0.0 + + net_score += metric_value * weight + 
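+    # Worked example (editor's note, hypothetical values): if every metric scored 0.5
+    # and size_score averaged 0.5 across platforms, the loop above gives
+    # 0.5 * (0.20 + 0.15 + 0.12 + 0.12 + 0.10 + 0.10 + 0.11 + 0.10) = 0.5.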
latency_ms = int((time.perf_counter() - t0) * 1000) return net_score, latency_ms \ No newline at end of file diff --git a/test_url.txt b/test_url.txt new file mode 100644 index 0000000..be601e7 --- /dev/null +++ b/test_url.txt @@ -0,0 +1 @@ +https://github.com/pytorch/pytorch diff --git a/tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc new file mode 100644 index 0000000..7ffc898 Binary files /dev/null and b/tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc differ diff --git a/tests/__pycache__/test_metric_heuristics.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/test_metric_heuristics.cpython-312-pytest-8.4.2.pyc new file mode 100644 index 0000000..a07ecaf Binary files /dev/null and b/tests/__pycache__/test_metric_heuristics.cpython-312-pytest-8.4.2.pyc differ
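
Usage sketch (editor's addition, not part of the patch): a minimal end-to-end run of the pieces introduced above, assuming the acmecli package is installed and importable. It fetches GitHub metadata, scores one registered metric, and combines the results; error handling and the full metric-registry wiring from cli.py are omitted, and the URL is the one checked in via test_url.txt.

    # Sketch only: exercises GitHubHandler.fetch_meta, one metric's score(), and
    # compute_net_score as added in this patch.
    from acmecli.github_handler import GitHubHandler
    from acmecli.metrics.busfactor_metric import BusFactorMetric
    from acmecli.scoring import compute_net_score

    meta = GitHubHandler().fetch_meta("https://github.com/pytorch/pytorch")
    results = {"bus_factor": BusFactorMetric().score(meta)}  # MetricValue(name, value, latency_ms)
    net_score, latency_ms = compute_net_score(results)       # only the bus_factor weight applies here
    print(net_score, latency_ms)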