diff --git a/README.md b/README.md
index 19bf476..24b17a2 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,35 @@
 Esau Cortez
 Ethan Surber
 Sam Brahim
+
+# Metrics:
+
+# Ramp_up_time:
+
+Start latency timer using time.perf_counter().
+
+Look for a local README: if resource contains local_dir (a directory path), we check common README filenames (README.md, README.rst, README.txt, README) in that directory. If found, read it (UTF-8, errors replaced).
+
+If no local README, attempt a best-effort remote fetch (only if requests is installed) for common repo hosts:
+
+For GitHub: tries raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md for main and master.
+
+For Hugging Face: tries similar raw/{branch}/README.md patterns.
+
+Generic fallbacks are also attempted.
+
+Note: this remote fetch is optional and will be skipped if requests is not present. (Testing can mock requests so no network is required.)
+
+If no README content is available, return score 0.0 and the elapsed latency.
+
+If README content is found, compute:
+
+Length score from the word count using thresholds (0.0 / 0.1 / 0.25 / 0.4).
+
+Installation score = +0.35 if README contains an "installation" heading or common install phrases (pip install, conda install, docker, etc.).
+
+Code snippet score = +0.25 if README contains fenced code blocks (```) or indented code lines (4 leading spaces or tabs).
+
+Sum weights (length + install + code) and cap at 1.0. Round score to 4 decimals.
+
+Return (score, latency_ms) where latency_ms is integer milliseconds (rounded).
diff --git a/phase1/pytest.ini b/phase1/pytest.ini
new file mode 100644
index 0000000..bcefd4f
--- /dev/null
+++ b/phase1/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+pythonpath = src
diff --git a/phase1/requirements-dev.txt b/phase1/requirements-dev.txt
new file mode 100644
index 0000000..0b30880
--- /dev/null
+++ b/phase1/requirements-dev.txt
@@ -0,0 +1,4 @@
+pytest
+coverage
+pytest-mock
+requests-mock
diff --git a/phase1/requirements.txt b/phase1/requirements.txt
new file mode 100644
index 0000000..66fccf6
--- /dev/null
+++ b/phase1/requirements.txt
@@ -0,0 +1 @@
+requests>=2.0.0
diff --git a/phase1/requirements.txt.txt b/phase1/requirements.txt.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/phase1/src/__init__.py b/phase1/src/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/phase1/src/__init__.py
@@ -0,0 +1 @@
+
diff --git a/phase1/src/metrics/__init__.py b/phase1/src/metrics/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/phase1/src/metrics/__init__.py
@@ -0,0 +1 @@
+
diff --git a/phase1/src/metrics/ramp_up_time.py b/phase1/src/metrics/ramp_up_time.py
index 93b2cdb..0a15f2f 100644
--- a/phase1/src/metrics/ramp_up_time.py
+++ b/phase1/src/metrics/ramp_up_time.py
@@ -1 +1,208 @@
-#ramp_up_time metric
\ No newline at end of file
+# SWE 45000, PIN FALL 2025
+# TEAM 4
+# PHASE 1 PROJECT
+
+#METRIC: ramp_up_time
+#REQUIREMENTS SATISFIED: latency score, ramp_up_time metric score
+
+# DISCLAIMER: This file contains code either partially or entirely written by
+# Artificial Intelligence
+"""
+src/metrics/ramp_up_time.py
+
+Metric signature:
+    def metric(resource: Dict[str, Any]) -> Tuple[float, int]
+
+The metric returns:
+    (score_in_[0.0,1.0], latency_ms_int)
+
+Scoring (total = 1.0):
+  - README length                      --> up to 0.40 (raw word count of readme)
+  - Installation section keyword       --> up to 0.35 (does the readme include installation/startup related keywords/headers?)
+  - Code snippets (fenced or indented) --> up to 0.25 (are there any code snippets/examples included?)
+"""
+from __future__ import annotations
+
+import os
+import re
+import time
+from typing import Any, Dict, Optional, Tuple
+
+# Threshold-based length scoring (words -> score)
+def _length_score(word_count: int) -> float:
+    if word_count < 50:
+        return 0.0
+    if 50 <= word_count <= 199:
+        return 0.1
+    if 200 <= word_count <= 499:
+        return 0.25
+    return 0.4  # >= 500 words
+
+# Detect installation section by heading or common install phrases
+_INSTALL_RE = re.compile(
+    r"(^|\n)\s*(?:#{1,6}\s*)?(installation|install|setup|getting started|quickstart|usage)\b",
+    flags=re.I,
+)
+_INSTALL_PHRASES = [
+    "pip install",
+    "conda install",
+    "docker",
+    "docker-compose",
+    "requirements.txt",
+    "setup.py",
+    "poetry add",
+]
+
+# Detect code fences or indented code blocks
+_CODE_FENCE_RE = re.compile(r"```")  # fenced blocks
+_INDENTED_CODE_RE = re.compile(r"(?m)^( {4}|\t).+")  # lines starting with 4 spaces or a tab
+
+
+def _read_local_readme(local_dir: str) -> Optional[str]:
+    if not local_dir:
+        return None
+    candidates = ["README.md", "README.rst", "README.txt", "README"]
+    for name in candidates:
+        p = os.path.join(local_dir, name)
+        if os.path.isfile(p):
+            try:
+                with open(p, "r", encoding="utf-8", errors="replace") as fh:
+                    return fh.read()
+            except Exception:
+                # reading error -> skip to next candidate
+                continue
+    return None
+
+
+def _try_fetch_remote_readme(url: str, timeout: float = 6.0) -> Optional[str]:
+    """
+    Best-effort attempt to fetch README over HTTP for common hosts (GitHub, Hugging Face).
+    This is best-effort and optional — remote fetching is used only if requests is available.
+    """
+    try:
+        import requests  # requests is optional dependency; if missing, we skip remote fetch
+    except Exception:
+        return None
+
+    url = url.rstrip("/")
+    # Try GitHub raw patterns
+    if "github.com/" in url:
+        # attempt to grab owner/repo from the URL
+        try:
+            parts = url.split("github.com/")[-1].split("/")
+            owner, repo = parts[0], parts[1]
+            repo = repo.replace(".git", "")
+            for branch in ("main", "master"):
+                raw = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
+                try:
+                    r = requests.get(raw, timeout=timeout)
+                    if r.status_code == 200 and r.text.strip():
+                        return r.text
+                except Exception:
+                    pass
+        except Exception:
+            pass
+
+    # Try Hugging Face pattern
+    if "huggingface.co/" in url:
+        try:
+            hf_path = url.split("huggingface.co/")[-1].strip("/")
+            # try common raw endpoints
+            for branch in ("main", "master"):
+                raw = f"https://huggingface.co/{hf_path}/raw/{branch}/README.md"
+                try:
+                    r = requests.get(raw, timeout=timeout)
+                    if r.status_code == 200 and r.text.strip():
+                        return r.text
+                except Exception:
+                    pass
+        except Exception:
+            pass
+
+    # Generic attempt: try /raw/main/README.md and /raw/master/README.md
+    for suffix in ("/raw/main/README.md", "/raw/master/README.md", "/README.md"):
+        candidate = url + suffix
+        try:
+            r = requests.get(candidate, timeout=timeout)
+            if r.status_code == 200 and r.text.strip():
+                return r.text
+        except Exception:
+            pass
+
+    return None
+
+
+def _has_install_section(content: str) -> bool:
+    if _INSTALL_RE.search(content):
+        return True
+    low = content.lower()
+    for phrase in _INSTALL_PHRASES:
+        if phrase in low:
+            return True
+    return False
+
+
+def _has_code_snippet(content: str) -> bool:
+    if _CODE_FENCE_RE.search(content):
+        return True
+    if _INDENTED_CODE_RE.search(content):
+        return True
+    return False
+
+
+def metric(resource: Dict[str, Any]) -> Tuple[float, int]:
+    """
+    Compute ramp-up-time proxy score for a model resource.
+
+    resource keys used:
+      - 'local_dir' (optional): path to a local clone; metric prioritizes reading README from here.
+      - 'url' (optional): model repo URL; used for best-effort remote fetch if local README not found.
+
+    Returns:
+        (score, latency_ms)
+    """
+    t0 = time.perf_counter()
+    try:
+        content: Optional[str] = None
+
+        # 1) Prefer local README if present (fast and deterministic)
+        local_dir = resource.get("local_dir") or resource.get("local_path") or None
+        if local_dir and isinstance(local_dir, str):
+            content = _read_local_readme(local_dir)
+
+        # 2) If no local README, try remote fetch (best-effort; requires 'requests' in env)
+        if content is None:
+            url = resource.get("url") or ""
+            if url:
+                try:
+                    content = _try_fetch_remote_readme(url)
+                except Exception:
+                    content = None  # degrade gracefully
+
+        # 3) If still none, produce score 0.0
+        if not content:
+            score = 0.0
+            latency_ms = int(round((time.perf_counter() - t0) * 1000.0))
+            return float(score), int(latency_ms)
+
+        # Normalize content
+        # Word count for length-based scoring
+        words = re.findall(r"\w+", content)
+        wc = len(words)
+
+        len_score = _length_score(wc)
+        install_score = 0.35 if _has_install_section(content) else 0.0
+        code_score = 0.25 if _has_code_snippet(content) else 0.0
+
+        total = len_score + install_score + code_score
+        if total > 1.0:
+            total = 1.0
+
+        latency_ms = int(round((time.perf_counter() - t0) * 1000.0))
+        return float(round(total, 4)), latency_ms
+
+    except Exception:
+        # Any unexpected error should degrade to safe default (score 0.0),
+        # but still measure elapsed time.
+        latency_ms = int(round((time.perf_counter() - t0) * 1000.0))
+        return 0.0, latency_ms
diff --git a/phase1/tests/integration/__pycache__/test_prepare_resource.cpython-313-pytest-8.4.2.pyc b/phase1/tests/integration/__pycache__/test_prepare_resource.cpython-313-pytest-8.4.2.pyc
new file mode 100644
index 0000000..6e3b1ec
Binary files /dev/null and b/phase1/tests/integration/__pycache__/test_prepare_resource.cpython-313-pytest-8.4.2.pyc differ
diff --git a/phase1/tests/integration/test_prepare_resource.py b/phase1/tests/integration/test_prepare_resource.py
new file mode 100644
index 0000000..11f3930
--- /dev/null
+++ b/phase1/tests/integration/test_prepare_resource.py
@@ -0,0 +1 @@
+#blank
\ No newline at end of file
diff --git a/phase1/tests/unit/__pycache__/test_ramp_up_time_local.cpython-313-pytest-8.4.2.pyc b/phase1/tests/unit/__pycache__/test_ramp_up_time_local.cpython-313-pytest-8.4.2.pyc
new file mode 100644
index 0000000..68a6943
Binary files /dev/null and b/phase1/tests/unit/__pycache__/test_ramp_up_time_local.cpython-313-pytest-8.4.2.pyc differ
diff --git a/phase1/tests/unit/test_ramp_up_time_local.py b/phase1/tests/unit/test_ramp_up_time_local.py
new file mode 100644
index 0000000..fcd096c
--- /dev/null
+++ b/phase1/tests/unit/test_ramp_up_time_local.py
@@ -0,0 +1,20 @@
+# tests/unit/test_ramp_up_time_local.py
+from pathlib import Path
+from src.metrics.ramp_up_time import metric
+
+def write_readme(tmp_path: Path, text: str) -> str:
+    p = tmp_path / "README.md"
+    p.write_text(text, encoding="utf-8")
+    return str(tmp_path)
+
+def test_empty_readme(tmp_path):
+    d = write_readme(tmp_path, "")
+    score, lat = metric({"local_dir": d})
+    assert score == 0.0
+    assert isinstance(lat, int) and lat >= 0
+
+def test_length_only(tmp_path):
+    text = "word " * 250
+    d = write_readme(tmp_path, text)
+    score, _ = metric({"local_dir": d})
+    assert 0.24 <= score <= 0.26