From 540022b9fabefb4591ff1d9f2be372a6ad4656fc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 27 Sep 2025 19:46:29 +0000 Subject: [PATCH 1/3] Initial plan From d1d1b8c80551a9086c15585522f99f4fdd3fb332 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 27 Sep 2025 19:49:49 +0000 Subject: [PATCH 2/3] Initial assessment and plan for implementing missing components Co-authored-by: AF-Warsame <201992579+AF-Warsame@users.noreply.github.com> --- .coverage | Bin 53248 -> 0 bytes __pycache__/run.cpython-312.pyc | Bin 0 -> 149 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 150 -> 148 bytes .../__pycache__/hf_handler.cpython-312.pyc | Bin 0 -> 1714 bytes src/acmecli/__pycache__/types.cpython-312.pyc | Bin 4206 -> 4204 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 692 -> 690 bytes .../metrics/__pycache__/base.cpython-312.pyc | Bin 381 -> 379 bytes .../license_metric.cpython-312.pyc | Bin 1308 -> 1306 bytes ...st_hf_handler.cpython-312-pytest-8.4.2.pyc | Bin 0 -> 2952 bytes ...ic_heuristics.cpython-312-pytest-8.4.2.pyc | Bin 0 -> 2254 bytes 10 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .coverage create mode 100644 __pycache__/run.cpython-312.pyc create mode 100644 src/acmecli/__pycache__/hf_handler.cpython-312.pyc create mode 100644 tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc create mode 100644 tests/__pycache__/test_metric_heuristics.cpython-312-pytest-8.4.2.pyc diff --git a/.coverage b/.coverage deleted file mode 100644 index ac170622d430d895624681c89b1911e8c06d184a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI)O>Y}T7zglOukE$-;w_ZQilUOaKoT`c>r`q*dw{gfp`uV;3IY+~uGiyaiM{LY zuG2&nLT-^NRYHP0;#*LTd;t*R!nGU#l>?P3fe>a702T;900I#Be+zWq9Mj7a6YABE1GCnY zfoEFMi=M|%zqPz{VOd;QI(2SYMEk_dh@fSDUMvaEy)4?&6Aim5h3(XB%M5I1RRn7? 
zN!{^foo{rsk8X9Ga3PH6wW?jGs6eh#7H!XNnckLIms@jTg7|EK+zg@}l!>%g9Zn%; z;_zAF$%gc#V@W?M#f)8_9htrM$%tOwzhAvB!#bIsML(khwV@ros+?_5MwaWJFs0x$W3W=0$Ja+f^&?PEQHrz8_Me< zSLk9C8K0|9rx(0SWSl$>ZcyPIH(~7s@~J5}EVI*T)rkC~POHrqha#dA2kx-QVX=W5 zS2p#v+2vaH>N{{b6LP+yQB=pVFLmVh#h(+s$hoi@jyor~64U}-7|oYo+OI0qD&?ks zsY&m*s%9r}!|f_v{Az`s#y)+fpqHnn)a#eRhQd76njQGtE5(6Ev%Pyr)8XU8+YR@U z!A8QpsM!>5BkVF9EuxDs-=r|U46`$*1xf{|mSLH%{G#VBjcSEA4Qz_=l%TvRe&UO>1R<>71x)XW5JUOXe zEr!h~zFP5%KG1}cE4j-snjC6L)9-l6K=QCbGKuJol1U$lk5F~y^?Z40a#(fZbk$0{ zQt0Ygy?o$++KqYyk4yZ>4OD{n^t!+UH-hxVZ=1oI*jaYoO$DFM<;y1z3@bPnp<3bR z{Iw499=6&4q{g3)Jb7{zWUTmg&^v`xW1Yqg-k1=g!IJ zi3Sl06w}lxed?kQtD*bV%)X;?5Y2GgY5%>Mb)V zcD$JxXe19D?6sHkG~m*^o&I*hc6}VY8J&R#x9AGiB84PX{9b2dmM8eCaZjNiED(SI z1Rwwb2tWV=5P$##AOHaf>^%WZ&8Rxh|1-wVigAaYus{F;5P$##AOHafKmY;|fB*y_ z@Kg$nWwe7v@)sUQ_Nm(BMEo}Z#}_I`7LJWjtun@K#kg(!^;A|6g@ynGAOHafKmY;| zfB*y_009Ue3D@5P$## zAOHafKmY;|fB*y_0D++bm4c?&r%$ZB*e=8}m!2Ut3=BJ!{3ZTGDFT zD?a^)8~O(~^o9LJT~pT6gm|*;dV%yPW~Z_Et6#5e!}2tWV=5P$##AOHafKmY;|P?SZ@R@2O0 zd-MOoQEfd{NZ9NP<2&a6xg(lPl|wTB&mPzOQ~}8;UX+PyN{F+g7 zYPMJ`sr>nW&bXx*_l!S{KWP5{+ml^Elnw$AfB*y_009U<00Izz00bZafgJ*Qn(V9D zT=Vb$ z1w~aL009U<00Izz00bZa0SG_<0=o$C_x~~f-vtJ{AOHafKmY;|fB*y_009U<00MhR F;6HV~bx;5R diff --git a/__pycache__/run.cpython-312.pyc b/__pycache__/run.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6249259f94c0f7015ffcbd125a85307937ecb4db GIT binary patch literal 149 zcmX@j%ge<81g`csGPr^CV-N=hn4pZ$JV3^Dh7^Vr#vFzyhDt_Frk5ZoKTXD4%tfVn zD;Yk6xWAP2GxBp&^?@RJsYUwb`9<0KC8@G%qg~0}!~{-o?n!$Uy@o}0wIcvlJygl Ob5oOZGW92B>H+{{ITYyt delta 66 zcmbQjIE|6}G%qg~0}yQ7dUPVUv9_tJRZMtdQE_5!s(P`4TYgSTYEf~FOKO>}qqDDT UOmR_iOk#3wYI06y%*1qE0H1Od!TO-vg{6rTOVYij;#Fn^Bg01>g8vQ=6tps108NkA3!z@d#Vt7T_wFIl{1b}a(3 ztE%E)4poD4Fes`5r%-X>=p#~c>jmUUu_hdldg6vcrB?LPnYB$IZF$nZH}Ab~zkP4s z%>LHUP!Fh#-zg`*69M?0IW?iaE2lT9DF6r{XM$M8axsnv_W|Tr02Frk3NXemaS`#D zIn#P2RMF>5LFyL(0x=GP7!NrlKpu&45eh3}Oj;5n67f#lo6u6wM3@K}Y?3;RoD5Fs z)o-|j9;yZUoJMK10K6c}&2et#l7ie$HJanIJmfoRmRfX_P|# z2kE(l^jv|IFZ6gmDatNi@s1bPU#P=-1@eOgxPbIsfow3&k^k9yLXs8?(bn zB-40^Y~peAlaTmK&32|NXv7T!2_Uj<%$k(TCQ{rmDN@!o45ON(5szu;C}pGNYxgvo zf?>xB$C0Ba!}G||AcchE77j1-BjJclBs*;y4v{fh$ROLXiI6}JX~vpiBbDM*+tHki zO~g6dN)hk)BORq3gDyR$<#t0Pgt3JqULxAajAM2diImnb)dHmD@6d8FV>DuuAJXyJ#7L(8ft8|#Xu&m-M5lw@2@R<8CydWqD# z4XTWDx%K?g54?-As_Y(mgMR=gqTd@Rx3(81*CyBXC)4@yqt4L!!bZpD;C9Q_(BroJ zSh-`Q_*wBrabd4*&$Azx$lpKocNa$2MmIer|IK`~>}!7QyMExi{>s-|_O};?*M^^b znU5ZY`<}~N@^<&GRtgX0qmR1}eAmjs@Uw}HiOp}fwNmg?8f;&kDu;TWB{!0rOS?0> zD^gKtpMU%@HW0QWJ)u7<1F!fvB#1)T70Qea&_ z0n}{5KbXO{YPg7i847pR0{COLa`TMwOdhS2@fH4ry6kW5(w+z$$GrhfZ$RUpVt@<0 J1ys4O{R5e8nWg{$ literal 0 HcmV?d00001 diff --git a/src/acmecli/__pycache__/types.cpython-312.pyc b/src/acmecli/__pycache__/types.cpython-312.pyc index 3fc03c202c5a2c46b4d07904bf8aac9eaff39819..669d12b50aad6417c8068d1d3b0e6401063348af 100644 GIT binary patch delta 70 zcmaE-@J4~}G%qg~0}!~{-^e(}zLD=Llcs@wMt*Lpeo<*&UTTqkd45s0eo1O^34|yv UO4d(I&P`3u$<*J>$DF_m07U>7HCT$~EtC;Y_qTnA4XrY7fP>gT4G6lEqC>rc*M@&EvxhZ(#8 delta 78 zcmdnQx`mbZG%qg~0}vcAyOFVDBd-pVv8}6BOn72Zabj+&da;6AeojhiQE`k*YMHL1 gv#)DRaZz$iVsdV3a!zJUZfZ$UW^!@NnA4XrY7fP>gT4G6lEqC>u>(dsL2cf!iE}s delta 79 zcmbQmHHVA$G%qg~0}yQ7dNgD5M&8Yg#?G!*G2w|t#fiD8>ct9f`8g@6Ma3~Lsb#v3 i&c3cO#YM?6iOIRC$vK%Zxv3>ZnaRa5n?EvYG6Mh+vm2EF diff --git a/tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/test_hf_handler.cpython-312-pytest-8.4.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ffc8984bb693dabf12035faf4bd7da9e2e5ee40 GIT binary patch 
literal 2952 zcmeHJ&1>976xT?zpR(P?iA@4-@KT3H#o68Xqot|)5wJ^o$+`F<6wU0e9DOV^+Boa3 zz?2+PTBvVz!T&(XA0m_5jt9inOpMy<72?rIs>M^^roag_F{^H6>9#hfrdf1t~J#mfn1R zmEi-VA$Z}8X8t*&k(y;vTzO6=`AVJvM5PO7P~72pHOVWVKAC^5$tw(=QK_)c^Rn0E zK2iAq?(hf^3@^(%7r0lXEuTkFa^?IDZ{$crm8di-jnqd<*t>s%wAGW=n zRf|$g!ucCF`@h&kkzU|g>jYD zP_XU0frTimxQ>l|ij4^(&P!JB;Lz@wA`v1ENFvT!Ht~6XXg(M?Zpd?GWjC-bm$Evy zhM0z?Mc7r7_JVndftAmA^P2G)4U)r zzH9kb$UW?m$%DIb96Rv(7Qsf1>0aPJ#KXSDlVhMq#2LVJkyWz4AAOVM}Z^7c_=uuVeO-FO+6~$fo0Ra9g2@3p}nU zEfs%!yUL5|rSJ6JyQliZ4{P79jjygB=^MN2FZ7u`bii?A3>(k%#tF=?W7zoqPGUI9 zVYcEKZ0w=TEn(4pu%5Xkd=4Aqzutu};kQ23uZ-c`(~tJySf4wA;yOl0yD`8 z`!I7$n7KLNtW$k<46i@^czHt_6z7rD7iBqd)4++dC_#gYEhg5gmSZBi8OC_8c~ zz$h} ztAF$G2RdRi-+cX*zteGj6Vq?)fH;5go6|xY&tk9J61|?vd3B5sS~%JW_Cg39E=s{{@#3IGlf`?q=acbGU%DFz2Ai$_Lj(r>RWu3*UGS^CFX&zlW>!7!722 zRCu!RD8SWZMhmz&(85BfZB?L$EO7X9s@^0Hr+`6(OFX6}3;zE1ycERgq;@xnKTY89 z^fM)5qKL2Ht6?$WW$HikG9@yJIpL+iIT>huNuPPZ3RwK3oqZtuk>E8^-mK~Jg}mA% za6u$p-u>imrQ5ITw7l9Zb9Gs#+INXkj$Mm7t!|rc&(;Pp{GTx*Q$wdr_O^AEDNuon zMJ|@PXzX4wDYZ!u)vVV5ssgB%H2KGxjxOY8Q%7;Z1<@2JNmCFankqNrK9_W=D?EFR ziv=#0cBfNyR-gl(!F63h3xCZGxi zHCs0Kuqi+q4||RqI33Eet`A_!ZWy%f_goxB8IN55q3IeO)3e$1{b;X_-&rTbC+wQh z@`+~#hW)tfnjJHM6}`leNBEn76?yu&?ONJcN*fDlVfsfHJQQ;fFVBMuE5i&ndRb|X6E zcFP!D)C5O#D8z}O*Wt^tC;gVcd78hqcmJ%QJ)J$69e#WioXmW1TDZIS6?5U+Bj&OsB=RKV!tt@2Wzj@IrzPb&Da>p=ipt4t<$mkl4E1<3!Z8OA1Q!1htrYt z0?UirM`ppnZW{5`t;V+ou2YrRJH!d#+KyZ-m-q;O=d%wfv8tx!*_P{6W1?tv5-XgM xh?<`y@)6hJwJu%cS3Mv1T>EoU2a|v4>3bmFC Date: Sat, 27 Sep 2025 19:55:26 +0000 Subject: [PATCH 3/3] Implement core infrastructure: metrics, handlers, and scoring pipeline Co-authored-by: AF-Warsame <201992579+AF-Warsame@users.noreply.github.com> --- .gitignore | 50 +++++++++ requirments.txt => requirements.txt | 0 src/acmecli.egg-info/SOURCES.txt | 13 +++ src/acmecli/cache.py | 23 +++++ src/acmecli/cli.py | 62 ++++++++---- src/acmecli/github_handler.py | 95 ++++++++++++++++++ .../license_metric.cpython-312.pyc | Bin 1306 -> 2890 bytes src/acmecli/metrics/busfactor_metric.py | 64 ++++++++++++ src/acmecli/metrics/code_quality_metric.py | 78 ++++++++++++++ src/acmecli/metrics/dataset_code_metric.py | 58 +++++++++++ src/acmecli/metrics/dataset_quality_metric.py | 59 +++++++++++ src/acmecli/metrics/hf_downloads_metric.py | 7 +- src/acmecli/metrics/license_metric.py | 51 ++++++++-- src/acmecli/metrics/performance_metric.py | 52 ++++++++++ src/acmecli/metrics/rampup_metric.py | 54 ++++++++++ src/acmecli/metrics/size_metric.py | 65 ++++++++++++ src/acmecli/scoring.py | 38 +++++-- test_url.txt | 1 + 18 files changed, 732 insertions(+), 38 deletions(-) create mode 100644 .gitignore rename requirments.txt => requirements.txt (100%) create mode 100644 src/acmecli/cache.py create mode 100644 src/acmecli/github_handler.py create mode 100644 src/acmecli/metrics/busfactor_metric.py create mode 100644 src/acmecli/metrics/code_quality_metric.py create mode 100644 src/acmecli/metrics/dataset_code_metric.py create mode 100644 src/acmecli/metrics/dataset_quality_metric.py create mode 100644 src/acmecli/metrics/performance_metric.py create mode 100644 src/acmecli/metrics/rampup_metric.py create mode 100644 src/acmecli/metrics/size_metric.py create mode 100644 test_url.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..34a7ca4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# 
Testing +.coverage +.pytest_cache/ +.tox/ +htmlcov/ + +# Virtual environments +venv/ +env/ +ENV/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log + +# Temporary files +tmp/ +temp/ \ No newline at end of file diff --git a/requirments.txt b/requirements.txt similarity index 100% rename from requirments.txt rename to requirements.txt diff --git a/src/acmecli.egg-info/SOURCES.txt b/src/acmecli.egg-info/SOURCES.txt index 7ab9ca9..0dc8322 100644 --- a/src/acmecli.egg-info/SOURCES.txt +++ b/src/acmecli.egg-info/SOURCES.txt @@ -1,6 +1,9 @@ pyproject.toml src/acmecli/__init__.py +src/acmecli/cache.py src/acmecli/cli.py +src/acmecli/github_handler.py +src/acmecli/hf_handler.py src/acmecli/reporter.py src/acmecli/scoring.py src/acmecli/types.py @@ -10,6 +13,16 @@ src/acmecli.egg-info/dependency_links.txt src/acmecli.egg-info/top_level.txt src/acmecli/metrics/__init__.py src/acmecli/metrics/base.py +src/acmecli/metrics/busfactor_metric.py +src/acmecli/metrics/code_quality_metric.py +src/acmecli/metrics/dataset_code_metric.py +src/acmecli/metrics/dataset_quality_metric.py +src/acmecli/metrics/hf_downloads_metric.py src/acmecli/metrics/license_metric.py +src/acmecli/metrics/performance_metric.py +src/acmecli/metrics/rampup_metric.py +src/acmecli/metrics/size_metric.py +tests/test_hf_handler.py +tests/test_metric_heuristics.py tests/test_metrics_contract.py tests/test_reporter_schema.py \ No newline at end of file diff --git a/src/acmecli/cache.py b/src/acmecli/cache.py new file mode 100644 index 0000000..db48b21 --- /dev/null +++ b/src/acmecli/cache.py @@ -0,0 +1,23 @@ +from typing import Dict, Optional + + +class InMemoryCache: + """Simple in-memory cache implementation.""" + + def __init__(self): + self._cache: Dict[str, bytes] = {} + self._etags: Dict[str, str] = {} + + def get(self, key: str) -> bytes | None: + """Get cached data by key.""" + return self._cache.get(key) + + def set(self, key: str, data: bytes, etag: str | None = None) -> None: + """Set cached data with optional etag.""" + self._cache[key] = data + if etag: + self._etags[key] = etag + + def get_etag(self, key: str) -> str | None: + """Get etag for cached data.""" + return self._etags.get(key) \ No newline at end of file diff --git a/src/acmecli/cli.py b/src/acmecli/cli.py index 649bd6b..e525815 100644 --- a/src/acmecli/cli.py +++ b/src/acmecli/cli.py @@ -41,6 +41,9 @@ def process_url(url: str, github_handler, hf_handler, cache): else: return None + if not meta: + return None + results = {} with concurrent.futures.ThreadPoolExecutor() as executor: future_to_metric = { @@ -48,30 +51,53 @@ def process_url(url: str, github_handler, hf_handler, cache): } for future in concurrent.futures.as_completed(future_to_metric): metric_name = future_to_metric[future] - mv = future.result() - results[metric_name] = mv + try: + mv = future.result() + results[metric_name] = mv + except Exception as e: + logging.error(f"Error computing metric {metric_name}: {e}") + # Create a default MetricValue for failed metrics + from .types import MetricValue + results[metric_name] = MetricValue(metric_name, 0.0, 0) + net_score, net_score_latency = compute_net_score(results) + + # Helper function to safely get metric values + def get_metric_value(name, default=0.0): + metric = results.get(name) + return metric.value if metric else default + + def get_metric_latency(name, default=0): + metric = results.get(name) + return metric.latency_ms if metric else default + + # Handle size_score specially since it returns a dict + 
size_result = results.get('size_score') + size_score_value = size_result.value if size_result else { + 'raspberry_pi': 0.0, 'jetson_nano': 0.0, 'desktop_pc': 0.0, 'aws_server': 0.0 + } + return ReportRow( name=repo_name, category="MODEL", net_score=net_score, net_score_latency=net_score_latency, - ramp_up_time=results['ramp_up_time'].value, - ramp_up_time_latency=results['ramp_up_time'].latency_ms, - bus_factor=results['bus_factor'].value, - bus_factor_latency=results['bus_factor'].latency_ms, - performance_claims=results['performance_claims'].value, - performance_claims_latency=results['performance_claims'].latency_ms, - license=results['license'].value, - license_latency=results['license'].latency_ms, - size_score=results['size_score'].value, - size_score_latency=results['size_score'].latency_ms, - dataset_and_code_score=results['dataset_and_code_score'].value, - dataset_and_code_score_latency=results['dataset_and_code_score'].latency_ms, - dataset_quality=results['dataset_quality'].value, - dataset_quality_latency=results['dataset_quality'].latency_ms, - code_quality=results['code_quality'].value, - code_quality_latency=results['code_quality'].latency_ms, + ramp_up_time=get_metric_value('ramp_up_time'), + ramp_up_time_latency=get_metric_latency('ramp_up_time'), + bus_factor=get_metric_value('bus_factor'), + bus_factor_latency=get_metric_latency('bus_factor'), + performance_claims=get_metric_value('performance_claims'), + performance_claims_latency=get_metric_latency('performance_claims'), + license=get_metric_value('license'), + license_latency=get_metric_latency('license'), + size_score=size_score_value, + size_score_latency=get_metric_latency('size_score'), + dataset_and_code_score=get_metric_value('dataset_and_code_score'), + dataset_and_code_score_latency=get_metric_latency('dataset_and_code_score'), + dataset_quality=get_metric_value('dataset_quality'), + dataset_quality_latency=get_metric_latency('dataset_quality'), + code_quality=get_metric_value('code_quality'), + code_quality_latency=get_metric_latency('code_quality'), ) def main(argv: list[str]) -> int: diff --git a/src/acmecli/github_handler.py b/src/acmecli/github_handler.py new file mode 100644 index 0000000..0f35b87 --- /dev/null +++ b/src/acmecli/github_handler.py @@ -0,0 +1,95 @@ +import requests +import logging +from typing import Dict, Any + + +class GitHubHandler: + """Handler for GitHub repository metadata fetching.""" + + def __init__(self): + self.session = requests.Session() + # Set user agent for GitHub API + self.session.headers.update({ + 'User-Agent': 'ACME-CLI/1.0', + 'Accept': 'application/vnd.github.v3+json' + }) + + def fetch_meta(self, url: str) -> Dict[str, Any]: + """Fetch repository metadata from GitHub API.""" + try: + # Parse GitHub URL: https://github.com/owner/repo + parts = url.rstrip('/').split('/') + if len(parts) < 5 or 'github.com' not in parts[2]: + logging.error(f"Invalid GitHub URL format: {url}") + return {} + + owner, repo = parts[3], parts[4] + api_url = f"https://api.github.com/repos/{owner}/{repo}" + + response = self.session.get(api_url) + response.raise_for_status() + + repo_data = response.json() + + # Fetch additional metadata + meta = { + 'name': repo_data.get('name', ''), + 'full_name': repo_data.get('full_name', ''), + 'description': repo_data.get('description', ''), + 'stars': repo_data.get('stargazers_count', 0), + 'forks': repo_data.get('forks_count', 0), + 'watchers': repo_data.get('watchers_count', 0), + 'size': repo_data.get('size', 0), # in KB + 'language': 
repo_data.get('language', ''), + 'topics': repo_data.get('topics', []), + 'license': repo_data.get('license', {}).get('spdx_id', '') if repo_data.get('license') else '', + 'created_at': repo_data.get('created_at', ''), + 'updated_at': repo_data.get('updated_at', ''), + 'pushed_at': repo_data.get('pushed_at', ''), + 'default_branch': repo_data.get('default_branch', 'main'), + 'open_issues_count': repo_data.get('open_issues_count', 0), + 'has_wiki': repo_data.get('has_wiki', False), + 'has_pages': repo_data.get('has_pages', False), + 'archived': repo_data.get('archived', False), + 'disabled': repo_data.get('disabled', False), + } + + # Try to fetch contributors data + try: + contributors_url = f"https://api.github.com/repos/{owner}/{repo}/contributors" + contrib_response = self.session.get(contributors_url) + if contrib_response.status_code == 200: + contributors = contrib_response.json() + meta['contributors'] = { + contrib.get('login', 'unknown'): contrib.get('contributions', 0) + for contrib in contributors[:10] # Limit to top 10 + } + else: + meta['contributors'] = {} + except Exception as e: + logging.warning(f"Failed to fetch contributors for {url}: {e}") + meta['contributors'] = {} + + # Try to fetch README + try: + readme_url = f"https://api.github.com/repos/{owner}/{repo}/readme" + readme_response = self.session.get(readme_url) + if readme_response.status_code == 200: + readme_data = readme_response.json() + import base64 + readme_content = base64.b64decode(readme_data.get('content', '')).decode('utf-8') + meta['readme_text'] = readme_content + else: + meta['readme_text'] = '' + except Exception as e: + logging.warning(f"Failed to fetch README for {url}: {e}") + meta['readme_text'] = '' + + return meta + + except requests.RequestException as e: + logging.error(f"HTTP error fetching metadata for {url}: {e}") + return {} + except Exception as e: + logging.error(f"Failed to fetch metadata for {url}: {e}") + return {} \ No newline at end of file diff --git a/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc b/src/acmecli/metrics/__pycache__/license_metric.cpython-312.pyc index 8c4e89e17ad5f3154b09bfbbcdc2c0cfb884229a..e38ef5c4389cb2625432bd5e24225410c5d16d50 100644 GIT binary patch literal 2890 zcmb_e+iw(A7(a8{TX)&+r7e_8*NB7_*+NT%BB5xpt+k?&Na7MRna-SU2WBsxnU?Nm zHzAS4R(&Wj+Qk@cd@3O(y!s~?U%KuZ>aiv!@BnXxk|;d+ot>Fd>m?XC$$WFpx8L_W z-+lInP$+<4eRX1aV!(&cO?GgX=xo;BhRrfOA%rSIF1bm^cgd&u|3xR@=!CVFS^pGj;shO@1ehUvKN}!N!J;i- zk+cCl02YP8BIzH{nRctK4Vn?q^lZ|Mf~M!cHD~>1Oyg$*Qyd9fEp8+<4zPnI=qERx z6~O}E?WM)a>v67lt+2~~yb37u3|EVnpaR_JR(w{(MJc0mQFs67UG|oJ*I32DI*pQT zr!w{)A;R)U5p5w@(Xv>!@U)fmQ6Z~aR2VZg>QM@cI)SNZWC{lG8Bx^()IXVX3cVC6 zOpd`mq2;rRo>SK=8IakXz3{B<0rDC5I^v*B%ji9rFcq%M&GJ*|b8epF&?f;A716iC z1@0<$>MFV_P(F2l3II!k&OLHY$zqcsCK4APzT7>L&*E+}nag3)eIZZIcU#!BY+@3% zTT!!EHS})iLKr4yw~Jz*Q-$I~|FI0T^K^k6FYOuKI22N*nkP7Q%*d;XVIEJd%WZ|r z&2}Jv+(v&~L_bCc7klS=t1mQSduq|a2WxB|eXu3MWxlw=)}8(*4F>4|M8d8m+km*< zlbwvS3&`VIzMW}ruslAzczW)1b+pm8w-z6MK)T@fBwvys$@_as9+vF=J(=F-y4i2@ zd!2K=-|_GJ{-k6Vkl&83p36|bnlBWIo|&+y1oJv;Qpvy>#dvz5ND8DA%%$G*xCn{Y z$QI_DQyBk0G^{i?h9)|)n3+3VJ>Q5WYta!qG>*gHF^5^ll2%4o*AdokrS@#Tq#K5k za9Y7Bwx{khVR?K)fA;~zU;G_dd=`p$UnBNhEjr-%3o@`)`}Nxp*JOZ-mY&5l2;aeR zS;P~(Hf$6p+;!rFT@!2$ zm13iRZ**pqc^t;pgNtY-6uA-FRSWH^hY~aVss}~lGtzR$_W9|{(+mE^o_a^>M#rIA z$Dw-1i!;IH_Rjf<%M;bg;#j@?;Endfwf4jH_P&{xyB-wWdQaq^-Le$yy6-`)v3hiH zspr*3bg&T~TyAZv#x4ghOSdBNdVF~4&`={j+=vXX^M8wMt&Y|rdmk}ujX8+{chIgg zOTis0;b=WNvefrlBRbLuk2tb((mfA)<}9}q+}RWuSUUJhBRbFs53CCSM(d6R|Ml&) 
z#IcolM^&i4vvB&#*?K%V^XjeE*xW$1?bGm#w9>Zq%iT9R_tiT0)jRiJ=WCrk*R|TV zqxH6zW&$hW&g!AB`>ymY;Kg)3d}PUYWDUZIPQk~+CpSm6EYpB2XY<-5Bv+8f2Xgk{MdcztZw9CYxtK!gzwJSJmZLO+Y|F1N%lo?I25 zm-?!^R}s7xUb@d-q^Zf|lOD1cUNDG?1#CJ6Ii{GHFr+5rrn?^cj@cF;Coh8t`<9u{ o0a+C|j{60*{EE7MMV-HUQe59%#K;(jIREUw$8OFJHlc_Va@!r|W zh6Ez?A<{n}5d_7jN?!6O`0h(Xkc4YQ!I$Q3WraeYJZEMHnm&18e&=J(J^aqcub$@u z%FXwVR=&{z{wAjub8L*dXlw%mrlk-7)0vTKnI7nw5g3^nn3)w=StDp< zF`7qX8#n|SgFt6mU@)DV%-|L?d4pNp?mF0o892=5Hu}x3$sF|ELDP4Y_em1-T=JT0 zTDi)U#L`}=`Nbu_hTuhCj2;KlAp%^gCPb@nn1F+7AogE&RwI1qYM!kDYmO0V^%~E#-tEaScP|hGEuY z{gmv9F#M_?rPUuXi@AahPSDr{%}Wq*@iGB5EG`qABcQ@%Gr}W_fO6$^ga>fcI=A`3 z(A2G$c0L{gs=Z64`lVm0$De&ZqUg{ut%aStLqPTD;=iP-!c@+0iHoR|RSZ_S6m;Sh zofjxpoA@G MetricValue: + t0 = time.perf_counter() + + # Heuristics for bus factor (higher = safer, more distributed) + score = 0.0 + + contributors = meta.get('contributors', {}) + if contributors: + total_contributions = sum(contributors.values()) + contributor_count = len(contributors) + + if contributor_count >= 10: + score += 0.4 + elif contributor_count >= 5: + score += 0.3 + elif contributor_count >= 3: + score += 0.2 + elif contributor_count >= 2: + score += 0.1 + + # Check contribution distribution + if total_contributions > 0: + # Find the top contributor's share + max_contributions = max(contributors.values()) if contributors else 0 + top_contributor_share = max_contributions / total_contributions + + # Lower share of top contributor = better bus factor + if top_contributor_share < 0.3: + score += 0.3 + elif top_contributor_share < 0.5: + score += 0.2 + elif top_contributor_share < 0.7: + score += 0.1 + + # Organization/company backing (GitHub org vs individual) + full_name = meta.get('full_name', '') + if '/' in full_name: + owner = full_name.split('/')[0] + # Heuristic: longer names often indicate organizations + if len(owner) > 3 and not owner.islower(): + score += 0.1 + + # Forks indicate community involvement + forks = meta.get('forks', 0) + if forks > 50: + score += 0.2 + elif forks > 10: + score += 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(BusFactorMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/code_quality_metric.py b/src/acmecli/metrics/code_quality_metric.py new file mode 100644 index 0000000..c628377 --- /dev/null +++ b/src/acmecli/metrics/code_quality_metric.py @@ -0,0 +1,78 @@ +import time +from ..types import MetricValue +from .base import register + + +class CodeQualityMetric: + """Metric to assess code style, maintainability, and engineering practices.""" + name = "code_quality" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for code quality assessment + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for testing mentions + testing_keywords = ['test', 'testing', 'pytest', 'unittest', 'coverage', 'ci', 'continuous integration'] + if any(keyword in readme_text for keyword in testing_keywords): + score += 0.3 + + # Look for documentation practices + doc_keywords = ['documentation', 'docs', 'api', 'docstring', 'readme', 'wiki'] + if any(keyword in readme_text for keyword in doc_keywords): + score += 0.2 + + # Look for code style and linting + style_keywords = ['lint', 'flake8', 'pylint', 'black', 'isort', 'pre-commit', 'style guide'] + if any(keyword in readme_text for keyword in style_keywords): + score += 0.2 + + # Look for dependency management + dep_keywords = 
['requirements.txt', 'setup.py', 'pyproject.toml', 'pipfile', 'conda', 'environment'] + if any(keyword in readme_text for keyword in dep_keywords): + score += 0.1 + + # Look for version control best practices + vc_keywords = ['tag', 'release', 'version', 'changelog', 'semantic versioning'] + if any(keyword in readme_text for keyword in vc_keywords): + score += 0.1 + + # Check for popular programming language (better tooling/community) + language = meta.get('language', '').lower() + popular_languages = ['python', 'javascript', 'java', 'c++', 'typescript', 'go', 'rust'] + if language in popular_languages: + score += 0.1 + + # Check for recent activity (maintained code is generally better) + if meta.get('pushed_at'): + from datetime import datetime, timezone + try: + pushed_date = datetime.fromisoformat(meta['pushed_at'].replace('Z', '+00:00')) + now = datetime.now(timezone.utc) + days_since_push = (now - pushed_date).days + if days_since_push < 30: + score += 0.2 + elif days_since_push < 90: + score += 0.1 + except: + pass + + # Check open issues ratio (fewer issues relative to activity often indicates quality) + open_issues = meta.get('open_issues_count', 0) + stars = meta.get('stars', 0) + if stars > 0: + issue_ratio = open_issues / max(stars, 1) + if issue_ratio < 0.1: + score += 0.1 + elif issue_ratio < 0.2: + score += 0.05 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(CodeQualityMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/dataset_code_metric.py b/src/acmecli/metrics/dataset_code_metric.py new file mode 100644 index 0000000..6961681 --- /dev/null +++ b/src/acmecli/metrics/dataset_code_metric.py @@ -0,0 +1,58 @@ +import time +from ..types import MetricValue +from .base import register + + +class DatasetAndCodeMetric: + """Metric to assess availability of training dataset and code documentation.""" + name = "dataset_and_code_score" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for dataset and code availability + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for dataset-related information + dataset_keywords = ['dataset', 'data', 'training data', 'corpus', 'benchmark'] + if any(keyword in readme_text for keyword in dataset_keywords): + score += 0.3 + + # Look for specific well-known datasets + known_datasets = ['imagenet', 'coco', 'openimages', 'wikipedia', 'common crawl', + 'glue', 'squad', 'wmt', 'pile', 'c4', 'openwebtext'] + if any(dataset in readme_text for dataset in known_datasets): + score += 0.2 + + # Look for code availability indicators + code_keywords = ['code', 'implementation', 'source', 'repository', 'github', 'script'] + if any(keyword in readme_text for keyword in code_keywords): + score += 0.2 + + # Look for example usage or demo code + example_keywords = ['example', 'demo', 'tutorial', 'usage', 'quickstart', 'getting started'] + if any(keyword in readme_text for keyword in example_keywords): + score += 0.2 + + # Look for links to external resources + if 'http' in readme_text or 'www' in readme_text: + score += 0.1 + + # Check if repository has multiple programming languages (indicates comprehensive codebase) + language = meta.get('language', '') + if language: + score += 0.1 + + # Check repository size - larger repos often have more comprehensive code/data + size_kb = meta.get('size', 0) + if size_kb > 10000: # > 10MB suggests substantial content + score 
+= 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(DatasetAndCodeMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/dataset_quality_metric.py b/src/acmecli/metrics/dataset_quality_metric.py new file mode 100644 index 0000000..0108b51 --- /dev/null +++ b/src/acmecli/metrics/dataset_quality_metric.py @@ -0,0 +1,59 @@ +import time +from ..types import MetricValue +from .base import register + + +class DatasetQualityMetric: + """Metric to assess the quality of training/evaluation datasets used.""" + name = "dataset_quality" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for dataset quality assessment + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for high-quality, well-known datasets + premium_datasets = ['imagenet', 'coco', 'openimages', 'wmt', 'squad', 'glue', 'superglue'] + if any(dataset in readme_text for dataset in premium_datasets): + score += 0.4 + + # Look for dataset size indicators (larger often means better) + size_indicators = ['million', 'billion', 'large-scale', 'comprehensive', 'extensive'] + if any(indicator in readme_text for indicator in size_indicators): + score += 0.2 + + # Look for data curation and cleaning mentions + quality_keywords = ['curated', 'cleaned', 'filtered', 'validated', 'annotated', 'labeled'] + if any(keyword in readme_text for keyword in quality_keywords): + score += 0.2 + + # Look for diversity and bias considerations + diversity_keywords = ['diverse', 'balanced', 'bias', 'fairness', 'representative'] + if any(keyword in readme_text for keyword in diversity_keywords): + score += 0.1 + + # Look for evaluation methodology + eval_keywords = ['evaluation', 'benchmark', 'metric', 'validation', 'test set'] + if any(keyword in readme_text for keyword in eval_keywords): + score += 0.1 + + # Check for academic/research backing (often indicates quality) + if readme_text and any(keyword in readme_text for keyword in ['paper', 'research', 'university', 'arxiv']): + score += 0.1 + + # Check repository maturity (stars, forks indicate community validation) + stars = meta.get('stars', 0) + if stars > 500: + score += 0.1 + elif stars > 100: + score += 0.05 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(DatasetQualityMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/hf_downloads_metric.py b/src/acmecli/metrics/hf_downloads_metric.py index 494ed65..600abfd 100644 --- a/src/acmecli/metrics/hf_downloads_metric.py +++ b/src/acmecli/metrics/hf_downloads_metric.py @@ -1,15 +1,18 @@ import time +from ..types import MetricValue from .base import register + class HFDownloadsMetric: name = "hf_downloads" - def score(self, meta: dict): + def score(self, meta: dict) -> MetricValue: t0 = time.perf_counter() downloads = meta.get("downloads", 0) # Heuristic: normalize to [0,1] (e.g. 
>10000 is 1.0, <100 is 0.1) value = min(1.0, downloads / 10000) if downloads else 0.0 latency_ms = int((time.perf_counter() - t0) * 1000) - return type("MetricValue", (), {"name": self.name, "value": value, "latency_ms": latency_ms})() + return MetricValue(self.name, value, latency_ms) + register(HFDownloadsMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/license_metric.py b/src/acmecli/metrics/license_metric.py index 838708a..28b5752 100644 --- a/src/acmecli/metrics/license_metric.py +++ b/src/acmecli/metrics/license_metric.py @@ -1,18 +1,55 @@ import time -from ..types import Metric, Signals, TargetSpec, SourceHandler, Cache, MetricValue +from ..types import MetricValue from .base import register + class LicenseMetric: + """Metric to assess license clarity and permissiveness for LGPLv2.1 compatibility.""" name = "license" - def collect(self, spec: TargetSpec, handler: SourceHandler, cache: Cache) -> Signals: - # Week 1 stub: no network; just return empty signals - return {} - - def score(self, signals: Signals) -> MetricValue: + def score(self, meta: dict) -> MetricValue: t0 = time.perf_counter() - value = 0.0 # TODO: map license presence/compatibility → [0,1] + + # Heuristics for license compatibility with LGPLv2.1 + score = 0.0 + license_name = meta.get('license', '').lower() + readme_text = meta.get('readme_text', '').lower() + + # Check for explicit license in metadata + if license_name: + # LGPLv2.1-compatible licenses (high scores) + compatible_licenses = [ + 'mit', 'bsd', 'apache', 'lgpl', 'mpl', 'cc0', 'unlicense', 'public domain' + ] + if any(lic in license_name for lic in compatible_licenses): + score += 0.8 + # Potentially compatible (medium scores) + elif any(lic in license_name for lic in ['gpl-2', 'lgpl-2']): + score += 0.6 + # Less compatible (lower scores) + elif 'gpl-3' in license_name: + score += 0.3 + else: + score += 0.2 # Some license is better than none + + # Check README for license information + if readme_text: + license_keywords = ['license', 'licensing', 'copyright', 'terms', 'legal'] + if any(keyword in readme_text for keyword in license_keywords): + score += 0.1 + + # Look for specific license mentions in README + readme_compatible = ['mit', 'bsd', 'apache', 'lgpl', 'mozilla public license'] + if any(lic in readme_text for lic in readme_compatible): + score += 0.1 + + # Penalty for no license information at all + if not license_name and 'license' not in readme_text: + score = max(0.0, score - 0.3) + + value = min(1.0, score) latency_ms = int((time.perf_counter() - t0) * 1000) return MetricValue(self.name, value, latency_ms) + register(LicenseMetric()) diff --git a/src/acmecli/metrics/performance_metric.py b/src/acmecli/metrics/performance_metric.py new file mode 100644 index 0000000..f4a93a0 --- /dev/null +++ b/src/acmecli/metrics/performance_metric.py @@ -0,0 +1,52 @@ +import time +import re +from ..types import MetricValue +from .base import register + + +class PerformanceClaimsMetric: + """Metric to assess evidence of performance claims through benchmarks and evaluations.""" + name = "performance_claims" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for performance claims evidence + score = 0.0 + + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for benchmark-related keywords + benchmark_keywords = ['benchmark', 'evaluation', 'eval', 'performance', 'accuracy', 'f1', 'bleu', 'rouge'] + if any(keyword in readme_text for keyword in benchmark_keywords): + score += 
0.3 + + # Look for specific metrics or numbers indicating performance + if re.search(r'\d+\.?\d*%', readme_text) or re.search(r'score.*\d+', readme_text): + score += 0.2 + + # Look for comparison with other models + comparison_keywords = ['compared to', 'vs', 'versus', 'outperform', 'better than', 'state-of-the-art', 'sota'] + if any(keyword in readme_text for keyword in comparison_keywords): + score += 0.2 + + # Look for evaluation datasets + eval_datasets = ['glue', 'superglue', 'squad', 'coco', 'imagenet', 'wmt', 'bleu', 'rouge'] + if any(dataset in readme_text for dataset in eval_datasets): + score += 0.2 + + # Look for published papers or citations + paper_keywords = ['paper', 'arxiv', 'citation', 'published', 'acl', 'nips', 'icml', 'iclr', 'emnlp'] + if any(keyword in readme_text for keyword in paper_keywords): + score += 0.1 + + # Check if repo has releases (indicates mature development) + if meta.get('has_pages', False): # Often used for documentation/results + score += 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(PerformanceClaimsMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/rampup_metric.py b/src/acmecli/metrics/rampup_metric.py new file mode 100644 index 0000000..6dc7a12 --- /dev/null +++ b/src/acmecli/metrics/rampup_metric.py @@ -0,0 +1,54 @@ +import time +from ..types import MetricValue +from .base import register + + +class RampUpMetric: + """Metric to assess ease of ramp-up based on documentation and examples.""" + name = "ramp_up_time" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Heuristics for ramp-up time (higher = easier to ramp up) + score = 0.0 + + # Check for README content + readme_text = meta.get('readme_text', '').lower() + if readme_text: + score += 0.3 + # Look for common documentation sections + if any(keyword in readme_text for keyword in ['install', 'usage', 'example', 'quickstart', 'getting started']): + score += 0.2 + if any(keyword in readme_text for keyword in ['api', 'documentation', 'docs']): + score += 0.1 + + # Check for presence of wiki + if meta.get('has_wiki', False): + score += 0.1 + + # Check for active maintenance (recent updates) + if meta.get('pushed_at'): + from datetime import datetime, timezone + try: + pushed_date = datetime.fromisoformat(meta['pushed_at'].replace('Z', '+00:00')) + now = datetime.now(timezone.utc) + days_since_push = (now - pushed_date).days + if days_since_push < 30: + score += 0.2 + elif days_since_push < 90: + score += 0.1 + except: + pass + + # Check for stars (indication of community adoption) + stars = meta.get('stars', 0) + if stars > 100: + score += 0.1 + + value = min(1.0, score) + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, value, latency_ms) + + +register(RampUpMetric()) \ No newline at end of file diff --git a/src/acmecli/metrics/size_metric.py b/src/acmecli/metrics/size_metric.py new file mode 100644 index 0000000..c5c9208 --- /dev/null +++ b/src/acmecli/metrics/size_metric.py @@ -0,0 +1,65 @@ +import time +from typing import Dict +from ..types import MetricValue +from .base import register + + +class SizeMetric: + """Metric to assess model size compatibility with different hardware platforms.""" + name = "size_score" + + def score(self, meta: dict) -> MetricValue: + t0 = time.perf_counter() + + # Get repository size in KB + repo_size_kb = meta.get('size', 0) + + # Heuristic size thresholds for different 
hardware (in KB) + # Based on typical model sizes and hardware constraints + thresholds = { + 'raspberry_pi': 100_000, # ~100MB - very constrained + 'jetson_nano': 1_000_000, # ~1GB - moderate constraints + 'desktop_pc': 10_000_000, # ~10GB - good resources + 'aws_server': 50_000_000 # ~50GB - high resources + } + + # Calculate compatibility scores for each platform + scores = {} + for platform, threshold in thresholds.items(): + if repo_size_kb == 0: + # Unknown size - give moderate score + scores[platform] = 0.5 + elif repo_size_kb <= threshold * 0.1: + # Very small - excellent compatibility + scores[platform] = 1.0 + elif repo_size_kb <= threshold * 0.5: + # Small - good compatibility + scores[platform] = 0.8 + elif repo_size_kb <= threshold: + # At threshold - moderate compatibility + scores[platform] = 0.6 + elif repo_size_kb <= threshold * 2: + # Over threshold - poor compatibility + scores[platform] = 0.3 + else: + # Way over threshold - very poor compatibility + scores[platform] = 0.1 + + # Check README for size-related information + readme_text = meta.get('readme_text', '').lower() + if readme_text: + # Look for explicit size mentions + if any(keyword in readme_text for keyword in ['lightweight', 'small', 'compact', 'efficient']): + # Boost all scores slightly for models claiming to be lightweight + for platform in scores: + scores[platform] = min(1.0, scores[platform] + 0.1) + elif any(keyword in readme_text for keyword in ['large', 'heavy', 'resource-intensive']): + # Reduce scores for models explicitly stating they are large + for platform in scores: + scores[platform] = max(0.0, scores[platform] - 0.1) + + latency_ms = int((time.perf_counter() - t0) * 1000) + return MetricValue(self.name, scores, latency_ms) + + +register(SizeMetric()) \ No newline at end of file diff --git a/src/acmecli/scoring.py b/src/acmecli/scoring.py index 2603046..b020d9e 100644 --- a/src/acmecli/scoring.py +++ b/src/acmecli/scoring.py @@ -1,19 +1,35 @@ import time + def compute_net_score(results: dict): - # Weighted sum (example weights, including HF metric if present) + """Compute weighted net score from individual metric results.""" + # Weighted sum (weights should add up to 1.0) weights = { - 'license': 0.18, - 'ramp_up_time': 0.13, - 'bus_factor': 0.10, - 'performance_claims': 0.10, - 'size_score': 0.10, - 'dataset_and_code_score': 0.10, - 'dataset_quality': 0.13, - 'code_quality': 0.10, - 'hf_downloads': 0.06 + 'license': 0.20, # High weight for license compatibility + 'ramp_up_time': 0.15, # Important for ease of adoption + 'bus_factor': 0.12, # Risk management + 'performance_claims': 0.12, # Evidence of quality + 'size_score': 0.10, # Deployability concerns + 'dataset_and_code_score': 0.10, # Availability of resources + 'dataset_quality': 0.11, # Quality of training data + 'code_quality': 0.10, # Engineering practices } + t0 = time.perf_counter() - net_score = sum(results[k].value * weights[k] for k in weights if k in results) + net_score = 0.0 + + for metric_name, weight in weights.items(): + if metric_name in results: + metric_result = results[metric_name] + metric_value = metric_result.value + + # Handle size_score specially since it's a dict + if metric_name == 'size_score' and isinstance(metric_value, dict): + # Average across all platform scores + platform_scores = list(metric_value.values()) + metric_value = sum(platform_scores) / len(platform_scores) if platform_scores else 0.0 + + net_score += metric_value * weight + latency_ms = int((time.perf_counter() - t0) * 1000) return net_score, 
latency_ms \ No newline at end of file diff --git a/test_url.txt b/test_url.txt new file mode 100644 index 0000000..be601e7 --- /dev/null +++ b/test_url.txt @@ -0,0 +1 @@ +https://github.com/pytorch/pytorch
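A usage sketch of the new pipeline (illustrative only, not part of the patches above): it strings together the components introduced in PATCH 3/3 — GitHubHandler.fetch_meta, a few of the new metric classes, and compute_net_score — against the sample URL committed as test_url.txt. It assumes the acmecli package is installed and importable and that the GitHub API is reachable; the metric registry in metrics/base.py is not shown in the diff, so the metrics are instantiated directly here rather than pulled from a registry.

import logging

from acmecli.github_handler import GitHubHandler
from acmecli.metrics.license_metric import LicenseMetric
from acmecli.metrics.rampup_metric import RampUpMetric
from acmecli.metrics.size_metric import SizeMetric
from acmecli.scoring import compute_net_score

logging.basicConfig(level=logging.INFO)

# Fetch repository metadata for the sample URL from test_url.txt.
# Note: this hits the live, unauthenticated GitHub API and may be rate-limited.
handler = GitHubHandler()
meta = handler.fetch_meta("https://github.com/pytorch/pytorch")

# Run a subset of the new metrics; each score() call returns a
# MetricValue(name, value, latency_ms) as used throughout the diffs.
results = {}
for metric in (LicenseMetric(), RampUpMetric(), SizeMetric()):
    results[metric.name] = metric.score(meta)

# compute_net_score weights whichever metric names it finds in `results`;
# the dict-valued size_score is averaged across hardware platforms first.
net_score, net_latency_ms = compute_net_score(results)
print(f"net_score={net_score:.3f} (scoring took {net_latency_ms} ms)")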