StrachanGroup · ngholiza · Jun 16, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/api/data_loader_pg.py b/api/data_loader_pg.py
diff --git a/api/fair_archiver.py b/api/fair_archiver.py
@@ -159,7 +159,9 @@ def _load_domain_config(equipment_id: str) -> dict[str, Any] | None:
     """
     try:
         from domain_configs import load_domain_config
-        return load_domain_config(equipment_id)
+        config = load_domain_config(equipment_id)
+        if config:
+            return _normalize_pg_domain_config(config)
     except Exception:
         pass
 
@@ -168,12 +170,58 @@ def _load_domain_config(equipment_id: str) -> dict[str, Any] | None:
     config_path = configs_dir / f"{equipment_id}.json"
     if config_path.exists():
         try:
-            return json.loads(config_path.read_text())
+            return _normalize_pg_domain_config(json.loads(config_path.read_text()))
         except Exception as exc:
             logger.debug("Could not load domain config %s: %s", equipment_id, exc)
+
+    # Fallback: dynamically registered equipment have no JSON file on disk;
+    # their config lives in PostgreSQL. Pull config_json so FAIR metadata still
+    # carries registered units, ontology, hardware, and owner info.
+    if equipment_id and equipment_id != "unknown":
+        try:
+            from metadata_pg import get_equipment_pg
+
+            equipment = get_equipment_pg(equipment_id)
+            if equipment:
+                config = equipment.get("config_json")
+                if isinstance(config, dict) and config:
+                    return _normalize_pg_domain_config(config)
+        except Exception as exc:
+            logger.debug("Could not load PG config for %s: %s", equipment_id, exc)
     return None
 
 
+def _normalize_pg_domain_config(config: dict[str, Any]) -> dict[str, Any]:
+    """Normalize a registered equipment config (whether loaded from its JSON
+    snapshot or from PostgreSQL) so the FAIR metadata generator (which reads
+    hardware/identity fields from ``config["domain"]``) sees the fields
+    ``build_domain_config()`` stores at the config's top level.
+
+    Returns a shallow copy with a ``domain`` dict that backfills (without
+    overwriting) the registered hardware/identity fields. A config that already
+    carries everything under ``domain`` (e.g. the canonical etcher) is unchanged.
+    """
+    if not isinstance(config, dict):
+        return config
+    normalized = dict(config)
+    domain = dict(normalized.get("domain") or {})
+    for field in (
+        "manufacturer",
+        "model",
+        "serial_number",
+        "location",
+        "equipment_type",
+        "sosa_type",
+        "ontology_uri",
+        "id",
+        "name",
+    ):
+        if not domain.get(field) and config.get(field):
+            domain[field] = config[field]
+    normalized["domain"] = domain
+    return normalized
+
+
 def _get_feature_ontology_map(domain_config: dict[str, Any] | None) -> dict[str, dict[str, str]]:
     """
     Build a mapping from feature/target IDs to their QUDT ontology URIs
@@ -212,6 +260,36 @@ def _get_feature_ontology_map(domain_config: dict[str, Any] | None) -> dict[str,
                 entry["qudt_unit"] = target["qudt_unit"]
             mapping[tid] = entry
 
+    # Registered input parameters (set-points captured at registration; always
+    # input). Generic equipment often declare their inputs here rather than under
+    # ``features``, so without this their FAIR metadata would lose units/provenance.
+    for parameter in domain_config.get("parameters", []):
+        if not isinstance(parameter, dict):
+            continue
+        pid = parameter.get("name") or parameter.get("id")
+        if not pid:
+            continue
+        entry = {"unit": parameter.get("unit", ""), "prov_direction": "input"}
+        if parameter.get("qudt_quantity_kind"):
+            entry["qudt_quantity_kind"] = parameter["qudt_quantity_kind"]
+        if parameter.get("qudt_unit"):
+            entry["qudt_unit"] = parameter["qudt_unit"]
+        mapping.setdefault(str(pid), entry)
+
+    # Registered outputs (measured results captured at registration; always output)
+    for output in domain_config.get("outputs", []):
+        if not isinstance(output, dict):
+            continue
+        oid = output.get("name") or output.get("id")
+        if not oid:
+            continue
+        entry = {"unit": output.get("unit", ""), "prov_direction": "output"}
+        if output.get("qudt_quantity_kind"):
+            entry["qudt_quantity_kind"] = output["qudt_quantity_kind"]
+        if output.get("qudt_unit"):
+            entry["qudt_unit"] = output["qudt_unit"]
+        mapping.setdefault(str(oid), entry)
+
     return mapping
 
 
@@ -764,6 +842,9 @@ def run_to_jsonld(run: dict[str, Any]) -> dict[str, Any]:
     project_name = run.get("project_name", "Unknown Project")
     equipment_name = run.get("equipment_name", "Unknown Equipment")
     equipment_id = run.get("equipment_id", "unknown")
+    # Label the dataset by its actual equipment so non-etcher runs aren't
+    # misrepresented as etcher runs in FAIR metadata.
+    run_label = "Etcher" if equipment_id == "etcher" else (equipment_name or "Equipment")
 
     # Load domain config for ontology URIs
     domain_config = _load_domain_config(equipment_id)
@@ -796,26 +877,46 @@ def run_to_jsonld(run: dict[str, Any]) -> dict[str, Any]:
             prop["prov:qualifiedGeneration"] = "prov:generated"
         variables_measured.append(prop)
 
-    # Build ontology-enriched target list
+    # Build ontology-enriched result list. The canonical etcher uses typed
+    # columns; every other equipment reports measurements under ``outputs``
+    # (which may legitimately be empty for an inputs-only upload).
+    generic_outputs = run.get("outputs") or {}
     results = []
-    for target_id, target_label, target_key in [
-        ("AvgEtchRate", "Etch Rate", "avg_etch_rate"),
-        ("RangeEtchRate", "Thickness Range", "range_etch_rate"),
-    ]:
-        onto = ontology_map.get(target_id, {})
-        result_prop: dict[str, Any] = {
-            "@type": "PropertyValue",
-            "name": target_id,
-            "value": run.get(target_key),
-        }
-        if onto.get("unit"):
-            result_prop["unitText"] = onto["unit"]
-        if onto.get("qudt_quantity_kind"):
-            result_prop["qudt:hasQuantityKind"] = {"@id": onto["qudt_quantity_kind"]}
-        if onto.get("qudt_unit"):
-            result_prop["qudt:hasUnit"] = {"@id": onto["qudt_unit"]}
-        result_prop["prov:qualifiedGeneration"] = "prov:generated"
-        results.append(result_prop)
+    if equipment_id != "etcher":
+        for key, value in (generic_outputs if isinstance(generic_outputs, dict) else {}).items():
+            onto = ontology_map.get(key, {})
+            result_prop = {
+                "@type": "PropertyValue",
+                "name": key,
+                "value": value,
+            }
+            if onto.get("unit"):
+                result_prop["unitText"] = onto["unit"]
+            if onto.get("qudt_quantity_kind"):
+                result_prop["qudt:hasQuantityKind"] = {"@id": onto["qudt_quantity_kind"]}
+            if onto.get("qudt_unit"):
+                result_prop["qudt:hasUnit"] = {"@id": onto["qudt_unit"]}
+            result_prop["prov:qualifiedGeneration"] = "prov:generated"
+            results.append(result_prop)
+    else:
+        for target_id, target_label, target_key in [
+            ("AvgEtchRate", "Etch Rate", "avg_etch_rate"),
+            ("RangeEtchRate", "Thickness Range", "range_etch_rate"),
+        ]:
+            onto = ontology_map.get(target_id, {})
+            result_prop: dict[str, Any] = {
+                "@type": "PropertyValue",
+                "name": target_id,
+                "value": run.get(target_key),
+            }
+            if onto.get("unit"):
+                result_prop["unitText"] = onto["unit"]
+            if onto.get("qudt_quantity_kind"):
+                result_prop["qudt:hasQuantityKind"] = {"@id": onto["qudt_quantity_kind"]}
+            if onto.get("qudt_unit"):
+                result_prop["qudt:hasUnit"] = {"@id": onto["qudt_unit"]}
+            result_prop["prov:qualifiedGeneration"] = "prov:generated"
+            results.append(result_prop)
 
     # Build equipment entity with SAREF metadata
     equipment_entity: dict[str, Any] = {
@@ -864,11 +965,11 @@ def run_to_jsonld(run: dict[str, Any]) -> dict[str, Any]:
             },
         ],
         "@type": "Dataset",
-        "name": f"Etcher Run {run.get('run_id', 'unknown')}",
+        "name": f"{run_label} Run {run.get('run_id', 'unknown')}",
         "identifier": str(run.get("run_id", "")),
         "dateCreated": run.get("run_date") or run.get("created_at") or "",
         "description": (
-            f"Etcher run {run.get('run_id')} from lot {run.get('lot_name', 'N/A')}. "
+            f"{run_label} run {run.get('run_id')} from lot {run.get('lot_name', 'N/A')}. "
             f"Equipment: {equipment_name}. Project: {project_name}."
         ),
         "license": {"@id": DEFAULT_LICENSE},

diff --git a/api/main.py b/api/main.py
@@ -141,6 +141,7 @@ def _run_startup_migrations() -> None:
     """
     from data_loader_pg import get_pg_superuser_connection
     from data_loader_pg import ensure_etcher_run_file_refs_pg
+    from data_loader_pg import ensure_equipment_runs_pg
     from metadata_pg import (
         ensure_experiment_proposal_generation_columns_pg,
         ensure_experiment_type_reuse_columns_pg,
@@ -165,6 +166,7 @@ def _run_startup_migrations() -> None:
         lock_conn.commit()
 
         ensure_etcher_run_file_refs_pg()
+        ensure_equipment_runs_pg()
         ensure_experiment_proposal_generation_columns_pg()
         ensure_experiment_type_reuse_columns_pg()
         ensure_experiment_type_versioning_columns_pg()

diff --git a/api/metadata_pg.py b/api/metadata_pg.py
@@ -1078,22 +1078,57 @@ def _serialize_equipment(
     }
 
 
-def _fetch_equipment_run_metrics(conn) -> dict[str, dict[str, Any]]:
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        cur.execute(
-            """
-            SELECT
-                p.equipment_id,
-                COUNT(r.idruns)::int AS total_runs,
-                MAX(COALESCE(r.run_date, r.created_at)) AS last_data_at
-            FROM projects p
-            JOIN etcher_runs r ON r.project_id = p.id
-            WHERE p.equipment_id IS NOT NULL
-              AND p.equipment_id <> ''
-            GROUP BY p.equipment_id
-            """
-        )
-        rows = cur.fetchall()
+def _fetch_equipment_run_metrics(_conn=None) -> dict[str, dict[str, Any]]:
+    """Aggregate fleet-wide run counts and latest-data timestamps per equipment.
+
+    Combines the canonical etcher_runs (attributed via the project's equipment)
+    with the generic equipment_runs table (attributed via the run's own
+    equipment_id) so manually uploaded data for any registered equipment is
+    reflected in the inventory metrics.
+
+    Runs on the superuser connection so these non-sensitive aggregate counts are
+    complete (fleet-wide) regardless of the caller's project membership. The
+    RLS-scoped connection passed by callers would otherwise hide runs in
+    private/shared projects, even from members and admins, and undercount.
+    """
+    conn = get_pg_superuser_connection()
+    try:
+        with conn.cursor(cursor_factory=RealDictCursor) as cur:
+            cur.execute(
+                """
+                SELECT
+                    equipment_id,
+                    SUM(total_runs)::int AS total_runs,
+                    MAX(last_data_at) AS last_data_at
+                FROM (
+                    -- etcher_runs is canonical etcher data: attribute every row
+                    -- to the constant 'etcher' id regardless of the project's
+                    -- equipment association (which may be empty or another tool),
+                    -- so canonical runs are never dropped or miscredited.
+                    SELECT
+                        'etcher'::text AS equipment_id,
+                        COUNT(r.idruns) AS total_runs,
+                        MAX(COALESCE(r.run_date, r.created_at)) AS last_data_at
+                    FROM etcher_runs r
+
+                    UNION ALL
+
+                    SELECT
+                        er.equipment_id AS equipment_id,
+                        COUNT(er.id) AS total_runs,
+                        MAX(COALESCE(er.run_date, er.created_at)) AS last_data_at
+                    FROM equipment_runs er
+                    WHERE er.equipment_id IS NOT NULL
+                      AND er.equipment_id <> ''
+                    GROUP BY er.equipment_id
+                ) combined
+                GROUP BY equipment_id
+                """
+            )
+            rows = cur.fetchall()
+        conn.commit()
+    finally:
+        conn.close()
     return {row["equipment_id"]: dict(row) for row in rows}