Source code for coreplexml.experiments

from __future__ import annotations

"""Experiments resource for the CorePlexML SDK."""
from coreplexml._http import HTTPClient



[docs]
class ExperimentsResource:
    """Create and manage AutoML experiments.

    Experiments run an AutoML engine (for example ``"h2o"`` or ``"flaml"``,
    see :meth:`create`) to train multiple models on a dataset, automatically
    selecting the best model based on a chosen metric.

    Every method is a thin wrapper around one REST call issued through the
    HTTP client supplied at construction time; responses are returned as
    received from that client.
    """

    # Status values at which ``wait`` stops polling and returns.
    _TERMINAL_STATUSES = ("succeeded", "failed", "error")

    def __init__(self, http: HTTPClient) -> None:
        """Bind the resource to an HTTP client.

        Args:
            http: Client used to perform all requests for this resource.
        """
        self._http = http

    def list(self, project_id: str | None = None, limit: int = 50, offset: int = 0) -> dict:
        """List experiments, optionally filtered by project.

        Args:
            project_id: Filter by project UUID (optional). Falsy values
                (``None`` or an empty string) disable the filter.
            limit: Maximum results (default 50).
            offset: Pagination offset.

        Returns:
            Dictionary with ``items`` list and ``total`` count.
        """
        params: dict = {"limit": limit, "offset": offset}
        # The filter is only sent when set, so the query string stays minimal.
        if project_id:
            params["project_id"] = project_id
        return self._http.get("/api/experiments", params=params)

    def create(
        self,
        project_id: str,
        dataset_version_id: str,
        target_column: str,
        name: str = "Experiment",
        problem_type: str = "classification",
        config: dict | None = None,
        engine: str | None = None,
        engines: list[str] | None = None,
        execution_mode: str = "single",
        use_gpu: bool = False,
    ) -> dict:
        """Create a new AutoML experiment.

        Args:
            project_id: UUID of the owning project.
            dataset_version_id: UUID of the dataset version to train on.
            target_column: Name of the target (label) column.
            name: Experiment name (default ``Experiment``).
            problem_type: ``classification`` or ``regression`` (default ``classification``).
            config: Optional training configuration overrides. ``None`` is
                sent as an empty object.
            engine: Preferred engine for single mode (e.g. ``"h2o"`` or ``"flaml"``).
                Omitted from the request when falsy.
            engines: Engine list for single/parallel mode. In single mode, first item is used.
                Omitted from the request when empty or ``None``.
            execution_mode: ``"single"`` (default) or ``"parallel"``.
            use_gpu: Request GPU-capable worker when available.

        Returns:
            Created experiment dictionary with ``id`` and ``status``.
        """
        body = {
            "project_id": project_id,
            "dataset_version_id": dataset_version_id,
            "name": name,
            "target_column": target_column,
            "problem_type": problem_type,
            "config": config or {},
            "execution_mode": execution_mode,
            # Coerce so truthy non-bool inputs still serialize as a JSON boolean.
            "use_gpu": bool(use_gpu),
        }
        if engine:
            body["engine"] = engine
        if engines:
            # Copy so any iterable of names is sent as a plain list.
            body["engines"] = [*engines]
        return self._http.post("/api/experiments", json=body)

    def get(self, experiment_id: str) -> dict:
        """Get experiment details.

        Args:
            experiment_id: UUID of the experiment.

        Returns:
            Experiment dictionary.
        """
        return self._http.get(f"/api/experiments/{experiment_id}")

    def wait(self, experiment_id: str, interval: float = 5.0, timeout: float = 3600.0) -> dict:
        """Poll experiment until training completes.

        Blocks until the experiment reaches ``succeeded``, ``failed``, or ``error`` status.

        The status endpoint is always queried at least once, even when
        ``timeout`` is zero or negative, so an already-finished experiment is
        never reported as timed out. Elapsed time is measured with a
        monotonic clock, and the pause between polls is shortened so that the
        last status check happens at the deadline rather than after it.

        Args:
            experiment_id: UUID of the experiment.
            interval: Seconds between polls (default 5.0). Negative values
                are treated as zero.
            timeout: Maximum seconds to wait (default 3600.0).

        Returns:
            Final experiment status dictionary.

        Raises:
            CorePlexMLError: If the experiment times out.
        """
        import time

        # monotonic() is immune to wall-clock adjustments (NTP, DST, manual
        # changes) that could otherwise stretch or cut short the wait.
        deadline = time.monotonic() + timeout
        while True:
            data = self._http.get(f"/api/experiments/{experiment_id}/status")
            if data.get("status", "") in self._TERMINAL_STATUSES:
                return data
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep past the deadline; clamp at zero because
            # time.sleep() rejects negative durations.
            time.sleep(max(0.0, min(interval, remaining)))

        # Imported only on the failure path, as in the original layout.
        from coreplexml.exceptions import CorePlexMLError
        raise CorePlexMLError(f"Experiment {experiment_id} timed out after {timeout}s")

    def delete(self, experiment_id: str) -> dict:
        """Delete an experiment and its models.

        Args:
            experiment_id: UUID of the experiment.

        Returns:
            Empty dictionary on success.
        """
        return self._http.delete(f"/api/experiments/{experiment_id}")

    def explain(self, experiment_id: str) -> dict:
        """Get model explainability data for an experiment.

        Args:
            experiment_id: UUID of the experiment.

        Returns:
            Explainability data (feature importance, SHAP values, etc.).
        """
        return self._http.get(f"/api/experiments/{experiment_id}/explain")

    def logs(self, experiment_id: str) -> dict:
        """Get training logs for an experiment.

        Args:
            experiment_id: UUID of the experiment.

        Returns:
            Dictionary with ``logs`` list.
        """
        return self._http.get(f"/api/experiments/{experiment_id}/logs")

    def capabilities(self) -> dict:
        """Return effective AutoML capabilities for the authenticated user.

        Returns:
            Response of ``GET /api/experiments/capabilities`` as returned by
            the HTTP client.
        """
        return self._http.get("/api/experiments/capabilities")

    def engine_runs(self, experiment_id: str) -> dict:
        """Return per-engine run status rows for an experiment.

        Args:
            experiment_id: UUID of the experiment.

        Returns:
            Response of ``GET /api/experiments/{experiment_id}/engine-runs``
            as returned by the HTTP client.
        """
        return self._http.get(f"/api/experiments/{experiment_id}/engine-runs")