# Source code for coreplexml.datasets

from __future__ import annotations

"""Datasets resource for the CorePlexML SDK."""
from coreplexml._http import HTTPClient


class DatasetsResource:
    """Manage datasets and dataset versions.

    Datasets are the foundation for training experiments. Upload CSV files
    and CorePlexML will version, profile, and analyze them automatically.
    """

    def __init__(self, http: HTTPClient):
        # Shared HTTP client; all endpoints go through it.
        self._http = http

    @staticmethod
    def _normalize_dataset_upload(payload: dict) -> dict:
        """Normalize upload response keys for docs/examples compatibility.

        The upload endpoint may return ``dataset_id``/``dataset_version_id``;
        the documented examples use the short ``id``/``version_id`` keys, so
        mirror them onto the short names without clobbering existing values.
        """
        if not isinstance(payload, dict):
            # Defensive: non-dict payloads normalize to an empty dict rather
            # than raising inside the SDK.
            return {}
        out = dict(payload)  # copy — never mutate the caller's payload
        if "id" not in out and out.get("dataset_id"):
            out["id"] = out["dataset_id"]
        if "version_id" not in out and out.get("dataset_version_id"):
            out["version_id"] = out["dataset_version_id"]
        return out

    def list(self, project_id: str | None = None, limit: int = 50, offset: int = 0) -> dict:
        """List datasets, optionally filtered by project.

        Args:
            project_id: Filter by project UUID (optional).
            limit: Maximum results (default 50).
            offset: Pagination offset.

        Returns:
            Dictionary with ``items`` list and ``total`` count.
        """
        params: dict = {"limit": limit, "offset": offset}
        if project_id:
            params["project_id"] = project_id
        return self._http.get("/api/datasets", params=params)

    def upload(self, project_id: str, file_path: str, name: str, description: str = "") -> dict:
        """Upload a CSV file as a new dataset.

        Args:
            project_id: UUID of the owning project.
            file_path: Local path to the CSV file.
            name: Display name for the dataset.
            description: Optional description.

        Returns:
            Created dataset dictionary with ``id``, ``name``, etc.
        """
        data = self._http.upload(
            "/api/datasets/upload",
            file_path,
            fields={"project_id": project_id, "name": name, "description": description},
        )
        return self._normalize_dataset_upload(data)

    def get(self, dataset_id: str) -> dict:
        """Get dataset details by ID.

        Args:
            dataset_id: UUID of the dataset.

        Returns:
            Dataset dictionary.
        """
        return self._http.get(f"/api/datasets/{dataset_id}")

    def versions(self, dataset_id: str, limit: int | None = None, offset: int | None = None) -> dict:
        """List all versions of a dataset.

        Args:
            dataset_id: UUID of the dataset.
            limit: Maximum results per page (optional; server default applies
                when omitted).
            offset: Pagination offset (optional).

        Returns:
            Dictionary with paginated ``items`` list plus ``total``,
            ``limit``, and ``offset``.
        """
        params: dict = {}
        if limit is not None:
            params["limit"] = limit
        if offset is not None:
            params["offset"] = offset
        if params:
            return self._http.get(f"/api/datasets/{dataset_id}/versions", params=params)
        # No pagination requested: issue the exact same request as before the
        # params were introduced (backward compatible).
        return self._http.get(f"/api/datasets/{dataset_id}/versions")

    def quality(self, dataset_id: str) -> dict:
        """Get data quality report for a dataset.

        Args:
            dataset_id: UUID of the dataset.

        Returns:
            Quality metrics dictionary.
        """
        return self._http.get(f"/api/datasets/{dataset_id}/quality")

    def columns(self, dataset_id: str) -> dict:
        """Get column metadata for a dataset.

        Args:
            dataset_id: UUID of the dataset.

        Returns:
            Dictionary with ``columns`` list.
        """
        return self._http.get(f"/api/datasets/{dataset_id}/columns")

    def analyze(self, dataset_id: str) -> dict:
        """Run statistical analysis on a dataset.

        Args:
            dataset_id: UUID of the dataset.

        Returns:
            Analysis results dictionary.
        """
        return self._http.get(f"/api/datasets/{dataset_id}/analyze")

    def delete(self, dataset_id: str) -> dict:
        """Delete a dataset.

        Args:
            dataset_id: UUID of the dataset.

        Returns:
            Empty dictionary on success.
        """
        return self._http.delete(f"/api/datasets/{dataset_id}")

    def download(self, dataset_id: str, output_path: str, format: str = "csv") -> str:
        """Download dataset to a local file.

        Args:
            dataset_id: UUID of the dataset.
            output_path: Local path to save the file.
            format: Output format -- ``csv`` or ``parquet`` (default ``csv``).

        Returns:
            The output_path on success.
        """
        return self._http.download(
            f"/api/datasets/{dataset_id}/download",
            output_path,
            params={"format": format},
        )