Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions sagemaker-mlops/src/sagemaker/mlops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,21 @@
Key components:
- workflow: Pipeline and step orchestration
- model_builder: Model building and orchestration
- feature_store: Feature Store management (FeatureGroup, FeatureStore, ingestion, etc.)

Example usage:
from sagemaker.mlops import ModelBuilder
from sagemaker.mlops.workflow import Pipeline, TrainingStep

# Feature Store
from sagemaker.mlops.feature_store import (
FeatureGroup,
FeatureStore,
FeatureMetadata,
create_dataset,
ingest_dataframe,
create_athena_query,
)
"""
from __future__ import absolute_import

Expand All @@ -27,7 +38,12 @@
# from sagemaker.mlops import workflow
# from sagemaker.mlops.workflow import Pipeline, TrainingStep, etc.

# Feature Store submodule is available via:
# from sagemaker.mlops import feature_store
# from sagemaker.mlops.feature_store import FeatureGroup, FeatureStore, create_dataset, etc.

# Public names of the sagemaker.mlops package. Under Python's package import
# rules, the submodule names listed here are imported when a caller runs
# `from sagemaker.mlops import *`.
__all__ = [
"ModelBuilder",
"workflow", # Submodule
"feature_store", # Submodule
]
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ These V2 features are **not wrapped** because core provides them directly:
from sagemaker.mlops.feature_store import (
# Resources (from core)
FeatureGroup,
FeatureStore,
FeatureStore, # For search() and batch operations
FeatureMetadata,

# Shapes (from core)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""SageMaker FeatureStore V3 - powered by sagemaker-core."""

# Resources from core
from sagemaker.core.resources import FeatureGroup, FeatureMetadata
from sagemaker.core.resources import FeatureGroup, FeatureMetadata, FeatureStore

# Shapes from core (Pydantic - no to_dict() needed)
from sagemaker.core.shapes import (
Expand Down Expand Up @@ -51,6 +51,7 @@
from sagemaker.mlops.feature_store.feature_utils import (
as_hive_ddl,
create_athena_query,
create_dataset,
get_session_from_role,
ingest_dataframe,
load_feature_definitions_from_dataframe,
Expand All @@ -74,6 +75,7 @@
# Resources
"FeatureGroup",
"FeatureMetadata",
"FeatureStore",
# Shapes
"DataCatalogConfig",
"FeatureParameter",
Expand Down Expand Up @@ -110,6 +112,7 @@
# Utility functions
"as_hive_ddl",
"create_athena_query",
"create_dataset",
"get_session_from_role",
"ingest_dataframe",
"load_feature_definitions_from_dataframe",
Expand Down
58 changes: 58 additions & 0 deletions sagemaker-mlops/src/sagemaker/mlops/feature_store/feature_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,64 @@ def as_hive_ddl(
return ddl


@_telemetry_emitter(Feature.FEATURE_STORE, "create_dataset")
def create_dataset(
    base,
    output_path: str,
    session: Session,
    record_identifier_feature_name: str = None,
    event_time_identifier_feature_name: str = None,
    included_feature_names=None,
    kms_key_id: str = None,
):
    """Build a DatasetBuilder from a base FeatureGroup or pandas DataFrame.

    Thin convenience wrapper: every argument is forwarded unchanged, by
    keyword, to ``DatasetBuilder.create`` and its result is returned as-is.
    This function performs no validation of its own.

    Args:
        base (Union[FeatureGroup, DataFrame]): The FeatureGroup or DataFrame
            that serves as the base of the dataset.
        output_path (str): S3 URI under which query results are stored.
        session (Session): SageMaker session used for boto calls.
        record_identifier_feature_name (str): Name of the feature acting as
            the record identifier. Required when ``base`` is a DataFrame
            (default: None).
        event_time_identifier_feature_name (str): Name of the feature acting
            as the event time identifier. Required when ``base`` is a
            DataFrame (default: None).
        included_feature_names (List[str]): Features to keep in the output.
            When left unset, all features are included (default: None).
        kms_key_id (str): KMS key used for encryption (default: None).

    Returns:
        DatasetBuilder: The builder produced by ``DatasetBuilder.create`` for
        the given parameters.

    Raises:
        ValueError: If ``base`` is a DataFrame and either
            ``record_identifier_feature_name`` or
            ``event_time_identifier_feature_name`` is missing. Any such error
            originates in ``DatasetBuilder.create``, not in this wrapper.

    Example:
        >>> from sagemaker.mlops.feature_store import create_dataset, FeatureGroup
        >>> fg = FeatureGroup.get(feature_group_name="my-fg")
        >>> builder = create_dataset(
        ...     base=fg,
        ...     output_path="s3://bucket/output",
        ...     session=session,
        ... )
        >>> builder.with_feature_group(other_fg, target_feature_name_in_base="id")
        >>> df, query = builder.to_dataframe()
    """
    # Function-scope import, kept from the original.
    # NOTE(review): presumably this avoids a circular import with
    # dataset_builder -- confirm before moving it to module level.
    from sagemaker.mlops.feature_store.dataset_builder import DatasetBuilder

    # Gather the arguments once, then hand them over by keyword so the call
    # matches DatasetBuilder.create's parameter names exactly.
    builder_kwargs = {
        "base": base,
        "output_path": output_path,
        "session": session,
        "record_identifier_feature_name": record_identifier_feature_name,
        "event_time_identifier_feature_name": event_time_identifier_feature_name,
        "included_feature_names": included_feature_names,
        "kms_key_id": kms_key_id,
    }
    return DatasetBuilder.create(**builder_kwargs)


@_telemetry_emitter(Feature.FEATURE_STORE, "ingest_dataframe")
def ingest_dataframe(
feature_group_name: str,
Expand Down
Loading
Loading