;
name: string;
createdAt: Date;
}
);
```
### Create the materialized view to migrate data
Create a materialized view that:
- SELECTs from the old table (`events_v0`)
- copies fields 1:1 to the new table
- writes into the versioned target table (`events_v1`)
Pass the versioned `OlapTable` instance as `targetTable`. If you only pass a `tableName`, Moose will create an unversioned target.
```ts filename="app/views/migrate_events_to_v01.ts" copy
import { MaterializedView, sql } from "@514labs/moose-lib";
// Sketch only: import paths and export names are illustrative — use your project's actual exports
import { eventsV0, eventsV1, EventV1 } from "../tables/events";

export const migrateEventsToV1 = new MaterializedView<EventV1>({
  materializedViewName: "migrate_events_to_v01",
  // Copy fields 1:1 from the old table into the new one
  selectStatement: sql`SELECT id, name, createdAt FROM ${eventsV0}`,
  selectTables: [eventsV0],
  // Pass the versioned OlapTable instance so Moose writes into the versioned target
  targetTable: eventsV1,
});
```
What happens when you export this view:
- Moose creates the versioned table if needed
- Moose creates the MATERIALIZED VIEW and immediately runs a one-time backfill (`INSERT INTO ... SELECT ...`)
- ClickHouse keeps the view active: any new inserts into `events` automatically flow into `events_0_1`
## Cutover and cleanup
- Update readers to query the new table export (`eventsV1`).
- Update writers/streams to produce to the new table if applicable.
- After verifying parity and retention windows, drop the old table and the migration view.
## Notes and tips
- Use semantic versions like `0.1`, `1.0`, `1.1`. Moose will render `events_1_1` as the physical name.
- Keep the migration view simple and deterministic. If you need complex transforms, prefer explicit SQL in the `selectStatement`.
- Very large backfills can take time. Consider deploying during low-traffic windows.
---
## Supported Column Types
Source: moose/olap/supported-types.mdx
Complete guide to defining columns for ClickHouse tables in Moose
# Supported Column Types
Moose supports a comprehensive set of ClickHouse column types across both TypeScript and Python libraries. This guide covers all supported types, their syntax, and best practices for defining table schemas.
## Basic Types
### String Types
```typescript
interface User {
string: string; // String
lowCardinality: string & LowCardinality; // LowCardinality(String)
uuid: string & tags.Format<"uuid">; // UUID (with typia tags)
}
```
| ClickHouse Type | TypeScript | Description |
|------|------------|--------|
| `String` | `string` | Variable-length string |
| `LowCardinality(String)` | `string & LowCardinality` | Optimized for repeated values |
| `UUID` | `string & tags.Format<"uuid">` | UUID format strings |
### Numeric Types
### Integer Types
```typescript
interface Metrics {
user_id: Int32; // Int32
count: UInt64; // UInt64
small_value: Int8; // Int8
}
// Alternative: You can still use the verbose syntax if preferred
interface MetricsVerbose {
user_id: number & ClickHouseInt<"int32">;
count: number & ClickHouseInt<"uint64">;
small_value: number & ClickHouseInt<"int8">;
}
```
| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description |
|------|------------|------------|--------|
| `Int8` | `Int8` | `number & ClickHouseInt<"int8">` | -128 to 127 |
| `Int16` | `Int16` | `number & ClickHouseInt<"int16">` | -32,768 to 32,767 |
| `Int32` | `Int32` | `number & ClickHouseInt<"int32">` | -2,147,483,648 to 2,147,483,647 |
| `Int64` | `Int64` | `number & ClickHouseInt<"int64">` | -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 |
| `UInt8` | `UInt8` | `number & ClickHouseInt<"uint8">` | 0 to 255 |
| `UInt16` | `UInt16` | `number & ClickHouseInt<"uint16">` | 0 to 65,535 |
| `UInt32` | `UInt32` | `number & ClickHouseInt<"uint32">` | 0 to 4,294,967,295 |
| `UInt64` | `UInt64` | `number & ClickHouseInt<"uint64">` | 0 to 18,446,744,073,709,551,615 |
### Floating Point Types
```typescript
interface SensorData {
temperature: Float32; // Float32
humidity: Float64; // Float64
pressure: number; // Default Float64
}
// Alternative: You can still use the verbose syntax if preferred
interface SensorDataVerbose {
temperature: number & tags.Type<"float">; // Float32
humidity: number; // Float64
}
```
| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description |
|-----------------|------------|------------|---------------------|
| `Float32` | `Float32` | `number & tags.Type<"float">` | 32-bit floating point |
| `Float64` | `Float64` or `number` | `number` | 64-bit floating point (default) |
### Decimal Types
```typescript
interface FinancialData {
amount: Decimal<10, 2>; // Decimal(10,2)
rate: Decimal<5, 4>; // Decimal(5,4)
fee: Decimal<8, 3>; // Decimal(8,3)
}
// Alternative: You can still use the verbose syntax if preferred
interface FinancialDataVerbose {
amount: string & ClickHouseDecimal<10, 2>; // Decimal(10,2)
rate: string & ClickHouseDecimal<5, 4>; // Decimal(5,4)
}
```
| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description |
|------|------------|------------|---------------------|
| `Decimal(P,S)` | `Decimal<P, S>` | `string & ClickHouseDecimal<P, S>` | Fixed-point decimal with P total digits, S decimal places |
### Boolean Type
```typescript
interface User {
is_active: boolean;
verified: boolean;
}
```
| ClickHouse Type | TypeScript | Description |
|------|------------|--------|
| `Boolean` | `boolean` | True/false values |
### Date and Time Types
```typescript
interface Event {
created_at: DateTime; // DateTime
  updated_at: DateTime64<3>; // DateTime64(3) - millisecond precision
  logged_at: DateTime64<6>; // DateTime64(6) - microsecond precision
birth_date: Date; // Date
}
// Alternative: You can still use the verbose syntax if preferred
interface EventVerbose {
created_at: Date; // DateTime
  updated_at: Date & ClickHousePrecision<3>; // DateTime64(3)
birth_date: Date; // Date
}
```
| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description |
|-----------------|------------|------------|-------------|
| `Date` | `Date` | `Date` | Date only |
| `Date16` | `Date` | `Date` | Compact date format |
| `DateTime` | `DateTime` | `Date` | Date and time |
| `DateTime64(P)` | `DateTime64<P>` | `Date & ClickHousePrecision<P>` | DateTime with precision (P = 0-9) |
### Network Types
```typescript
interface NetworkEvent {
source_ip: string & tags.Format<"ipv4">;
dest_ip: string & tags.Format<"ipv6">;
}
```
| ClickHouse Type | TypeScript | Description |
|------|------------|--------|
| `IPv4` | `string & tags.Format<"ipv4">` | IPv4 addresses |
| `IPv6` | `string & tags.Format<"ipv6">` | IPv6 addresses |
## Complex Types
### Geometry Types
Moose supports ClickHouse geometry types. Use the helpers in each language to get type-safe models and correct ClickHouse mappings.
```typescript
import {
  ClickHousePoint,
ClickHouseRing,
ClickHouseLineString,
ClickHouseMultiLineString,
ClickHousePolygon,
ClickHouseMultiPolygon,
} from "@514labs/moose-lib";
interface GeoTypes {
point: ClickHousePoint; // Point → [number, number]
ring: ClickHouseRing; // Ring → Array<[number, number]>
lineString: ClickHouseLineString; // LineString → Array<[number, number]>
  multiLineString: ClickHouseMultiLineString; // MultiLineString → Array<Array<[number, number]>>
  polygon: ClickHousePolygon; // Polygon → Array<Array<[number, number]>>
  multiPolygon: ClickHouseMultiPolygon; // MultiPolygon → Array<Array<Array<[number, number]>>>
}
```
| ClickHouse Type | TypeScript |
|------|------------|
| `Point` | `ClickHousePoint` |
| `Ring` | `ClickHouseRing` |
| `LineString` | `ClickHouseLineString` |
| `MultiLineString` | `ClickHouseMultiLineString` |
| `Polygon` | `ClickHousePolygon` |
| `MultiPolygon` | `ClickHouseMultiPolygon` |
Geometry coordinates are represented as numeric pairs `[x, y]` (TypeScript) or `tuple[float, float]` (Python).
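For example, values for the model above are plain nested arrays of numbers (a sketch; the sample coordinates are illustrative):
```typescript
const sample: Pick<GeoTypes, "point" | "ring"> = {
  point: [-122.42, 37.77],                 // a single [x, y] pair
  ring: [[0, 0], [0, 1], [1, 1], [0, 0]],  // a closed sequence of [x, y] pairs
};
```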
### Array Types
Arrays are supported for all basic types and some complex types.
```typescript
interface User {
tags: string[]; // Array(String)
scores: number[]; // Array(Float64)
  metadata: Record<string, any>[]; // Array(Json)
  tuple: ({
    name: string;
    age: number;
  } & ClickHouseNamedTuple)[]; // Array(Tuple(name String, age Float64))
}
```
### Map Types
Maps store key-value pairs with specified key and value types.
```typescript
interface User {
  preferences: Record<string, string>; // Map(String, String)
  metrics: Record<string, number>; // Map(String, Float64)
}
```
### Nested Types
Nested types allow embedding complex objects within tables.
```typescript
interface Address {
street: string;
city: string;
zip: string;
}
interface User {
name: string;
address: Address; // Nested type
}
```
### Named Tuple Types
Named tuples provide structured data with named fields.
```typescript
interface Point {
x: number;
y: number;
}
interface Shape {
center: Point & ClickHouseNamedTuple; // Named tuple
radius: number;
}
```
### Enum Types
Enums map to ClickHouse enums with string or integer values.
```typescript
enum UserRole {
ADMIN = "admin",
USER = "user",
GUEST = "guest"
}
interface User {
role: UserRole; // Enum with string values
}
```
## Special Types
### JSON Type
The `Json` type stores arbitrary JSON data with optional schema configuration for performance and type safety.
#### Basic JSON (Unstructured)
For completely dynamic JSON data without any schema:
```typescript
interface Event {
  metadata: Record<string, any>; // Basic JSON - accepts any structure
config: any; // Basic JSON - fully dynamic
}
```
#### Rich JSON with Type Configuration
For better performance and validation, you can define typed fields within your JSON using `ClickHouseJson`. This creates a ClickHouse `JSON` column with explicit type hints for specific paths.
```typescript
// Define the structure for your JSON payload
interface PayloadStructure {
name: string;
count: number;
timestamp?: Date;
}
interface Event {
id: string;
// JSON with typed paths - better performance, allows extra fields
payload: PayloadStructure & ClickHouseJson;
// JSON with performance tuning options
metadata: PayloadStructure & ClickHouseJson<
256, // max_dynamic_paths: limit tracked paths (default: no limit)
16, // max_dynamic_types: limit type variations (default: no limit)
["skip.me"], // skip_paths: exclude specific paths
["^tmp\\."] // skip_regexps: exclude paths matching regex
>;
}
```
#### Configuration Options
| Option | Type | Description |
|--------|------|-------------|
| `max_dynamic_paths` | `number` | Maximum number of unique JSON paths to track. Helps control memory usage for highly variable JSON structures. |
| `max_dynamic_types` | `number` | Maximum number of type variations allowed per path. Useful when paths may contain different types. |
| `skip_paths` | `string[]` | Array of exact JSON paths to ignore during ingestion (e.g., `["temp", "debug.info"]`). |
| `skip_regexps` | `string[]` | Array of regex patterns for paths to exclude (e.g., `["^tmp\\.", ".*_internal$"]`). |
#### Benefits of Typed JSON
1. **Better Performance**: ClickHouse can optimize storage and queries for known paths
2. **Type Safety**: Validates that specified paths match expected types
3. **Flexible Schema**: Allows additional fields beyond typed paths
4. **Memory Control**: Configure limits to prevent unbounded resource usage
- **Basic JSON** (`any`, `Dict[str, Any]`): Use when JSON structure is completely unknown or rarely queried
- **Rich JSON** (`ClickHouseJson`): Use when you have known fields that need indexing/querying, but want to allow additional dynamic fields
#### Example: Product Event Tracking
```typescript
interface ProductProperties {
category: string;
price: number;
inStock: boolean;
}
interface ProductEvent {
  eventId: Key<string>;
timestamp: DateTime;
// Typed paths for common fields, but allows custom properties
properties: ProductProperties & ClickHouseJson<
128, // Track up to 128 unique paths
8, // Allow up to 8 type variations per path
["_internal"], // Ignore internal fields
["^debug_"] // Ignore debug fields
>;
}
```
With this schema, you can send events like:
```json
{
"eventId": "evt_123",
"timestamp": "2025-10-22T12:00:00Z",
"properties": {
"category": "electronics", // Typed field ✓
"price": 99.99, // Typed field ✓
"inStock": true, // Typed field ✓
"customTag": "holiday-sale", // Extra field - accepted ✓
"brandId": 42, // Extra field - accepted ✓
"_internal": "ignored" // Skipped by skip_paths ✓
}
}
```
### Nullable Types
All types support nullable variants using optional types.
```typescript
interface User {
name: string; // Required
email?: string; // Nullable
age?: number; // Nullable
}
```
If a field is optional in your app model but you provide a ClickHouse default, Moose infers a non-nullable ClickHouse column with a DEFAULT clause.
- Optional without default (e.g., `field?: number`) → ClickHouse Nullable type.
- Optional with default (e.g., `field?: number & ClickHouseDefault<"18">` or `WithDefault`) → non-nullable column with default `18`.
This lets you keep optional fields at the application layer while avoiding Nullable columns in ClickHouse when a server-side default exists.
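For instance, a minimal sketch combining both behaviors (assuming `ClickHouseDefault` is imported from `@514labs/moose-lib`):
```typescript
interface Customer {
  id: Key<string>;
  nickname?: string;                       // no default → Nullable(String)
  age?: number & ClickHouseDefault<"18">;  // default provided → non-nullable column with DEFAULT 18
}
```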
### SimpleAggregateFunction
`SimpleAggregateFunction` is designed for use with `AggregatingMergeTree` tables. It stores pre-aggregated values that are automatically merged when ClickHouse combines rows with the same primary key.
```typescript
interface DailyStats {
date: DateTime;
userId: string;
totalViews: number & SimpleAggregated<"sum", number>;
maxScore: number & SimpleAggregated<"max", number>;
lastSeen: DateTime & SimpleAggregated<"anyLast", DateTime>;
}
const statsTable = new OlapTable<DailyStats>("daily_stats", {
engine: ClickHouseEngines.AggregatingMergeTree,
orderByFields: ["date", "userId"],
});
```
See [ClickHouse docs](https://clickhouse.com/docs/en/sql-reference/data-types/simpleaggregatefunction) for the complete list of functions.
## Table Engines
Moose supports all common ClickHouse table engines:
| Engine | `ClickHouseEngines` Value | Description |
|--------|------------|-------------|
| `MergeTree` | `ClickHouseEngines.MergeTree` | Default engine |
| `ReplacingMergeTree` | `ClickHouseEngines.ReplacingMergeTree` | Deduplication |
| `SummingMergeTree` | `ClickHouseEngines.SummingMergeTree` | Aggregates numeric columns |
| `AggregatingMergeTree` | `ClickHouseEngines.AggregatingMergeTree` | Advanced aggregation |
| `ReplicatedMergeTree` | `ClickHouseEngines.ReplicatedMergeTree` | Replicated version of MergeTree |
| `ReplicatedReplacingMergeTree` | `ClickHouseEngines.ReplicatedReplacingMergeTree` | Replicated with deduplication |
| `ReplicatedSummingMergeTree` | `ClickHouseEngines.ReplicatedSummingMergeTree` | Replicated with aggregation |
| `ReplicatedAggregatingMergeTree` | `ClickHouseEngines.ReplicatedAggregatingMergeTree` | Replicated with advanced aggregation |
```typescript
const userTable = new OlapTable("users", {
engine: ClickHouseEngines.ReplacingMergeTree,
orderByFields: ["id", "updated_at"]
});
```
## Best Practices
### Type Selection
- **Use specific integer types** when you know the value ranges to save storage
- **Prefer `Float64`** for most floating-point calculations unless storage is critical
- **Use `LowCardinality`** for string columns with repeated values
- **Choose appropriate DateTime precision** based on your accuracy needs
### Performance Considerations
- **Order columns by cardinality** (low to high) for better compression
- **Use `ReplacingMergeTree`** for tables with frequent updates
- **Specify `orderByFields` or `orderByExpression`** for optimal query performance
- **Consider `LowCardinality`** for string columns with < 10,000 unique values
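A minimal sketch applying these guidelines (the schema and names are illustrative):
```typescript
interface PageView {
  id: Key<string>;
  country: string & LowCardinality;  // few distinct values → LowCardinality(String)
  userId: UInt32;                    // known non-negative range → specific integer type
  viewedAt: DateTime;
}

const pageViews = new OlapTable<PageView>("page_views", {
  engine: ClickHouseEngines.ReplacingMergeTree,      // frequent updates → deduplication
  orderByFields: ["country", "userId", "viewedAt"],  // low → high cardinality
});
```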
---
## moose/olap/ttl
Source: moose/olap/ttl.mdx
## TTL (Time-to-Live) for ClickHouse Tables
Moose lets you declare ClickHouse TTL directly in your data model:
- Table-level TTL via the `ttl` option on `OlapTable` config
- Column-level TTL via `ClickHouseTTL` on individual fields
### When to use TTL
- Automatically expire old rows to control storage cost
- Mask or drop sensitive columns earlier than the full row expiry
### TypeScript
```ts
import { OlapTable, Key, ClickHouseTTL, DateTime } from "@514labs/moose-lib";

interface Event {
  id: Key<string>;
  timestamp: DateTime;
  email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY">; // column TTL
}

export const events = new OlapTable<Event>("Events", {
  orderByFields: ["id", "timestamp"],
  ttl: "timestamp + INTERVAL 90 DAY DELETE", // table-level TTL
});
```
### Python
```python
from typing import Annotated
from moose_lib import OlapTable, OlapConfig, Key, ClickHouseTTL
from pydantic import BaseModel
from datetime import datetime
class Event(BaseModel):
id: Key[str]
timestamp: datetime
email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY")]
events = OlapTable[Event](
"Events",
OlapConfig(
order_by_fields=["id", "timestamp"],
ttl="timestamp + INTERVAL 90 DAY DELETE",
),
)
```
### Notes
- Expressions must be valid ClickHouse TTL expressions, but do not include the leading `TTL` keyword.
- Column TTLs are independent from the table TTL and can be used together.
- Moose will apply TTL changes via migrations using `ALTER TABLE ... MODIFY TTL` and `MODIFY COLUMN ... TTL`.
### Related
- See `Modeling Tables` for defining your schema
- See `Applying Migrations` to roll out TTL changes
---
## Python Moose Lib Reference
Source: moose/reference/py-moose-lib.mdx
Python Moose Lib Reference
# API Reference
This is a comprehensive reference for the Python `moose_lib`, detailing all exported components, types, and utilities.
## Core Types
### `Key[T]`
A type annotation for marking fields as primary keys in data models. Used with Pydantic.
```python
from moose_lib import Key
from pydantic import BaseModel
class MyModel(BaseModel):
id: Key[str] # Marks 'id' as a primary key of type string
```
### `BaseModel`
Pydantic base model used for data modeling in Moose.
```python
from pydantic import BaseModel
class MyDataModel(BaseModel):
id: str
name: str
count: int
```
### `MooseClient`
Client for interacting with ClickHouse and Temporal.
```python
class MooseClient:
query: QueryClient # For database queries
workflow: Optional[WorkflowClient] # For workflow operations
```
### `ApiResult`
Class representing the result of an analytics API call.
```python
@dataclass
class ApiResult:
status: int # HTTP status code
body: Any # Response body
```
## Configuration Types
### `OlapConfig`
Configuration for OLAP tables.
```python
from typing import Union, Optional
from moose_lib.blocks import EngineConfig
class OlapConfig(BaseModel):
database: Optional[str] = None # Optional database name (defaults to moose.config.toml clickhouse_config.db_name)
order_by_fields: list[str] = [] # Fields to order by
engine: Optional[EngineConfig] = None # Table engine configuration
```
### `EngineConfig` Classes
Base class and implementations for table engine configurations.
```python
# Base class
class EngineConfig:
pass
# Available engine implementations
class MergeTreeEngine(EngineConfig):
pass
class ReplacingMergeTreeEngine(EngineConfig):
ver: Optional[str] = None # Version column for keeping latest
is_deleted: Optional[str] = None # Soft delete marker (requires ver)
class AggregatingMergeTreeEngine(EngineConfig):
pass
class SummingMergeTreeEngine(EngineConfig):
columns: Optional[List[str]] = None # Columns to sum
# Replicated engines
class ReplicatedMergeTreeEngine(EngineConfig):
keeper_path: Optional[str] = None # ZooKeeper/Keeper path (optional for Cloud)
replica_name: Optional[str] = None # Replica name (optional for Cloud)
class ReplicatedReplacingMergeTreeEngine(EngineConfig):
keeper_path: Optional[str] = None # ZooKeeper/Keeper path (optional for Cloud)
replica_name: Optional[str] = None # Replica name (optional for Cloud)
ver: Optional[str] = None # Version column for keeping latest
is_deleted: Optional[str] = None # Soft delete marker (requires ver)
class ReplicatedAggregatingMergeTreeEngine(EngineConfig):
keeper_path: Optional[str] = None # ZooKeeper/Keeper path (optional for Cloud)
replica_name: Optional[str] = None # Replica name (optional for Cloud)
class ReplicatedSummingMergeTreeEngine(EngineConfig):
keeper_path: Optional[str] = None # ZooKeeper/Keeper path (optional for Cloud)
replica_name: Optional[str] = None # Replica name (optional for Cloud)
columns: Optional[List[str]] = None # Columns to sum
```
### `StreamConfig`
Configuration for data streams.
```python
class StreamConfig(BaseModel):
parallelism: int = 1
retention_period: int = 60 * 60 * 24 * 7 # 7 days
destination: Optional[OlapTable[Any]] = None
```
### `IngestConfig`
Configuration for data ingestion.
```python
class IngestConfig(BaseModel):
destination: Optional[OlapTable[Any]] = None
```
### `IngestPipelineConfig`
Configuration for creating a complete data pipeline.
```python
class IngestPipelineConfig(BaseModel):
table: bool | OlapConfig = True
stream: bool | StreamConfig = True
ingest_api: bool | IngestConfig = True
```
## Infrastructure Components
### `OlapTable[T]`
Creates a ClickHouse table with the schema of type T.
```python
from moose_lib import OlapTable, OlapConfig
from moose_lib.blocks import ReplacingMergeTreeEngine
# Basic usage
my_table = OlapTable[UserProfile]("user_profiles")
# With configuration (fields)
my_table = OlapTable[UserProfile]("user_profiles", OlapConfig(
order_by_fields=["id", "timestamp"],
engine=ReplacingMergeTreeEngine(),
))
# With configuration (expression)
my_table_expr = OlapTable[UserProfile]("user_profiles_expr", OlapConfig(
order_by_expression="(id, timestamp)",
engine=ReplacingMergeTreeEngine(),
))
# With custom database override
analytics_table = OlapTable[UserProfile]("user_profiles", OlapConfig(
database="analytics", # Override default database
order_by_fields=["id", "timestamp"]
))
# Disable sorting entirely
my_table_unsorted = OlapTable[UserProfile]("user_profiles_unsorted", OlapConfig(
order_by_expression="tuple()",
))
```
### `Stream[T]`
Creates a Redpanda topic with the schema of type T.
```python
# Basic usage
my_stream = Stream[UserEvent]("user_events")
# With configuration
my_stream = Stream[UserEvent]("user_events", StreamConfig(
parallelism=3,
retention_period=86400 # 1 day in seconds
))
# Adding transformations
def transform_user_event(event: UserEvent) -> ProfileUpdate:
return ProfileUpdate(user_id=event.user_id, update_type="event")
my_stream.add_transform(profile_stream, transform_user_event)
```
### `IngestApi[T]`
Creates an HTTP endpoint for ingesting data of type T.
```python
# Basic usage with destination stream
my_ingest_api = IngestApi[UserEvent]("user_events", IngestConfigWithDestination(
destination=my_user_event_stream
))
```
### `Api[T, U]`
Creates an HTTP endpoint for querying data with request type T and response type U.
```python
# Basic usage
def get_user_profiles(params: UserQuery) -> list[UserProfile]:
# Query implementation
return [UserProfile(...), UserProfile(...)]
my_api = Api[UserQuery, list[UserProfile]](
"get_user_profiles",
get_user_profiles
)
```
### `IngestPipeline[T]`
Combines ingest API, stream, and table creation in a single component.
```python
from moose_lib import IngestPipeline, IngestPipelineConfig, StreamConfig, OlapConfig
from moose_lib.blocks import ReplacingMergeTreeEngine
# Basic usage
pipeline = IngestPipeline[UserEvent]("user_pipeline", IngestPipelineConfig(
ingest_api=True,
stream=True,
table=True
))
# With advanced configuration
pipeline = IngestPipeline[UserEvent]("user_pipeline", IngestPipelineConfig(
ingest_api=True,
stream=StreamConfig(parallelism=3),
table=OlapConfig(
order_by_fields=["id", "timestamp"],
engine=ReplacingMergeTreeEngine(),
)
))
```
### `MaterializedView[T]`
Creates a materialized view in ClickHouse.
```python
# Basic usage
view = MaterializedView[UserStatistics](MaterializedViewOptions(
select_statement="SELECT user_id, COUNT(*) as event_count FROM user_events GROUP BY user_id",
table_name="user_events",
materialized_view_name="user_statistics",
order_by_fields=["user_id"]
))
```
## ClickHouse Utilities
### Engine Configuration Classes
Type-safe configuration classes for table engines:
```python
from moose_lib.blocks import (
MergeTreeEngine,
ReplacingMergeTreeEngine,
AggregatingMergeTreeEngine,
SummingMergeTreeEngine,
ReplicatedMergeTreeEngine,
ReplicatedReplacingMergeTreeEngine,
ReplicatedAggregatingMergeTreeEngine,
ReplicatedSummingMergeTreeEngine,
S3QueueEngine,
S3Engine,
BufferEngine,
DistributedEngine
)
# ReplacingMergeTree with version control and soft deletes
dedup_engine = ReplacingMergeTreeEngine(
ver="updated_at", # Optional: version column for keeping latest
is_deleted="deleted" # Optional: soft delete marker (requires ver)
)
# ReplicatedMergeTree with explicit keeper paths (self-managed ClickHouse)
replicated_engine = ReplicatedMergeTreeEngine(
keeper_path="/clickhouse/tables/{database}/{shard}/my_table",
replica_name="{replica}"
)
# ReplicatedReplacingMergeTree with deduplication
replicated_dedup_engine = ReplicatedReplacingMergeTreeEngine(
keeper_path="/clickhouse/tables/{database}/{shard}/my_dedup_table",
replica_name="{replica}",
ver="updated_at",
is_deleted="deleted"
)
# For ClickHouse Cloud or Boreal - omit keeper parameters
cloud_replicated = ReplicatedMergeTreeEngine() # No parameters needed
# S3Queue configuration for streaming from S3
s3_engine = S3QueueEngine(
s3_path="s3://bucket/data/*.json",
format="JSONEachRow",
aws_access_key_id="AKIA...", # Optional
aws_secret_access_key="secret...", # Optional
compression="gzip", # Optional
headers={"X-Custom": "value"} # Optional
)
# Use with OlapTable
s3_table = OlapTable[MyData]("s3_events", OlapConfig(
engine=s3_engine,
settings={
"mode": "unordered",
"keeper_path": "/clickhouse/s3queue/events",
"loading_retries": "3"
}
))
# S3 engine for direct S3 access (not streaming)
s3_direct_engine = S3Engine(
path="s3://bucket/data/file.json",
format="JSONEachRow",
aws_access_key_id="AKIA...", # Optional
aws_secret_access_key="secret...", # Optional
compression="gzip" # Optional
)
s3_direct_table = OlapTable[MyData]("s3_data", OlapConfig(
engine=s3_direct_engine
))
# Buffer engine for high-throughput buffered writes
buffer_engine = BufferEngine(
target_database="local",
target_table="destination_table",
num_layers=16,
min_time=10,
max_time=100,
min_rows=10000,
max_rows=1000000,
min_bytes=10485760,
max_bytes=104857600
)
buffer_table = OlapTable[MyData]("buffer", OlapConfig(
engine=buffer_engine
))
# Distributed engine for cluster-wide distributed tables
distributed_engine = DistributedEngine(
cluster="my_cluster",
target_database="default",
target_table="local_table",
sharding_key="cityHash64(id)" # Optional
)
distributed_table = OlapTable[MyData]("distributed", OlapConfig(
engine=distributed_engine
))
```
## Task Management
### `Task[T, U]`
A class that represents a single task within a workflow system, with typed input and output.
```python
from moose_lib import Task, TaskConfig, TaskContext
from pydantic import BaseModel
# Define input and output models
class InputData(BaseModel):
user_id: str
class OutputData(BaseModel):
result: str
status: bool
# Task with input and output
def process_user(ctx: TaskContext[InputData]) -> OutputData:
# Process the user data
return OutputData(result=f"Processed {ctx.input.user_id}", status=True)
user_task = Task[InputData, OutputData](
name="process_user",
config=TaskConfig(
run=process_user,
retries=3,
timeout="30s"
)
)
# Task with no input, but with output
def fetch_data(ctx: TaskContext[None]) -> OutputData:
return OutputData(result="Fetched data", status=True)
fetch_task = Task[None, OutputData](
name="fetch_data",
config=TaskConfig(run=fetch_data)
)
# Task with input but no output
def log_event(ctx: TaskContext[InputData]) -> None:
print(f"Event logged for: {ctx.input.user_id}")
log_task = Task[InputData, None](
name="log_event",
config=TaskConfig(run=log_event)
)
# Task with neither input nor output
def cleanup(ctx: TaskContext[None]) -> None:
print("Cleanup complete")
cleanup_task = Task[None, None](
name="cleanup",
config=TaskConfig(run=cleanup)
)
```
### `TaskConfig[T, U]`
Configuration for a Task.
```python
@dataclasses.dataclass
class TaskConfig(Generic[T, U]):
# The handler function that executes the task logic
# Can be any of: () -> None, () -> U, (T) -> None, or (T) -> U depending on input/output types
run: TaskRunFunc[T, U]
# Optional list of tasks to run after this task completes
on_complete: Optional[list[Task[U, Any]]] = None
# Optional function that is called when the task is cancelled
on_cancel: Optional[Callable[[TaskContext[T_none]], Union[None, Awaitable[None]]]] = None
# Optional timeout string (e.g. "5m", "1h", "never")
timeout: Optional[str] = None
# Optional number of retry attempts
retries: Optional[int] = None
```
### `Workflow`
Represents a workflow composed of one or more tasks.
```python
from moose_lib import Workflow, WorkflowConfig
# Create a workflow that starts with the fetch_task
data_workflow = Workflow(
name="data_processing",
config=WorkflowConfig(
starting_task=fetch_task,
schedule="@every 1h", # Run every hour
timeout="10m", # Timeout after 10 minutes
retries=2 # Retry up to 2 times if it fails
)
)
```
### `WorkflowConfig`
Configuration for a workflow.
```python
@dataclasses.dataclass
class WorkflowConfig:
# The first task to execute in the workflow
starting_task: Task[Any, Any]
# Optional number of retry attempts for the entire workflow
retries: Optional[int] = None
# Optional timeout string for the entire workflow
timeout: Optional[str] = None
# Optional cron-like schedule string for recurring execution
schedule: Optional[str] = None
```
---
## TypeScript Moose Lib Reference
Source: moose/reference/ts-moose-lib.mdx
TypeScript Moose Lib Reference
# API Reference
This is a comprehensive reference for `@514labs/moose-lib`, detailing all exported components, types, and utilities.
## Core Types
### `Key`
A type for marking fields as primary keys in data models.
```ts
// Example
interface MyModel {
  id: Key<string>; // Marks 'id' as a primary key of type string
}
```
### `JWT`
A type for working with JSON Web Tokens.
```ts
// Example
type UserJWT = JWT<{ userId: string, role: string }>;
```
### `ApiUtil`
Interface providing utilities for analytics APIs.
```ts
interface ApiUtil {
client: MooseClient; // Client for interacting with the database
sql: typeof sql; // SQL template tag function
jwt: JWTPayload | undefined; // Current JWT if available
}
```
## Infrastructure Components
### `OlapTable`
Creates a ClickHouse table with the schema of type T.
```ts
// Basic usage with MergeTree (default)
const basicTable = new OlapTable<UserProfile>("user_profiles");

// With sorting configuration (expression)
const exprTable = new OlapTable<UserProfile>("user_profiles_expr", {
  orderByExpression: "(id, timestamp)",
});

// Disable sorting entirely
const unsortedTable = new OlapTable<UserProfile>("user_profiles_unsorted", {
  orderByExpression: "tuple()",
});

// For deduplication, explicitly set the ReplacingMergeTree engine
const dedupTable = new OlapTable<UserProfile>("user_profiles_dedup", {
  engine: ClickHouseEngines.ReplacingMergeTree,
  orderByFields: ["id", "updated_at"],
});
```
### `BaseOlapConfig`
Base configuration interface for `OlapTable` with common table configuration options.
```ts
interface BaseOlapConfig<T> {
// Optional database name (defaults to moose.config.toml clickhouse_config.db_name)
database?: string;
// Optional array of field names to order by
orderByFields?: (keyof T & string)[];
// Optional SQL expression for ORDER BY clause (alternative to orderByFields)
orderByExpression?: string;
// Optional table engine (defaults to MergeTree)
engine?: ClickHouseEngines;
// Optional settings for table configuration
settings?: { [key: string]: string };
// Optional lifecycle mode (defaults to MOOSE_MANAGED)
lifeCycle?: LifeCycle;
// Additional engine-specific fields (ver, isDeleted, keeperPath, etc.)
// depend on the engine type
}
```
Example with database override:
```ts
// Table in custom database
const analyticsTable = new OlapTable<UserProfile>("user_profiles", {
  database: "analytics", // overrides the default from moose.config.toml
  orderByFields: ["id", "timestamp"],
});

// Default database (from moose.config.toml)
const defaultTable = new OlapTable<UserProfile>("user_profiles");
```
### `Stream`
Creates a Redpanda topic with the schema of type T.
```ts
// Basic usage
const myStream = new Stream<UserEvent>("user_events");

// With configuration (field names mirror the Python StreamConfig above)
const myConfiguredStream = new Stream<UserEvent>("user_events_configured", {
  parallelism: 3,
  retentionPeriod: 86400, // retention in seconds (1 day)
});
// Adding transformations
myConfiguredStream.addTransform(
destinationStream,
(record) => transformFunction(record)
);
```
### `IngestApi`
Creates an HTTP endpoint for ingesting data of type T.
```ts
// Basic usage with destination stream
const myIngestApi = new IngestApi<UserEvent>("user_events", {
  destination: myUserEventStream, // an existing Stream<UserEvent>
});
```
### `Api`
Creates an HTTP endpoint for querying data with request type T and response type R.
```ts
// Basic usage
const myApi = new Api<UserQuery, UserProfile[]>(
  "get_user_profiles",
  async (params, { client, sql }) => {
const result = await client.query.execute(
sql`SELECT * FROM user_profiles WHERE age > ${params.minAge} LIMIT 10`
);
return result;
}
);
```
### `IngestPipeline`
Combines ingest API, stream, and table creation in a single component.
```ts
// Basic usage
const pipeline = new IngestPipeline<UserEvent>("user_pipeline", {
  ingestApi: true,
  stream: true,
  table: true,
});

// With advanced configuration
const advancedPipeline = new IngestPipeline<UserEvent>("user_pipeline_advanced", {
  ingestApi: true,
  stream: { parallelism: 3 },
  table: {
    orderByFields: ["id", "timestamp"],
    engine: ClickHouseEngines.ReplacingMergeTree,
  },
});
```
### `MaterializedView`
Creates a materialized view in ClickHouse.
```ts
// Basic usage
const view = new MaterializedView<UserStatistics>({
  selectStatement: "SELECT user_id, COUNT(*) as event_count FROM user_events GROUP BY user_id",
  tableName: "user_statistics",            // target table that stores the view's rows
  materializedViewName: "user_statistics_mv",
  orderByFields: ["user_id"],
});
```
## SQL Utilities
### `sql` Template Tag
Template tag for creating type-safe SQL queries with parameters.
```ts
// Basic usage
const query = sql`SELECT * FROM users WHERE id = ${userId}`;
// With multiple parameters
const query = sql`
SELECT * FROM users
WHERE age > ${minAge}
AND country = ${country}
LIMIT ${limit}
`;
```
### `MooseClient`
Client for interacting with ClickHouse and Temporal.
```ts
class MooseClient {
query: QueryClient; // For database queries
workflow: WorkflowClient; // For workflow operations
}
```
## ClickHouse Utilities
### Table Engine Configurations
#### `ClickHouseEngines` Enum
Available table engines:
```ts
enum ClickHouseEngines {
MergeTree = "MergeTree",
ReplacingMergeTree = "ReplacingMergeTree",
AggregatingMergeTree = "AggregatingMergeTree",
SummingMergeTree = "SummingMergeTree",
ReplicatedMergeTree = "ReplicatedMergeTree",
ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree",
ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree",
ReplicatedSummingMergeTree = "ReplicatedSummingMergeTree",
S3Queue = "S3Queue"
}
```
#### `ReplacingMergeTreeConfig`
Configuration for ReplacingMergeTree tables:
```ts
type ReplacingMergeTreeConfig<T> = {
engine: ClickHouseEngines.ReplacingMergeTree;
orderByFields?: (keyof T & string)[];
ver?: keyof T & string; // Optional: version column for keeping latest
isDeleted?: keyof T & string; // Optional: soft delete marker (requires ver)
settings?: { [key: string]: string };
}
```
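For example, a deduplicating table using this configuration shape (a sketch; the `UserProfile` model and column names are assumed):
```ts
const dedupUsers = new OlapTable<UserProfile>("user_profiles_dedup", {
  engine: ClickHouseEngines.ReplacingMergeTree,
  orderByFields: ["id"],
  ver: "updated_at",    // keep the row with the highest updated_at
  isDeleted: "deleted", // rows with deleted = 1 are dropped on merge (requires ver)
});
```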
#### Replicated Engine Configurations
Configuration for replicated table engines:
```ts
// ReplicatedMergeTree
type ReplicatedMergeTreeConfig<T> = {
engine: ClickHouseEngines.ReplicatedMergeTree;
keeperPath?: string; // Optional: ZooKeeper/Keeper path (omit for Cloud)
replicaName?: string; // Optional: replica name (omit for Cloud)
orderByFields?: (keyof T & string)[];
settings?: { [key: string]: string };
}
// ReplicatedReplacingMergeTree
type ReplicatedReplacingMergeTreeConfig<T> = {
engine: ClickHouseEngines.ReplicatedReplacingMergeTree;
keeperPath?: string; // Optional: ZooKeeper/Keeper path (omit for Cloud)
replicaName?: string; // Optional: replica name (omit for Cloud)
ver?: keyof T & string; // Optional: version column
isDeleted?: keyof T & string; // Optional: soft delete marker
orderByFields?: (keyof T & string)[];
settings?: { [key: string]: string };
}
// ReplicatedAggregatingMergeTree
type ReplicatedAggregatingMergeTreeConfig<T> = {
engine: ClickHouseEngines.ReplicatedAggregatingMergeTree;
keeperPath?: string; // Optional: ZooKeeper/Keeper path (omit for Cloud)
replicaName?: string; // Optional: replica name (omit for Cloud)
orderByFields?: (keyof T & string)[];
settings?: { [key: string]: string };
}
// ReplicatedSummingMergeTree
type ReplicatedSummingMergeTreeConfig<T> = {
engine: ClickHouseEngines.ReplicatedSummingMergeTree;
keeperPath?: string; // Optional: ZooKeeper/Keeper path (omit for Cloud)
replicaName?: string; // Optional: replica name (omit for Cloud)
columns?: string[]; // Optional: columns to sum
orderByFields?: (keyof T & string)[];
settings?: { [key: string]: string };
}
```
**Note**: The `keeperPath` and `replicaName` parameters are optional. When omitted, Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments (default path: `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}`). You can still provide both parameters explicitly if you need custom replication paths.
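A sketch contrasting the two approaches (table and column names are illustrative):
```ts
// Cloud / Boreal: omit keeper parameters and rely on Moose's defaults
const replicatedUsers = new OlapTable<UserProfile>("user_profiles_replicated", {
  engine: ClickHouseEngines.ReplicatedMergeTree,
  orderByFields: ["id"],
});

// Self-managed cluster: provide explicit keeper path and replica name
const selfManagedUsers = new OlapTable<UserProfile>("user_profiles_self_managed", {
  engine: ClickHouseEngines.ReplicatedReplacingMergeTree,
  keeperPath: "/clickhouse/tables/{database}/{shard}/user_profiles_self_managed",
  replicaName: "{replica}",
  ver: "updated_at",
  orderByFields: ["id", "updated_at"],
});
```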
### `S3QueueTableSettings`
Type-safe interface for S3Queue-specific table settings (ClickHouse 24.7+).
```ts
interface S3QueueTableSettings {
mode?: "ordered" | "unordered"; // Processing mode
after_processing?: "keep" | "delete"; // File handling after processing
keeper_path?: string; // ZooKeeper path for coordination
loading_retries?: string; // Number of retry attempts
processing_threads_num?: string; // Parallel processing threads
// ... and many more settings
}
```
### S3Queue Configuration
Configure S3Queue tables for streaming data from S3 buckets (ORDER BY is not supported):
```ts
);
```
### S3 Configuration
Configure S3 tables for direct read/write access to S3 storage:
```ts
);
// Public bucket (no authentication) - omit credentials for NOSIGN
);
```
### Buffer Configuration
Configure Buffer tables for high-throughput buffered writes (ORDER BY is not supported):
```ts
// First create destination table
);
// Then create buffer table
);
```
### Distributed Configuration
Configure Distributed tables for cluster-wide distributed queries (ORDER BY is not supported):
```ts
);
```
## Task Management
### `Task`
A class that represents a single task within a workflow system.
```ts
// No input, no output
const cleanupTask = new Task<null, void>("cleanup", {
  run: async () => {
    console.log("Cleanup complete");
  },
});

// With input and output
const processTask = new Task<{ userId: string }, { result: string }>("process_user", {
  run: async (ctx) => {
    return { result: `Processed ${ctx.input.userId}` };
  },
  retries: 3,
  timeout: "30s",
});
```
### `TaskContext`
A context object that includes input & state passed between the task's run/cancel functions.
```ts
export type TaskContext<T> = T extends null ? { state: any; input?: null } : { state: any; input: T };
```
### `TaskConfig`
Configuration options for tasks.
```ts
interface TaskConfig<T, R> {
  // The main function that executes the task logic
  run: (context: TaskContext<T>) => Promise<R>;
  // Optional array of tasks to execute after this task completes
  onComplete?: Task<R, any>[];
  // Optional function that is called when the task is cancelled.
  onCancel?: (context: TaskContext<T>) => Promise<void>;
// Optional timeout duration (e.g., "30s", "5m", "never")
timeout?: string;
// Optional number of retry attempts
retries?: number;
}
```
### `Workflow`
A class that represents a complete workflow composed of interconnected tasks.
```ts
const myWorkflow = new Workflow("getData", {
startingTask: callAPI,
schedule: "@every 5s", // Run every 5 seconds
timeout: "1h",
retries: 3
});
```
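For reference, a sketch of what the `callAPI` starting task might look like (the URL and logic are illustrative; the constructor shape mirrors the `Task` examples above):
```ts
const callAPI = new Task<null, void>("callAPI", {
  run: async () => {
    // Illustrative only: pull data from an external API on each scheduled run
    const response = await fetch("https://example.com/api/data");
    console.log("Fetched records:", (await response.json()).length);
  },
});
```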
### `WorkflowConfig`
Configuration options for defining a workflow.
```ts
interface WorkflowConfig {
// The initial task that begins the workflow execution
startingTask: Task | Task | Task | Task;
// Optional number of retry attempts
retries?: number;
// Optional timeout duration (e.g., "10m", "1h", "never")
timeout?: string;
// Optional cron-style schedule string
schedule?: string;
}
```
---
**Important:** The following components must be exported from your `app/index.ts` file for Moose to detect them:
- `OlapTable` instances
- `Stream` instances
- `IngestApi` instances
- `Api` instances
- `IngestPipeline` instances
- `MaterializedView` instances
- `Task` instances
- `Workflow` instances
**Configuration objects and utilities** (like `DeadLetterQueue`, `Key`, `sql`) do not need to be exported as they are used as dependencies of the main components.
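For example, an `app/index.ts` might simply re-export the components you define elsewhere (paths and names are illustrative):
```ts
// app/index.ts — Moose only picks up components that are exported from here
export { userTable } from "./tables/users";
export { userEventsStream, userEventsDLQ } from "./streams/userEvents";
export { dataWorkflow } from "./workflows/dataProcessing";
```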
---
## Moose Streaming
Source: moose/streaming.mdx
Build real-time data pipelines with Redpanda/Kafka streams, transformations, and event processing
# Moose Streaming
## Overview
The Streaming module provides standalone real-time data processing with Kafka/Redpanda topics. You can use this capability independently to build event-driven architectures, data transformations, and real-time pipelines without requiring other MooseStack components.
## Basic Usage
```ts filename="Stream.ts" copy
interface ExampleEvent {
id: string;
userId: string;
timestamp: Date;
eventType: string;
}
// Create a standalone stream for events
const exampleStream = new Stream<ExampleEvent>("example-events"); // topic name is illustrative
// Add consumers for real-time processing
exampleStream.addConsumer((event) => {
console.log("Processing event:", event);
// Custom processing logic here
});
```
### Enabling Streaming
To enable streaming, you need to ensure that the `streaming_engine` feature flag is set to `true` in your `moose.config.toml` file:
```toml
[features]
streaming_engine = true
```
## Core Capabilities
## Integration with Other Capabilities
The Streaming capability can be used independently, or in conjunction with other MooseStack modules:
---
## moose/streaming/connect-cdc
Source: moose/streaming/connect-cdc.mdx
# Connect to CDC Services
Coming Soon!
---
## Streaming Consumer Functions
Source: moose/streaming/consumer-functions.mdx
Read and process data from streams with consumers and processors
# Streaming Consumer Functions
## Overview
Consuming data from streams allows you to read and process data from Kafka/Redpanda topics. This is essential for building real-time applications, analytics, and event-driven architectures.
## Basic Usage
Consumers are just functions that are called when new data is available in a stream. You add them to a stream like this:
```typescript filename="StreamConsumer.ts"
interface UserEvent {
id: string;
userId: string;
timestamp: Date;
eventType: string;
}
const userEventsStream = new Stream<UserEvent>("user-events");
// Add a consumer to process events
userEventsStream.addConsumer((event: UserEvent) => {
console.log(`Processing event: ${event.id}`);
console.log(`User: ${event.userId}, Type: ${event.eventType}`);
// Your processing logic here
// e.g., update analytics, send notifications, etc.
});
// Add multiple consumers for different purposes
userEventsStream.addConsumer((event: UserEvent) => {
// Analytics processing
if (event.eventType === 'purchase') {
updatePurchaseAnalytics(event);
}
});
userEventsStream.addConsumer((event: UserEvent) => {
// Notification processing
if (event.eventType === 'signup') {
sendWelcomeEmail(event.userId);
}
});
```
## Processing Patterns
### Stateful Processing with MooseCache
Maintain state across event processing using MooseCache for distributed state management:
```typescript filename="StatefulProcessing.ts"
// State container for accumulating data
interface AccumulatorState {
id: string;
counter: number;
sum: number;
lastModified: Date;
  attributes: Record<string, any>;
}
// Input message structure
interface InputMessage {
id: string;
groupId: string;
numericValue: number;
messageType: string;
timestamp: Date;
  payload: Record<string, any>;
}
const messageStream = new Stream<InputMessage>("input-stream");
messageStream.addConsumer(async (message: InputMessage) => {
// Get distributed cache instance
const cache = await MooseCache.get();
const cacheKey = `state:${message.groupId}`;
// Load existing state or create new one
let state: AccumulatorState | null = await cache.get(cacheKey);
if (!state) {
// Initialize new state
state = {
id: message.groupId,
counter: 0,
sum: 0,
lastModified: new Date(),
attributes: {}
};
}
// Apply message to state
state.counter += 1;
state.sum += message.numericValue;
state.lastModified = message.timestamp;
state.attributes = { ...state.attributes, ...message.payload };
// Determine cache lifetime based on message type
const ttlSeconds = message.messageType === 'complete' ? 60 : 3600;
if (message.messageType === 'complete' || shouldFinalize(state)) {
// Finalize and remove state
await finalizeState(state);
await cache.delete(cacheKey);
} else {
// Persist updated state
await cache.set(cacheKey, state, ttlSeconds);
}
});
// Condition for automatic state finalization
function shouldFinalize(state: AccumulatorState): boolean {
const threshold = 100;
const timeLimit = 30 * 60 * 1000; // 30 minutes
const elapsed = new Date().getTime() - state.lastModified.getTime();
return state.counter >= threshold || elapsed > timeLimit;
}
async function finalizeState(state: AccumulatorState): Promise<void> {
console.log(`Finalizing state ${state.id}: counter=${state.counter}, sum=${state.sum}`);
}
```
## Propagating Events to External Systems
You can use consumer functions to trigger actions across external systems - send notifications, sync databases, update caches, or integrate with any other service when events occur:
### HTTP API Calls
Send processed data to external APIs:
```typescript filename="HttpIntegration.ts"
interface WebhookPayload {
id: string;
  data: Record<string, any>;
timestamp: Date;
}
const webhookStream = new Stream<WebhookPayload>("webhook-events");
webhookStream.addConsumer(async (payload: WebhookPayload) => {
try {
// Send to external webhook
const response = await fetch('https://external-api.com/webhook', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + process.env.API_TOKEN
},
body: JSON.stringify({
eventId: payload.id,
eventData: payload.data,
processedAt: new Date().toISOString()
})
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
console.log(`Successfully sent event ${payload.id} to external API`);
} catch (error) {
console.error(`Failed to send event ${payload.id}:`, error);
// Could implement retry logic or dead letter queue here
}
});
```
#### Database Operations
Write processed data to external databases:
```typescript filename="DatabaseIntegration.ts"
interface DatabaseRecord {
id: string;
category: string;
value: number;
  metadata: Record<string, any>;
timestamp: Date;
}
const dbStream = new Stream<DatabaseRecord>("database-events");
// Initialize database connection
const dbConfig = {
host: process.env.DB_HOST,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
database: process.env.DB_NAME
};
dbStream.addConsumer(async (record: DatabaseRecord) => {
const connection = await createConnection(dbConfig);
try {
// Insert record into external database
await connection.execute(
'INSERT INTO processed_events (id, category, value, metadata, created_at) VALUES (?, ?, ?, ?, ?)',
[
record.id,
record.category,
record.value,
JSON.stringify(record.metadata),
record.timestamp
]
);
console.log(`Inserted record ${record.id} into database`);
} catch (error) {
console.error(`Database insert failed for record ${record.id}:`, error);
} finally {
await connection.end();
}
});
```
#### File System Operations
Write processed data to files or cloud storage:
```typescript filename="FileSystemIntegration.ts"
import { mkdir, writeFile } from "node:fs/promises";
import { join } from "node:path";

interface FileOutput {
id: string;
filename: string;
content: string;
directory: string;
format: 'json' | 'csv' | 'txt';
}
const fileStream = new Stream<FileOutput>("file-events");
fileStream.addConsumer(async (output: FileOutput) => {
try {
// Ensure directory exists
await mkdir(output.directory, { recursive: true });
// Format content based on type
let formattedContent: string;
switch (output.format) {
case 'json':
formattedContent = JSON.stringify(JSON.parse(output.content), null, 2);
break;
case 'csv':
formattedContent = output.content; // Assume already CSV formatted
break;
default:
formattedContent = output.content;
}
// Write file with timestamp
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const filename = `${output.filename}_${timestamp}.${output.format}`;
const filepath = join(output.directory, filename);
await writeFile(filepath, formattedContent, 'utf8');
console.log(`Written file: ${filepath}`);
} catch (error) {
console.error(`Failed to write file for output ${output.id}:`, error);
}
});
```
#### Email and Notifications
Send alerts and notifications based on processed events:
```typescript filename="NotificationIntegration.ts"
import nodemailer from "nodemailer";

interface NotificationEvent {
id: string;
type: 'email' | 'slack' | 'webhook';
recipient: string;
subject: string;
message: string;
priority: 'low' | 'medium' | 'high';
  metadata: Record<string, any>;
}
const notificationStream = new Stream<NotificationEvent>("notifications");
// Configure email transporter
const emailTransporter = nodemailer.createTransport({
host: process.env.SMTP_HOST,
port: parseInt(process.env.SMTP_PORT || '587'),
secure: false,
auth: {
user: process.env.SMTP_USER,
pass: process.env.SMTP_PASS
}
});
notificationStream.addConsumer(async (notification: NotificationEvent) => {
try {
switch (notification.type) {
case 'email':
await emailTransporter.sendMail({
from: process.env.SMTP_FROM,
to: notification.recipient,
subject: notification.subject,
text: notification.message,
        html: `
          <h2>${notification.subject}</h2>
          <p>${notification.message}</p>
          <p>Priority: ${notification.priority}</p>
        `
});
break;
case 'slack':
await fetch(`https://hooks.slack.com/services/${process.env.SLACK_WEBHOOK}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
text: notification.message,
channel: notification.recipient,
username: 'Moose Alert',
icon_emoji: notification.priority === 'high' ? ':warning:' : ':information_source:'
})
});
break;
case 'webhook':
await fetch(notification.recipient, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
id: notification.id,
subject: notification.subject,
message: notification.message,
priority: notification.priority,
metadata: notification.metadata
})
});
break;
}
console.log(`Sent ${notification.type} notification ${notification.id}`);
} catch (error) {
console.error(`Failed to send notification ${notification.id}:`, error);
}
});
```
---
## Create Streams
Source: moose/streaming/create-stream.mdx
Define and create Kafka/Redpanda topics with type-safe schemas
# Creating Streams
## Overview
Streams serve as the transport layer between your data sources and database tables. Built on Kafka/Redpanda topics, they provide a way to implement real-time pipelines for ingesting and processing incoming data.
## Creating Streams
You can create streams in two ways:
- High-level: Using the `IngestPipeline` class (recommended)
- Low-level: Manually configuring the `Stream` component
### Streams for Ingestion
The `IngestPipeline` class provides a convenient way to set up streams with ingestion APIs and tables. This is the recommended way to create streams for ingestion:
```ts filename="IngestionStream.ts" copy {10}
interface RawData {
  id: Key<string>;
value: number;
}
const rawDataPipeline = new IngestPipeline<RawData>("raw_data", {
  ingestApi: true,
  stream: true,
  table: true,
});
```
While the `IngestPipeline` provides a convenient way to set up streams with ingestion APIs and tables, you can also configure these components individually for more granular control:
```ts filename="StreamObject.ts" copy {8-12}
interface RawData {
id: string;
value: number;
}
// Create a table for the raw data
const rawDataTable = new OlapTable<RawData>("raw_data");

// Create a stream that writes into the table
const rawDataStream = new Stream<RawData>("raw_data", {
  destination: rawDataTable,
});

// Create an ingestion API for the raw data
const rawDataApi = new IngestApi<RawData>("raw_data", {
  destination: rawDataStream,
});
```
### Streams for Transformations
If the raw data needs to be transformed before landing in the database, you can define a transform destination stream and a transform function to process the data:
#### Single Stream Transformation
```ts filename="TransformDestinationStream.ts" copy
interface RawData {
  id: Key<string>;
value: number;
}
interface TransformedData {
  id: Key<string>;
transformedValue: number;
transformedAt: Date;
}
// Configure components for raw data ingestion & buffering
const rawData = new IngestPipeline<RawData>("raw_data", {
ingestApi: true,
stream: true, // Buffers data between the ingestion API and the database table
table: false // Don't create a table for the raw data
});
// Create a table for the transformed data
const transformedData = new IngestPipeline<TransformedData>("transformed_data", {
ingestApi: false, // Don't create an ingestion API for the transformed data
stream: true, // Create destination stream for the transformed data
table: true // Create a table for the transformed data
});
rawData.stream.addTransform(transformedData.stream, (record) => ({
id: record.id,
transformedValue: record.value * 2,
transformedAt: new Date()
}));
```
```ts filename="TransformDestinationStream.ts" copy
interface RawData {
  id: Key<string>;
value: number;
}
interface TransformedData {
  id: Key<string>;
transformedValue: number;
transformedAt: Date;
}
// Configure components for raw data ingestion & buffering
const rawDataStream = new Stream<RawData>("raw_data");
const rawDataApi = new IngestApi<RawData>("raw_data", {
  destination: rawDataStream,
});

// Configure components for transformed data stream & storage
const transformedTable = new OlapTable<TransformedData>("transformed_data");
const transformedStream = new Stream<TransformedData>("transformed_data", {
  destination: transformedTable,
});
// Add a transform to the raw data stream to transform the data
rawDataStream.addTransform(transformedStream, (record) => ({
id: record.id,
transformedValue: record.value * 2,
transformedAt: new Date()
}));
```
#### Chaining Transformations
For more complex transformations, you can chain multiple transformations together. This is a use case where using a standalone Stream for intermediate stages of your pipeline may be useful:
```ts filename="ChainedTransformations.ts" copy
// Define the schema for raw input data
interface RawData {
  id: Key<string>;
value: number;
}
// Define the schema for intermediate transformed data
interface IntermediateData {
  id: Key<string>;
transformedValue: number;
transformedAt: Date;
}
// Define the schema for final transformed data
interface FinalData {
  id: Key<string>;
transformedValue: number;
anotherTransformedValue: number;
transformedAt: Date;
}
// Create the first pipeline for raw data ingestion
// Only create an API and a stream (no table) since we're ingesting the raw data
const rawData = new IngestPipeline<RawData>("raw_data", {
ingestApi: true, // Enable HTTP ingestion endpoint
stream: true, // Create a stream to buffer data
table: false // Don't store raw data in a table
});
// Create an intermediate stream to hold data between transformations (no api or table needed)
const intermediateStream = new Stream<IntermediateData>("intermediate_stream");

// First transformation: transform the raw data into the intermediate shape
rawData.stream!.addTransform(intermediateStream, (record) => ({
  id: record.id,
  transformedValue: record.value * 2, // Double the raw value
  transformedAt: new Date()
}));
// Create the final pipeline that will store the fully transformed data
const finalData = new IngestPipeline<FinalData>("final_stream", {
ingestApi: false, // No direct ingestion to this pipeline
stream: true, // Create a stream for processing
table: true // Store final results in a table
});
// Second transformation: further transform the intermediate data
intermediateStream.addTransform(finalData.stream, (record) => ({
id: record.id,
transformedValue: record.transformedValue * 2, // Double the intermediate value
anotherTransformedValue: record.transformedValue * 3, // Triple the intermediate value
transformedAt: new Date() // Update timestamp
}));
```
## Stream Configurations
### Parallelism and Retention
```typescript filename="StreamConfig.ts"
const highThroughputStream = new Stream<UserEvent>("user_events", {
  parallelism: 4,          // number of topic partitions (mirrors the Python StreamConfig)
  retentionPeriod: 86400,  // retention in seconds (1 day)
});
```
### LifeCycle Management
Control how Moose manages your stream resources when your code changes. See the [LifeCycle Management guide](./lifecycle) for detailed information.
```typescript filename="LifeCycleStreamConfig.ts"
// Production stream with external management
);
// Development stream with full management
);
```
See the [API Reference](/moose/reference/ts-moose-lib#stream) for complete configuration options.
---
## Dead Letter Queues
Source: moose/streaming/dead-letter-queues.mdx
Handle failed stream processing with dead letter queues
# Dead Letter Queues
## Overview
Dead Letter Queues (DLQs) provide a robust error handling mechanism for stream processing in Moose. When streaming functions fail during transformation or consumption, failed messages are automatically routed to a configured dead letter queue for later analysis and recovery.
## Dead Letter Record Structure
When a message fails processing, Moose creates a dead letter record with the following structure:
```ts
interface DeadLetterModel {
originalRecord: Record; // The original message that failed
errorMessage: string; // Error description
errorType: string; // Error class/type name
failedAt: Date; // Timestamp when failure occurred
source: "api" | "transform" | "table"; // Where the failure happened
}
interface DeadLetter extends DeadLetterModel {
asTyped: () => T; // Type-safe access to original record
}
```
## Creating Dead Letter Queues
### Basic Setup
```ts filename="dead-letter-setup.ts" copy
// Define your data model
interface UserEvent {
userId: string;
action: string;
timestamp: number;
}
// Create a dead letter queue for UserEvent failures
const userEventDLQ = new DeadLetterQueue<UserEvent>("UserEventDLQ");
```
### Configuring Transformations with Dead Letter Queues
Add a dead letter queue to your Transformation Function configuration, and any errors thrown in the transformation will trigger the event to be routed to the dead letter queue.
```ts filename="transform-with-dlq.ts" copy
// Create dead letter queue
const eventDLQ = new DeadLetterQueue<RawEvent>("EventDLQ");
// Add transform with errors to trigger DLQ, and DLQ configuration
rawEvents.stream!.addTransform(
processedEvents.stream!,
(event: RawEvent): ProcessedEvent => {
// This transform might fail for invalid data
if (!event.userId || event.userId.length === 0) {
throw new Error("Invalid userId: cannot be empty");
}
if (event.timestamp < 0) {
throw new Error("Invalid timestamp: cannot be negative");
}
return {
userId: event.userId,
action: event.action,
processedAt: new Date(),
isValid: true
};
},
{
deadLetterQueue: eventDLQ // Configure DLQ for this transform
}
);
```
### Configuring Consumers with Dead Letter Queues
Add a dead letter queue to your Consumer Function configuration, and any errors thrown in the function will trigger the event to be routed to the dead letter queue.
```ts filename="consumer-with-dlq.ts" copy
// Add consumer with errors to trigger DLQ, and DLQ configuration
rawEvents.stream!.addConsumer(
(event: RawEvent): void => {
// This consumer might fail for certain events
if (event.action === "forbidden_action") {
throw new Error("Forbidden action detected");
}
// Process the event (e.g., send to external API)
console.log(`Processing event for user ${event.userId}`);
},
{
deadLetterQueue: eventDLQ // Configure DLQ for this consumer
}
);
```
### Configuring Ingest APIs with Dead Letter Queues
Add a dead letter queue to your Ingest API configuration, and any runtime data validation failures at the API will trigger the event to be routed to the dead letter queue.
```typescript filename="ValidationExample.ts" copy
interface ExampleModel {
id: string;
userId: string;
timestamp: Date;
properties?: {
device?: string;
version?: number;
}
}
const examplePipeline = new IngestPipeline<ExampleModel>("example_model", {
  ingestApi: true,
  stream: true,
  table: true,
  deadLetterQueue: true, // runtime validation failures at the API are routed to a DLQ
});
```
## Processing Dead Letter Messages
### Monitoring Dead Letter Queues
```ts filename="dlq-monitoring.ts" copy
// Add a consumer to monitor dead letter messages
eventDLQ.addConsumer((deadLetter) => {
console.log("Dead letter received:");
console.log(`Error: ${deadLetter.errorMessage}`);
console.log(`Error Type: ${deadLetter.errorType}`);
console.log(`Failed At: ${deadLetter.failedAt}`);
console.log(`Source: ${deadLetter.source}`);
// Access the original typed data
const originalEvent: RawEvent = deadLetter.asTyped();
console.log(`Original User ID: ${originalEvent.userId}`);
});
```
### Recovery and Retry Logic
```ts filename="dlq-recovery.ts" copy
// Create a recovery stream for fixed messages
const recoveredEvents = new Stream<ProcessedEvent>("recovered_events", {
destination: processedEvents.table // Send recovered data to main table
});
// Add recovery logic to the DLQ
eventDLQ.addTransform(
recoveredEvents,
(deadLetter): ProcessedEvent | null => {
try {
const originalEvent = deadLetter.asTyped();
// Apply fixes based on error type
if (deadLetter.errorMessage.includes("Invalid userId")) {
// Skip events with invalid user IDs
return null;
}
if (deadLetter.errorMessage.includes("Invalid timestamp")) {
// Fix negative timestamps
const fixedEvent = {
...originalEvent,
timestamp: Math.abs(originalEvent.timestamp)
};
return {
userId: fixedEvent.userId,
action: fixedEvent.action,
processedAt: new Date(),
isValid: true
};
}
return null; // Skip other errors
} catch (error) {
console.error("Recovery failed:", error);
return null;
}
}
);
```
## Common Patterns
### Circuit Breaker Pattern
```ts filename="circuit-breaker.ts" copy
let failureCount = 0;
const maxFailures = 5;
const resetTimeout = 60000; // 1 minute
rawEvents.stream!.addTransform(
processedEvents.stream!,
(event: RawEvent): ProcessedEvent => {
if (failureCount >= maxFailures) {
throw new Error("Circuit breaker open - too many failures");
}
try {
// Your processing logic here
const result = processEvent(event);
failureCount = 0; // Reset on success
return result;
} catch (error) {
failureCount++;
if (failureCount >= maxFailures) {
setTimeout(() => { failureCount = 0; }, resetTimeout);
}
throw error;
}
},
{ deadLetterQueue: eventDLQ }
);
```
### Retry with Exponential Backoff
```ts filename="retry-backoff.ts" copy
// Create a retry DLQ with delay processing
const retryDLQ = new DeadLetterQueue<RawEvent>("RetryDLQ");
retryDLQ.addTransform(
processedEvents.stream!,
(deadLetter): ProcessedEvent | null => {
const retryCount = deadLetter.originalRecord.retryCount || 0;
const maxRetries = 3;
if (retryCount >= maxRetries) {
console.log("Max retries exceeded, giving up");
return null;
}
// Calculate delay (exponential backoff)
const delay = Math.pow(2, retryCount) * 1000; // 1s, 2s, 4s
setTimeout(() => {
try {
const originalEvent = deadLetter.asTyped();
// Add retry count to track attempts
const eventWithRetry = {
...originalEvent,
retryCount: retryCount + 1
};
// Retry the original processing logic
processEvent(eventWithRetry);
} catch (error) {
// Will go back to DLQ with incremented retry count
throw error;
}
}, delay);
return null; // Don't emit immediately, wait for retry
}
);
```
Dead letter queues add overhead to stream processing. Use them judiciously and monitor their impact on throughput. Consider implementing sampling for high-volume streams where occasional message loss is acceptable.
Dead letter queue events can be integrated with monitoring systems like Prometheus, DataDog, or CloudWatch for alerting and dashboards. Consider tracking metrics like DLQ message rate, error types, and recovery success rates.
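For example, a minimal monitoring sketch (the counters and alert threshold below are illustrative, not part of the Moose API):
```ts filename="dlq-metrics.ts" copy
// Track simple DLQ metrics from a consumer and forward them to your
// monitoring system of choice (Prometheus, DataDog, CloudWatch, ...).
const dlqErrorCounts: Record<string, number> = {};

eventDLQ.addConsumer((deadLetter) => {
  const key = deadLetter.errorType;
  dlqErrorCounts[key] = (dlqErrorCounts[key] ?? 0) + 1;

  // Illustrative alert threshold; replace with your metrics client
  if (dlqErrorCounts[key] > 100) {
    console.warn(`High DLQ volume for error type: ${key}`);
  }
});
```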
## Using Dead Letter Queues in Ingestion Pipelines
Dead Letter Queues (DLQs) can be directly integrated with your ingestion pipelines to capture records that fail validation or processing at the API entry point. This ensures that no data is lost, even if it cannot be immediately processed.
```typescript filename="IngestPipelineWithDLQ.ts" copy
interface ExampleSchema {
id: string;
name: string;
value: number;
timestamp: Date;
}
const pipeline = new IngestPipeline<ExampleSchema>("example", {
ingestApi: true,
stream: true,
table: true,
deadLetterQueue: true, // Route failed ingestions to DLQ
});
```
See the [Ingestion API documentation](/moose/apis/ingest-api#validation) for more details and best practices on configuring DLQs for ingestion.
---
## Publish Data
Source: moose/streaming/from-your-code.mdx
Write data to streams from applications, APIs, or external sources
# Publishing Data to Streams
## Overview
Publishing data to streams allows you to write data from various sources into your Kafka/Redpanda topics. This is the first step in building real-time data pipelines.
## Publishing Methods
### Using REST APIs
The most common way to publish data is through Moose's built-in ingestion APIs. These are configured to automatically sit in front of your streams and publish data to them whenever a request is made to the endpoint:
```typescript filename="PublishViaAPI.ts"
// When you create an IngestPipeline with ingestApi: true, Moose automatically creates an API endpoint
const rawData = new IngestPipeline("raw_data", {
ingestApi: true, // Creates POST /ingest/raw_data endpoint
stream: true,
table: true
});
// You can then publish data via HTTP POST requests
const response = await fetch('/ingest/raw_data', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
id: '123',
value: 42
})
});
```
See the [OpenAPI documentation](/stack/open-api) to learn more about how to generate type-safe client SDKs in your language of choice for all of your Moose APIs.
### Direct Stream Publishing
You can publish directly to a stream from your Moose code using the stream's `send` method.
This is useful when emitting events from workflows or other backend logic.
`send` accepts a single record or an array of records.
If your `Stream` is configured with `schemaConfig.kind = "JSON"`,
Moose produces using the Confluent envelope automatically (0x00 + schema id + JSON).
No code changes are needed beyond setting `schemaConfig`. See the [Schema Registry guide](/moose/streaming/schema-registry).
```ts filename="DirectPublish.ts" copy
interface UserEvent {
id: Key<string>;
userId: string;
timestamp: Date;
eventType: string;
}
// Create a stream (optionally configure destination to sync to a table)
const events = new Stream<UserEvent>("user-events");
// Publish a single record
await events.send({
id: "evt_1",
userId: "user_123",
timestamp: new Date(),
eventType: "click",
});
// Publish multiple records
await events.send([
{ id: "evt_2", userId: "user_456", timestamp: new Date(), eventType: "view" },
{ id: "evt_3", userId: "user_789", timestamp: new Date(), eventType: "signup" },
]);
```
Moose builds the Kafka topic name from your stream name,
optional namespace, and optional version (dots become underscores).
For example, a stream named `events` with version `1.2.0` becomes `events_1_2_0`
(or `my_ns.events_1_2_0` when the namespace is `"my_ns"`).
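For example, a stream configured with a version (sketch; assumes the `version` option on the stream config):
```ts filename="VersionedStream.ts" copy
// With version "1.2.0", Moose produces to the topic "events_1_2_0";
// with a "my_ns" namespace configured, the topic becomes "my_ns.events_1_2_0".
const versionedEvents = new Stream<UserEvent>("events", {
  version: "1.2.0",
});

await versionedEvents.send({
  id: "evt_4",
  userId: "user_123",
  timestamp: new Date(),
  eventType: "click",
});
```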
### Using the Kafka/Redpanda Client from External Applications
You can also publish to streams from external applications using Kafka/Redpanda clients:
See the [Kafka.js documentation](https://kafka.js.org/docs/getting-started) for more information on how to use the Kafka.js client to publish to streams.
```typescript filename="ExternalPublish.ts"
// Assumes the Confluent Kafka JavaScript client, which exposes a KafkaJS-compatible API
import { KafkaJS } from "@confluentinc/kafka-javascript";
const { Kafka } = KafkaJS;
const kafka = new Kafka({
kafkaJS: {
clientId: 'my-app',
brokers: ['localhost:19092']
}
});
const producer = kafka.producer();
await producer.connect();
// Publish to the stream topic
await producer.send({
topic: 'user-events', // Stream name becomes the topic name
messages: [
{
key: 'event-123',
value: JSON.stringify({
id: 'event-123',
userId: 'user-456',
timestamp: new Date().toISOString(),
eventType: 'page_view'
})
}
]
});
```
#### Locating Redpanda Connection Details
When running your Moose backend within your local dev environment, you can find the connection details for your Redpanda cluster in the `moose.config.toml` file in the root of your project:
```toml filename="moose.config.toml" copy
[redpanda_config]
broker = "localhost:19092"
message_timeout_ms = 1000
retention_ms = 30000
replication_factor = 1
```
---
## Schema Registry
Source: moose/streaming/schema-registry.mdx
Use Confluent Schema Registry with Moose streams (JSON Schema first)
# Schema Registry Integration
## Overview
Moose can publish and consume Kafka/Redpanda messages using Confluent Schema Registry. The first supported encoding is JSON Schema; Avro and Protobuf are planned.
## Configure Schema Registry URL
Set the Schema Registry URL in `moose.config.toml` under `redpanda_config` (aliased as `kafka_config`). You can also override with environment variables.
```toml filename="moose.config.toml" copy
[redpanda_config]
broker = "localhost:19092"
schema_registry_url = "http://localhost:8081"
```
Environment overrides (either key works):
```bash filename="Terminal" copy
export MOOSE_REDPANDA_CONFIG__SCHEMA_REGISTRY_URL=http://localhost:8081
# or
export MOOSE_KAFKA_CONFIG__SCHEMA_REGISTRY_URL=http://localhost:8081
```
## Referencing Schemas
You can attach a Schema Registry reference to any `Stream` via `schemaConfig`. Use one of:
- Subject latest: `{ subjectLatest: string }`
- Subject and version: `{ subject: string, version: number }`
- Schema id: `{ id: number }`
```ts filename="sr-stream.ts" copy
interface Event {
  id: string;
  value: number;
}
const schemaConfig: KafkaSchemaConfig = {
  kind: "JSON",
  reference: { subjectLatest: "event-value" },
};
// Attach the Schema Registry reference to the stream via `schemaConfig`
const events = new Stream<Event>("events", {
  schemaConfig,
});
// Producing uses Schema Registry envelope automatically
await events.send({ id: "e1", value: 42 });
```
## Consuming SR JSON in Runners
Moose streaming runners automatically detect the Confluent JSON envelope
when consuming and strip the header before parsing the JSON.
Your transformation code continues to work unchanged.
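For instance, a transform reading from the SR-backed stream above looks like any other transform (sketch; `enrichedEvents` is an illustrative destination stream):
```ts filename="sr-transform.ts" copy
const enrichedEvents = new Stream<Event>("enriched_events");

// The runner strips the Confluent envelope before this handler runs,
// so it receives a plain typed record.
events.addTransform(enrichedEvents, (event) => ({
  ...event,
  value: event.value * 2,
}));
```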
## Ingestion APIs and SR
When an Ingest API routes to a topic that has a `schemaConfig` of kind JSON,
Moose resolves the schema id and publishes requests using the Schema Registry envelope.
You can also set the reference to a fixed `id` to skip lookups.
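For example, pinning the reference to a fixed schema id (the id value is illustrative):
```ts filename="sr-fixed-id.ts" copy
const schemaConfig: KafkaSchemaConfig = {
  kind: "JSON",
  reference: { id: 42 }, // fixed schema id; no subject/version lookup needed
};
```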
## Discover existing topics and schemas
Use the CLI to pull external topics and optionally fetch JSON Schemas from Schema Registry to emit typed models.
```bash filename="Terminal" copy
moose kafka pull \
--schema-registry http://localhost:8081 \
--path app/external-topics \
--include "*" \
--exclude "{__consumer_offsets,_schemas}"
```
This writes external topic declarations under the provided path based on language (default path is inferred).
## Current limitations
- JSON Schema only (Avro/Protobuf planned)
- The schema declared in code for an Ingest API may not match the actual schema registered in Schema Registry.
---
## Sync to Table
Source: moose/streaming/sync-to-table.mdx
Automatically sync stream data to OLAP tables with intelligent batching
# Sync to Table
## Overview
Moose automatically handles batch writes between streams and OLAP tables through a **destination configuration**. When you specify a `destination` OLAP table for a stream, Moose provisions a background synchronization process that batches and writes data from the stream to the table.
### Basic Usage
```ts filename="SyncToTable.ts" copy {13}
interface Event {
id: Key<string>;
userId: string;
timestamp: Date;
eventType: string;
}
const eventsTable = new OlapTable<Event>("events");
const eventsStream = new Stream<Event>("events", {
destination: eventsTable // This configures automatic batching
});
```
## Setting Up Automatic Sync
### Using IngestPipeline (Easiest)
The simplest way to set up automatic syncing is with an `IngestPipeline`, which creates all components and wires them together:
```ts filename="AutoSync.ts" copy
interface Event {
id: Key<string>;
userId: string;
timestamp: Date;
eventType: string;
}
// Creates stream, table, API, and automatic sync
const eventsPipeline = new IngestPipeline<Event>("events", {
ingestApi: true, // Creates HTTP endpoint at POST /ingest/events
stream: true, // Creates buffering stream
table: true // Creates destination table + auto-sync process
});
```
### Standalone Components
For more granular control, you can configure components individually:
```ts filename="ManualSync.ts" copy
interface Event {
id: Key<string>;
userId: string;
timestamp: Date;
eventType: string;
}
// Create table first
const eventsTable = new OlapTable<Event>("events");
// Create stream with destination table (enables auto-sync)
const eventsStream = new Stream<Event>("events", {
destination: eventsTable // This configures automatic batching
});
// Create API that writes to the stream
const eventsApi = new IngestApi<Event>("events", {
destination: eventsStream
});
```
## How Automatic Syncing Works
When you configure a stream with a `destination` table, Moose automatically handles the synchronization for you.
Moose creates a **Rust background process** that:
1. **Consumes** messages from the stream (Kafka/Redpanda topic)
2. **Batches** records up to 100,000 or flushes every second (whichever comes first)
3. **Executes** optimized ClickHouse `INSERT` statements
4. **Commits** stream offsets after successful writes
5. **Retries** failed batches with exponential backoff
Default batching parameters:
| Parameter | Value | Description |
|-----------|-------|-------------|
| `MAX_BATCH_SIZE` | 100,000 records | Maximum records per batch insert |
| `FLUSH_INTERVAL` | 1 second | Automatic flush regardless of batch size |
Currently, you cannot configure the batching parameters, but we're interested in adding this capability. If you need it, let us know on Slack!
[ClickHouse inserts need to be batched for optimal performance](https://clickhouse.com/blog/asynchronous-data-inserts-in-clickhouse#data-needs-to-be-batched-for-optimal-performance). Moose automatically handles this optimization internally, ensuring your data is efficiently written to ClickHouse without any configuration required.
## Data Flow Example
Here's how data flows through the automatic sync process:
```ts filename="DataFlow.ts" copy
// 1. Data sent to ingestion API
fetch('http://localhost:4000/ingest/events', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
id: 'evt_123',
userId: 'user_456',
timestamp: '2024-01-15T10:30:00Z',
eventType: 'click'
})
})
// 2. API validates and writes to stream
// 3. Background sync process batches stream data
// 4. Batch automatically written to ClickHouse table when:
// - Batch reaches 100,000 records, OR
// - 1 second has elapsed since last flush
// 5. Data available for queries in events table
sql`SELECT * FROM events WHERE userId = 'user_456';`
```
## Monitoring and Observability
The sync process provides built-in observability within the Moose runtime:
- **Batch Insert Logs**: Records successful batch insertions with sizes and offsets
- **Error Handling**: Logs transient failures with retry information
- **Metrics**: Tracks throughput, batch sizes, and error rates
- **Offset Tracking**: Maintains Kafka consumer group offsets for reliability
---
## Transformation Functions
Source: moose/streaming/transform-functions.mdx
Process and transform data in-flight between streams
# Transformation Functions
## Overview
Transformations allow you to process and reshape data as it flows between streams. You can filter, enrich, reshape, and combine data in-flight before it reaches its destination.
## Implementing Transformations
### Reshape and Enrich Data
Transform data shape or enrich records:
```typescript filename="DataTransform.ts"
interface RawEvent {
id: Key<string>;
timestamp: string;
data: {
user_id: string;
platform: string;
app_version: string;
ip_address: string;
}
}
interface EnrichedEvent {
eventId: Key<string>;
timestamp: Date;
userId: Key<string>;
properties: {
platform: string;
version: string;
country: string;
};
metadata: {
originalTimestamp: string;
processedAt: Date;
}
}
const rawStream = new Stream<RawEvent>("raw_events");
const enrichedStream = new Stream<EnrichedEvent>("enriched_events");
// Reshape and enrich data
rawStream.addTransform(enrichedStream, async (record: RawEvent) => ({
eventId: record.id,
timestamp: new Date(record.timestamp),
userId: record.data.user_id,
properties: {
platform: record.data.platform || 'unknown',
version: record.data.app_version,
country: await lookupCountry(record.data.ip_address)
},
metadata: {
originalTimestamp: record.timestamp,
processedAt: new Date()
}
}));
```
### Filtering
Remove or filter records based on conditions:
```typescript filename="FilterStream.ts"
interface MetricRecord {
id: string;
name: string;
value: number;
timestamp: Date;
}
const inputStream = new Stream<MetricRecord>("input_metrics");
const validMetrics = new Stream<MetricRecord>("valid_metrics");
// Multiple filtering conditions
inputStream.addTransform(validMetrics, (record) => {
// Filter out records with invalid values
if (isNaN(record.value) || record.value < 0) {
return undefined;
}
// Filter out old records
if (record.timestamp < getStartOfDay()) {
return undefined;
}
// Filter out specific metrics
if (record.name.startsWith('debug_')) {
return undefined;
}
return record;
});
```
### Fan Out (1:N)
Send data to multiple downstream processors:
```ts filename="FanOut.ts" copy
interface Order {
orderId: string;
userId: string;
amount: number;
items: string[];
}
interface HighPriorityOrder extends Order {
priority: 'high';
}
interface ArchivedOrder extends Order {
archivedAt: Date;
}
// Define destination streams
const analyticsStream = new Stream<Order>("order_analytics");
const notificationStream = new Stream<HighPriorityOrder>("order_notifications");
const archiveStream = new Stream<ArchivedOrder>("order_archive");
// Source stream
const orderStream = new Stream<Order>("orders");
// Send all orders to analytics
orderStream.addTransform(analyticsStream, (order) => order);
// Send large orders to notifications
orderStream.addTransform(notificationStream, (order) => {
if (order.amount > 1000) {
return {
...order,
priority: 'high'
};
}
return undefined; // Skip small orders
});
// Archive all orders
orderStream.addTransform(archiveStream, (order) => ({
...order,
archivedAt: new Date()
}));
```
### Fan In (N:1)
Combine data from multiple sources:
```typescript filename="FanIn.ts"
interface UserEvent {
userId: Key<string>;
eventType: string;
timestamp: Date;
source: string;
}
// Source streams
const webEvents = new Stream<UserEvent>("web_events");
const mobileEvents = new Stream<UserEvent>("mobile_events");
const apiEvents = new Stream<UserEvent>("api_events");
// Create a stream and table for the combined events
const eventsTable = new OlapTable<UserEvent>("all_events");
const allEvents = new Stream<UserEvent>("all_events", {
destination: eventsTable
});
// Fan in from web
webEvents.addTransform(allEvents, (event) => ({
...event,
source: 'web',
timestamp: new Date()
}));
// Fan in from mobile
mobileEvents.addTransform(allEvents, (event) => ({
...event,
source: 'mobile',
timestamp: new Date()
}));
// Fan in from API
apiEvents.addTransform(allEvents, (event) => ({
...event,
source: 'api',
timestamp: new Date()
}));
```
### Unnesting
Flatten nested records:
```typescript filename="Unnest.ts"
interface NestedRecord {
id: Key<string>;
nested: {
value: number;
}[];
}
interface FlattenedRecord {
id: Key<string>;
value: number;
}
const nestedStream = new Stream<NestedRecord>("nested_records");
const flattenedStream = new Stream<FlattenedRecord>("flattened_records");
nestedStream.addTransform(flattenedStream, (record) => record.nested.map((n) => ({
id: record.id,
value: n.value
})));
```
You cannot have multiple transforms between the same source and destination stream. If you need multiple transformation routes, you must either:
- Use conditional logic inside a single streaming function to handle different cases (see the sketch below), or
- Implement a fan-out/fan-in pattern, where you route records to different intermediate streams and then merge them back into the destination stream.
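A minimal sketch of the conditional-logic approach, handling two cases inside one transform (the record shape is illustrative):
```typescript filename="ConditionalTransform.ts"
interface InputEvent {
  id: string;
  kind: "click" | "view";
  value: number;
}

const sourceStream = new Stream<InputEvent>("source_events");
const destStream = new Stream<InputEvent>("dest_events");

// One transform between source and destination; branch inside the function
sourceStream.addTransform(destStream, (event) => {
  if (event.kind === "click") {
    return { ...event, value: event.value * 2 }; // route A
  }
  if (event.kind === "view") {
    return event; // route B: pass through unchanged
  }
  return undefined; // drop anything else
});
```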
## Error Handling with Dead Letter Queues
When stream processing fails, you can configure dead letter queues to capture failed messages for later analysis and recovery. This prevents single message failures from stopping your entire pipeline.
```typescript filename="DeadLetterQueue.ts" copy
interface UserEvent {
userId: string;
action: string;
timestamp: number;
}
interface ProcessedEvent {
userId: string;
action: string;
processedAt: Date;
isValid: boolean;
}
// Create pipelines
const rawEvents = new IngestPipeline<UserEvent>("raw_events", {
ingestApi: true,
stream: true,
table: false
});
const processedEvents = new IngestPipeline<ProcessedEvent>("processed_events", {
ingestApi: false,
stream: true,
table: true
});
// Create dead letter queue for failed transformations
const eventDLQ = new DeadLetterQueue<UserEvent>("EventDLQ");
// Add transform with error handling
rawEvents.stream!.addTransform(
processedEvents.stream!,
(event: UserEvent): ProcessedEvent => {
// This might fail for invalid data
if (!event.userId || event.userId.length === 0) {
throw new Error("Invalid userId: cannot be empty");
}
return {
userId: event.userId,
action: event.action,
processedAt: new Date(),
isValid: true
};
},
{
deadLetterQueue: eventDLQ // Failed messages go here
}
);
// Monitor dead letter messages
eventDLQ.addConsumer((deadLetter) => {
console.log(`Error: ${deadLetter.errorMessage}`);
console.log(`Failed at: ${deadLetter.failedAt}`);
// Access original typed data
const originalEvent: UserEvent = deadLetter.asTyped();
console.log(`Original User ID: ${originalEvent.userId}`);
});
```
For comprehensive dead letter queue patterns, recovery strategies, and best practices, see the [Dead Letter Queues guide](./dead-letter-queues).
---
## Templates & Apps
Source: moose/templates-examples.mdx
Browse templates and demo apps for MooseStack
# Templates & Apps
Moose provides two ways to get started: **templates** and **demo apps**. Templates are simple skeleton applications that you can initialize with `moose init`, while demo apps are more advanced examples available on GitHub that showcase real-world use cases and integrations.
**Initialize a template:**
```bash filename="Terminal" copy
moose init PROJECT_NAME TEMPLATE_NAME
```
**List available templates:**
```bash filename="Terminal" copy
moose template list
```
---
## Browse Apps
### Nextjs + Express + MCP demo app: Aircraft data [#plane-transponder-demo]
Complete demo application featuring real-time aircraft transponder data with MCP chat integration.
**Repository:** [https://github.com/514-labs/planes](https://github.com/514-labs/planes)
Key Features:
Next.js
Express
MCP
Moose OLAP
ClickHouse
---
### Postgres to ClickHouse CDC with Debezium [#postgres-clickhouse-cdc]
Easy-to-run demo of a CDC pipeline using Debezium, PostgreSQL, Redpanda, and ClickHouse.
**Repository:** [https://github.com/514-labs/debezium-cdc](https://github.com/514-labs/debezium-cdc)
**Blog Post:** [Code-First CDC to ClickHouse with Debezium, Redpanda, and MooseStack](https://www.fiveonefour.com/blog/cdc-postgres-to-clickhouse-debezium-drizzle)
Key Features:
CDC
Debezium
PostgreSQL
Redpanda
ClickHouse
Drizzle ORM
---
### User-facing analytics reference app (Postgres + ClickHouse + React) [#foobar-ufa]
A complete reference architecture showing how to add a dedicated analytics microservice to an existing application without impacting your primary database. Features Postgres + ClickHouse + React frontend with chat analytics.
**Repository:** [https://github.com/514-labs/area-code/tree/main/ufa](https://github.com/514-labs/area-code/tree/main/ufa)
Key Features:
PostgreSQL
ClickHouse
React
TanStack Query
Supabase
Moose OLAP
Moose Streaming
Moose APIs
Elasticsearch
Temporal
---
### User-facing analytics reference app (ClickHouse Cloud + React) [#foobar-ufa-lite]
A simplified version of the UFA architecture using ClickHouse Cloud + React frontend with chat analytics. This version demonstrates a cloud-native approach without local infrastructure dependencies.
**Repository:** [https://github.com/514-labs/area-code/tree/main/ufa-lite](https://github.com/514-labs/area-code/tree/main/ufa-lite)
Key Features:
ClickHouse Cloud
React
Moose OLAP
Moose APIs
---
## Browse Templates
### TypeScript (Default) [#typescript-default]
Default TypeScript project, seeded with foobar example components.
```bash filename="Terminal" copy
moose init PROJECT_NAME typescript
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/typescript](https://github.com/514-labs/moosestack/tree/main/templates/typescript)
Key Features:
Moose APIs
Moose OLAP
Moose Streaming
Moose Workflows
---
### Python (Default) [#python-default]
Default Python project, seeded with foobar example components.
```bash filename="Terminal" copy
moose init PROJECT_NAME python
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/python](https://github.com/514-labs/moosestack/tree/main/templates/python)
Key Features:
Moose APIs
Moose OLAP
Moose Streaming
Moose Workflows
---
### TypeScript (Empty) [#typescript-empty]
Empty TypeScript project with minimal structure.
```bash filename="Terminal" copy
moose init PROJECT_NAME typescript-empty
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/typescript-empty](https://github.com/514-labs/moosestack/tree/main/templates/typescript-empty)
Key Features:
TypeScript
Moose OLAP
---
### Python (Empty) [#python-empty]
Empty Python project with minimal structure.
```bash filename="Terminal" copy
moose init PROJECT_NAME python-empty
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/python-empty](https://github.com/514-labs/moosestack/tree/main/templates/python-empty)
Key Features:
Python
Moose OLAP
---
### Next.js (Empty) [#nextjs-empty]
TypeScript project with a Next.js frontend (empty).
```bash filename="Terminal" copy
moose init PROJECT_NAME next-app-empty
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/next-app-empty](https://github.com/514-labs/moosestack/tree/main/templates/next-app-empty)
Key Features:
Next.js
TypeScript
Moose APIs
Moose OLAP
Moose Streaming
Moose Workflows
---
### Express.js [#express]
TypeScript project using Express for serving analytical APIs.
```bash filename="Terminal" copy
moose init PROJECT_NAME typescript-express
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/typescript-express](https://github.com/514-labs/moosestack/tree/main/templates/typescript-express)
Key Features:
Express.js
TypeScript
Moose OLAP
Moose Streaming
Moose Workflows
---
### TypeScript MCP [#typescript-mcp]
TypeScript project with an MCP (Model Context Protocol) implementation using Express. The included example tool enables LLMs to query ClickHouse.
```bash filename="Terminal" copy
moose init PROJECT_NAME typescript-mcp
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/typescript-mcp](https://github.com/514-labs/moosestack/tree/main/templates/typescript-mcp)
Key Features:
TypeScript
MCP
Express
Moose OLAP
Moose Streaming
Moose Workflows
---
### FastAPI [#fastapi]
Python project using FastAPI for serving analytical APIs.
```bash filename="Terminal" copy
moose init PROJECT_NAME python-fastapi
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/python-fastapi](https://github.com/514-labs/moosestack/tree/main/templates/python-fastapi)
Key Features:
FastAPI
Python
Moose OLAP
Moose Streaming
Moose Workflows
---
### FastAPI (Client-Only) [#fastapi-client-only]
FastAPI client-only project using MooseStack libraries without requiring the Moose runtime.
```bash filename="Terminal" copy
moose init PROJECT_NAME python-fastapi-client-only
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/python-fastapi-client-only](https://github.com/514-labs/moosestack/tree/main/templates/python-fastapi-client-only)
Key Features:
FastAPI
Python
Moose OLAP
Moose Streaming
Moose Workflows
---
### ADS-B (Aircraft Tracking) [#adsb]
Real-time aircraft transponder data tracking.
```bash filename="Terminal" copy
moose init PROJECT_NAME ads-b
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/ads-b](https://github.com/514-labs/moosestack/tree/main/templates/ads-b)
Key Features:
Moose APIs
Moose OLAP
Moose Streaming
Moose Workflows
---
### ADS-B with Frontend [#adsb-frontend]
Real-time aircraft transponder data with a React frontend.
```bash filename="Terminal" copy
moose init PROJECT_NAME ads-b-frontend
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/ads-b-frontend](https://github.com/514-labs/moosestack/tree/main/templates/ads-b-frontend)
Key Features:
Next.js
React
Moose APIs
Moose OLAP
Moose Workflows
---
### Live Heart Rate Leaderboard [#heartrate]
Live heart rate leaderboard inspired by F45 with Streamlit frontend.
```bash filename="Terminal" copy
moose init PROJECT_NAME live-heartrate-leaderboard
```
**Repository:** [https://github.com/514-labs/moosestack/tree/main/templates/live-heartrate-leaderboard](https://github.com/514-labs/moosestack/tree/main/templates/live-heartrate-leaderboard)
Key Features:
Streamlit
Python
Moose APIs
Moose OLAP
Moose Streaming
Moose Workflows
---
## Moose Workflows
Source: moose/workflows.mdx
Build ETL pipelines, scheduled jobs, and long-running tasks with orchestration
# Moose Workflows
## Overview
The Workflows module provides standalone task orchestration and automation. You can use this capability independently to build ETL pipelines, run scheduled jobs, trigger background tasks, and manage long-running tasks without requiring other MooseStack components like databases or streams.
### Basic Usage
```typescript filename="DataFlow.ts" copy
export interface Foo {
name: string;
}
export interface Bar {
name: string;
greeting: string;
counter: number;
}
// Sketch: task and workflow definitions; the exact run signature (input passed as the argument) is assumed
const task1 = new Task({
  name: "task1",
  run: async (input: Foo): Promise<Bar> => ({
    name: input?.name ?? "no name",
    greeting: `hello, ${input?.name ?? "no name"}!`,
    counter: 1,
  }),
});
const task2 = new Task({
  name: "task2",
  run: async (input: Bar) => console.log(input.greeting),
});
const exampleWorkflow = new Workflow({
  name: "example",
  startingTask: task1,
  retries: 3,
});
```
### Enabling Workflows
To enable workflows, you need to add the `workflows` feature to your `moose.config.toml` file:
```toml filename="moose.config.toml" copy
[features]
workflows = true
```
## Integration with Other Capabilities
While the Workflows capability works independently, it is designed to be used in conjunction with other MooseStack capabilities.
---
## Cancel a Running Workflow
Source: moose/workflows/cancel-workflow.mdx
# Cancel a Running Workflow
To stop a workflow before it has finished running, use the `workflow cancel` command.
```bash filename="Terminal" copy
moose workflow cancel <workflow-name>
```
### Implementing Cancellation Callbacks
For workflows that are running and have cleanup operations to perform, you can implement a termination callback.
This is especially useful for long-running tasks that hold open connections or subscriptions to other services that need to be closed.
You may also use the `state` within the run/cancel context to supplement your business logic.
```typescript filename="workflows/workflows.ts" copy
const task1 = new Task({
name: "task1",
run: async (ctx) => {
connection.open();
},
onCancel: async (ctx) => {
// Clean up any resources
connection.close();
},
});
const myworkflow = new Workflow({
name: "myworkflow",
startingTask: task1,
retries: 3,
});
```
---
## Define Workflows
Source: moose/workflows/define-workflow.mdx
Create workflow definitions with task sequences and data flow
# Define Workflows
## Overview
Workflows automate task sequences with built-in reliability and monitoring. Tasks execute in order, passing data between steps.
Built on Temporal for reliability, retries, and monitoring via GUI dashboard.
## Writing Workflow Tasks
Tasks are objects with a `run` function. Return values automatically pass to the next task.
```typescript filename="app/index.ts" copy
export interface Foo {
name: string;
}
// Sketch: a task returning a Foo, and a workflow that starts with it (exact signatures assumed)
const task1 = new Task({
  name: "task1",
  run: async (): Promise<Foo> => ({ name: "hello" }),
});
const myWorkflow = new Workflow({
  name: "example",
  startingTask: task1,
});
```
Export `Task` and `Workflow` objects. Specify `startingTask` in the `Workflow` config.
## Data Flow Between Tasks
Tasks communicate through their return values. Each task can return an object that is automatically passed as input to the next task in the workflow.
- Only values inside the object are passed to the next task.
- The object must be JSON-serializable.
```typescript filename="app/index.ts" copy
export interface Foo {
name: string;
}
export interface Bar {
name: string;
greeting: string;
counter: number;
}
// Sketch: task and workflow definitions; the exact run signature (input passed as the argument) is assumed
const task1 = new Task({
  name: "task1",
  run: async (input: Foo): Promise<Bar> => ({
    name: input?.name ?? "no name",
    greeting: `hello, ${input?.name ?? "no name"}!`,
    counter: 1,
  }),
});
const task2 = new Task({
  name: "task2",
  run: async (input: Bar) => console.log(input.greeting),
});
const exampleWorkflow = new Workflow({
  name: "example",
  startingTask: task1,
  retries: 3,
});
```
## Debugging Workflows
While the Temporal dashboard is a helpful tool for debugging, you can also leverage the Moose CLI to monitor and debug workflows. This is useful if you want to monitor a workflow without having to leave your terminal.
Use the `moose workflow status` command to monitor a workflow:
```bash filename="Terminal" copy
moose workflow status example
```
This will print high level information about the workflow run:
```txt filename="Terminal"
Workflow Workflow Status: example
Run ID: 446eab6e-663d-4913-93fe-f79d6109391f
Status: WORKFLOW_EXECUTION_STATUS_COMPLETED ✅
Execution Time: 66s
```
If you want more detailed information about the workflow's status, including task level logs and inputs/outputs, you can use the `--verbose` flag:
```bash filename="Terminal" copy
moose workflow status example --verbose
```
```txt filename="Terminal"
Workflow Workflow Status: example
Run ID: 446eab6e-663d-4913-93fe-f79d6109391f
Status: WORKFLOW_EXECUTION_STATUS_COMPLETED ✅
Execution Time: 66s
Request: GetWorkflowExecutionHistoryRequest { namespace: "default", execution: Some(WorkflowExecution { workflow_id: "example", run_id: "446eab6e-663d-4913-93fe-f79d6109391f" }), maximum_page_size: 0, next_page_token: [], wait_new_event: false, history_event_filter_type: Unspecified, skip_archival: false }
Found 17 events
Event History:
• [2025-02-21T14:16:56.234808764+00:00] EVENT_TYPE_WORKFLOW_EXECUTION_STARTED
• [2025-02-21T14:16:56.235132389+00:00] EVENT_TYPE_WORKFLOW_TASK_SCHEDULED
• [2025-02-21T14:16:56.259341847+00:00] EVENT_TYPE_WORKFLOW_TASK_STARTED
• [2025-02-21T14:16:56.329856180+00:00] EVENT_TYPE_WORKFLOW_TASK_COMPLETED
• [2025-02-21T14:16:56.329951889+00:00] EVENT_TYPE_ACTIVITY_TASK_SCHEDULED
Activity: example/task1
• [2025-02-21T14:16:56.333761680+00:00] EVENT_TYPE_ACTIVITY_TASK_STARTED
• [2025-02-21T14:16:56.497156055+00:00] EVENT_TYPE_ACTIVITY_TASK_COMPLETED
Result:
{
"counter": 1,
"greeting": "hello, no name!",
"name": "no name",
}
```
With this more detailed output, you can see the exact sequence of events and the inputs and outputs of each task. This is useful for debugging and understanding the workflow's behavior.
The result of each task is included in the output, allowing you to inspect the data that was passed between tasks for debugging purposes.
If your workflow fails due to some runtime error, you can use the event history timeline to identify the task that failed.
---
## Retries and Timeouts
Source: moose/workflows/retries-and-timeouts.mdx
# Error Detection and Handling
Moose provides multiple layers of error protection, both at the workflow and task level:
### Workflow-Level Retries and Timeouts
Moose automatically catches any runtime errors during workflow execution. Errors are logged for debugging, and the orchestrator will retry failed tasks according to the `retries` option.
In your `Workflow`, you can configure the following options to control workflow behavior, including timeouts and retries:
```typescript filename="app/index.ts" {5,6} copy
const myWorkflow = new Workflow({
  name: "example",
  startingTask: task1, // assumed to be defined elsewhere
  // Workflow-level retries and timeout (duration string format assumed)
  retries: 3,
  timeout: "10m",
});
```
### Task-Level Errors and Retries
For more granular control over task-level errors and retries, you can configure your individual tasks to have their own retry behavior.
For workflows & tasks that may not have a predefined timeout, you may set `never` as the timeout.
```typescript filename="app/index.ts" {5-6} copy
const task1 = new Task({
  name: "task1",
  run: async () => { /* ... */ },
  // Task-level retries and timeout ("never" disables the timeout)
  retries: 3,
  timeout: "never",
});
const myWorkflow = new Workflow({
  name: "example",
  startingTask: task1,
});
```
### Example: Workflow and Task Retry Interplay
When configuring retries, it's important to understand how workflow-level and task-level retries interact. Consider the following scenario:
- **Workflow Retry Policy**: 2 attempts
- **Task Retry Policy**: 3 attempts
```typescript filename="app/index.ts" {5,10} copy
const task1 = new Task({
  name: "task1",
  run: async () => { /* ... */ },
  // Task-level retry policy
  retries: 3,
});
const myWorkflow = new Workflow({
  name: "example",
  startingTask: task1,
  retries: 2, // Workflow-level retry policy
});
```
If the execution of the workflow encounters an error, the retry sequence would proceed as follows:
1. **Workflow Attempt 1**
- **Task Attempt 1**: Task fails
- **Task Attempt 2**: Task fails
- **Task Attempt 3**: Task fails
- Workflow attempt fails after exhausting task retries
2. **Workflow Attempt 2**
- **Task Attempt 1**: Task fails
- **Task Attempt 2**: Task fails
- **Task Attempt 3**: Task fails
- Workflow attempt fails after exhausting task retries
In this example, the workflow will make a total of 2 attempts, and each task within those attempts will retry up to 3 times before the workflow itself retries.
---
## Schedule Workflows
Source: moose/workflows/schedule-workflow.mdx
Set up recurring and scheduled workflow execution
# Schedule Workflows
## Overview
Moose workflows can be configured to run automatically on a schedule using cron expressions or interval-based scheduling. This enables you to automate recurring tasks, data processing jobs, and maintenance operations.
## Scheduling Workflows
Workflows can be configured to run on a schedule using the `schedule` field in `Workflow`. This field is optional and blank by default.
### Cron Expressions
```typescript filename="app/scheduled-workflow.ts" copy
const scheduledWorkflow = new Workflow({
  name: "example",
  startingTask: task1, // assumed to be defined elsewhere
  // Run at 12:00 PM UTC every day
  schedule: "0 12 * * *",
});
```
#### Cron Expression Format
```text
|------------------------------- Minute (0-59)
| |------------------------- Hour (0-23)
| | |------------------- Day of the month (1-31)
| | | |------------- Month (1-12; or JAN to DEC)
| | | | |------- Day of the week (0-6; or SUN to SAT; or 7 for Sunday)
| | | | |
| | | | |
* * * * *
```
#### Common Cron Examples
| Cron Expression | Description |
|-----------------|-------------|
| 0 12 * * * | Runs at 12:00 PM every day |
| 0 0 * * 0 | Runs at 12:00 AM every Sunday |
| 0 8 * * 1-5 | Runs at 8:00 AM on weekdays (Monday to Friday) |
| * * * * * | Runs every minute |
| 0 */6 * * * | Runs every 6 hours |
| 0 9 1 * * | Runs at 9:00 AM on the first day of every month |
| 0 0 1 1 * | Runs at midnight on January 1st every year |
Use an online cron expression visualizer like [crontab.guru](https://crontab.guru/) to help you understand how the cron expression will schedule your workflow.
### Interval Schedules
Interval schedules can be specified as a string of the form `"@every <interval>"`. The interval follows standard duration format:
```typescript filename="app/interval-workflow.ts" copy
const intervalWorkflow = new Workflow({
  name: "example",
  startingTask: task1, // assumed to be defined elsewhere
  // Run every hour
  schedule: "@every 1h",
});
```
#### Interval Examples
| Interval | Description |
|----------|-------------|
| `@every 30s` | Every 30 seconds |
| `@every 5m` | Every 5 minutes |
| `@every 1h` | Every hour |
| `@every 12h` | Every 12 hours |
| `@every 24h` | Every 24 hours |
| `@every 7d` | Every 7 days |
## Practical Scheduling Examples
### Daily Data Processing
```typescript filename="app/daily-etl.ts" copy
const dailyEtl = new Workflow({
  name: "daily-data-processing",
  startingTask: extractTask, // assumed to be defined elsewhere
  // Every day at midnight UTC
  schedule: "0 0 * * *",
  retries: 3,
});
```
### Weekly Reports
```typescript filename="app/weekly-reports.ts" copy
const weeklyReports = new Workflow({
  name: "weekly-reports",
  startingTask: generateReportTask, // assumed to be defined elsewhere
  // Every Monday at 8:00 AM UTC
  schedule: "0 8 * * 1",
});
```
### High-Frequency Monitoring
```typescript filename="app/monitoring.ts" copy
const monitoring = new Workflow({
  name: "high-frequency-monitoring",
  startingTask: checkHealthTask, // assumed to be defined elsewhere
  // Every 30 seconds
  schedule: "@every 30s",
});
```
## Monitoring Scheduled Workflows
### Development Environment
If your dev server is running, you should see logs in the terminal when your scheduled workflow is executed:
```bash filename="Terminal" copy
moose dev
```
```txt filename="Terminal"
[2024-01-15 12:00:00] Scheduled workflow 'daily-data-processing' started
[2024-01-15 12:00:01] Task 'extract' completed successfully
[2024-01-15 12:00:15] Task 'transform' completed successfully
[2024-01-15 12:00:30] Task 'load' completed successfully
[2024-01-15 12:00:30] Workflow 'daily-data-processing' completed successfully
```
### Checking Workflow Status
You can check the status of scheduled workflows using the CLI:
```bash filename="Terminal" copy
# List all workflows defined in your project
moose workflow list
# Alternative command to list all workflows
moose ls --type workflows
# View workflow execution history
moose workflow history
# Check specific workflow status
moose workflow status daily-data-processing
# Get detailed execution history
moose workflow status daily-data-processing --verbose
```
### Temporal Dashboard
Access the Temporal dashboard to view scheduled workflow executions:
```bash filename="Terminal" copy
# Open Temporal dashboard (typically at http://localhost:8080)
open http://localhost:8080
```
The dashboard shows:
- Scheduled workflow definitions
- Execution history and timing
- Success/failure rates
- Retry attempts and errors
## Best Practices for Scheduled Workflows
### Timeout and Retry Configuration
Configure appropriate timeouts and retries for scheduled workflows:
```typescript filename="app/robust-scheduled-workflow.ts" copy
const robustTask = new Task({
  name: "process-data",
  run: async () => { /* ... */ },
  retries: 2,
  timeout: "5m",
});
const robustWorkflow = new Workflow({
  name: "robust-scheduled-workflow",
  startingTask: robustTask,
  schedule: "@every 6h",
  retries: 3,
  timeout: "30m",
});
```
## Troubleshooting Scheduled Workflows
### Common Issues
- **Timezone considerations**: Cron schedules use UTC by default
- **Resource conflicts**: Ensure scheduled workflows don't compete for resources
- **Long-running tasks**: Set appropriate timeouts for lengthy operations
- **Error handling**: Implement proper error handling and logging
---
## Trigger Workflows
Source: moose/workflows/trigger-workflow.mdx
Start workflows from events, APIs, or external triggers
# Trigger Workflows
## Overview
Moose workflows can be triggered programmatically from various sources including APIs, events, external systems, or manual execution. This enables you to build reactive data processing pipelines and on-demand task execution.
## Manual Workflow Execution
The simplest way to trigger a workflow is using the Moose CLI:
```bash filename="Terminal" copy
# Run a workflow manually
moose workflow run example
# Run with input parameters
moose workflow run example --input '{"name": "John", "email": "john@example.com"}'
```
### Passing Input to Workflows
When triggering workflows, you can pass input data that will be passed to the starting task:
```bash filename="Terminal" copy
moose workflow run data-processing --input '{
"sourceUrl": "https://api.example.com/data",
"apiKey": "your-api-key",
"batchSize": 100
}'
```
The input is parsed as JSON and passed to the workflow's starting task.
## API-Triggered Workflows
Trigger workflows directly via an HTTP POST endpoint exposed by the webserver.
- Endpoint: `/workflows/{workflowName}/trigger`
### Request
- Body: optional JSON payload passed to the workflow's starting task.
Example:
```bash filename="Terminal" copy
curl -X POST 'http://localhost:4000/workflows/data-processing/trigger' \
-H 'Content-Type: application/json' \
-d '{
"inputValue": "process-user-data",
"priority": "high"
}'
```
### Authentication
- Local development: no auth required.
- Production: protect the endpoint using an API key. Follow these steps:
1. Generate a token and hashed key (see the Token Generation section in the API Auth docs):
```bash filename="Terminal" copy
moose generate hash-token
# Outputs:
# - ENV API Key (hashed) → for environment/config
# - Bearer Token (plain) → for Authorization header
```
2. Configure the server with the hashed key:
```bash copy
MOOSE_CONSUMPTION_API_KEY=""
```
3. Call the endpoint using the plain Bearer token from step 1:
```bash filename="Terminal" copy
curl -X POST 'https://your-host/workflows/data-processing/trigger' \
-H 'Authorization: Bearer ' \
-H 'Content-Type: application/json' \
-d '{"inputValue":"process-user-data"}'
```
For details, see the API Auth page under “Token Generation” and “API Endpoints”.
### Response
```json filename="Response"
{
"workflowId": "data-processing-",
"runId": "",
}
```
In local development, the response also includes a `dashboardUrl` to Temporal UI:
```json filename="Response (dev)"
{
"workflowId": "data-processing-",
"runId": "",
"dashboardUrl": "http://localhost:8080/namespaces//workflows/data-processing-//history"
}
```
## Terminate a Running Workflow
After triggering a workflow, you can terminate it via an HTTP endpoint.
- Endpoint: `POST /workflows/{workflowId}/terminate`
### Request
- Local development (no auth):
```bash filename="Terminal" copy
curl -X POST 'http://localhost:4000/workflows/data-processing-/terminate'
```
- Production (Bearer token required):
```bash filename="Terminal" copy
curl -X POST 'https://your-host/workflows/data-processing-/terminate' \
-H 'Authorization: Bearer '
```
---
## October 24, 2025
Source: release-notes/2025-10-24.mdx
Release notes for October 24, 2025
# October 24, 2025
* **New:** `moose migrate --clickhouse-url` enables serverless ClickHouse schema deploys
* **Improved:** JSON columns now accept dynamic payloads with fine-grained controls
## Serverless ClickHouse migrations with `moose migrate`
Run schema diffs and apply migrations directly against a ClickHouse endpoint, which is perfect for OLAP-only or CI/CD environments that don't boot the full Moose runtime.
```bash
# Detect changes and persist the migration plan
moose generate migration \
--clickhouse-url "https://user:pass@ch.serverless.dev/main" \
--save
# Apply the plan directly to ClickHouse
moose migrate --clickhouse-url "https://user:pass@ch.serverless.dev/main"
```
🐙 PR: ([#2872](https://github.com/514labs/moose/pull/2872)) | 📘 Docs: [Serverless ClickHouse migrations guide](/moose/migrate#serverless-deployments)
## Adaptive JSON columns with `__mooseJsonOptions`
Model semi-structured payloads while locking in typed paths for the fields you care about.
```typescript
export interface UserActivity {
id: Key<string>;
metadata: {
userId: string;
sessionId: string;
__mooseJsonOptions: {
maxDynamicPaths: 256;
typedPaths: [
["userId", "String"],
["sessionId", "String"]
];
skipRegexps: ["^debug\\."];
};
};
}
```
🐙 PR: ([#2887](https://github.com/514labs/moose/pull/2887)) | 📘 Docs: [Configurable JSON columns reference](/moose/data-modeling#configurable-json-columns)
## Moose
- **`moose migrate --clickhouse-url`** – Generate and apply migrations directly against hosted ClickHouse, ideal for OLAP-only or CI/CD workflows that run without the full Moose runtime. [Docs: Serverless ClickHouse migrations](/moose/migrate#serverless-deployments) | PRs [#2872](https://github.com/514labs/moose/pull/2872).
- **LLM-friendly docs & endpoints** – Framework pages expose TS/Py “LLM view” links and the CLI now serves `/llm-ts.txt` + `/llm-py.txt` for assistants that need scoped context. [Docs: LLM docs](/moose/llm-docs) | PRs [#2892](https://github.com/514labs/moose/pull/2892).
- **Flexible JSON columns** – `__mooseJsonOptions` lets models cap dynamic paths, pin typed paths, or skip keys/regexes so ingestion can accept evolving payloads without breaking typed reads. [Docs: Configurable JSON columns](/moose/data-modeling#configurable-json-columns) | PRs [#2887](https://github.com/514labs/moose/pull/2887).
- **Configurable `source_dir`** – `moose.config.toml` can point at `src/` (or any folder) instead of the default `app/`, simplifying adoption inside existing repos. [Docs: Custom source directory](/moose/configuration#custom-source-directory) | PRs [#2886](https://github.com/514labs/moose/pull/2886).
- **Array transforms can fan out events** – Transform functions that return arrays automatically emit one Kafka message per element, covering explode/normalize patterns without extra producers. [Docs: Array transforms](/moose/streaming/transform-functions#array-transforms) | PRs [#2882](https://github.com/514labs/moose/pull/2882).
- **ClickHouse modeling controls** – Table DSL now covers TTL per table/column, `sampleByExpression`, and fully configurable secondary indexes (type, args, granularity) so you can encode retention + performance plans directly in code. [Docs: TTL](/moose/olap/ttl) • [Docs: Sample BY](/moose/olap/schema-optimization#sample-by-expressions) • [Docs: Secondary indexes](/moose/olap/indexes) | PRs [#2845](https://github.com/514labs/moose/pull/2845), [#2867](https://github.com/514labs/moose/pull/2867), [#2869](https://github.com/514labs/moose/pull/2869).
- **`get_source` MCP tool** – AI assistants can resolve a Moose component (tables, APIs, streams) back to its source file for faster code navigation. [Docs: MCP get_source tool](/moose/mcp-dev-server#get_source) | PRs [#2848](https://github.com/514labs/moose/pull/2848).
- **Google Analytics v4 connector** – Service-account authenticated connector streams GA4 reports and realtime metrics into Moose pipelines so marketing data lands without bespoke ETL. [Docs: Connector reference](https://registry.moosestack.com/connectors/google-analytics) | PRs [registry#121](https://github.com/514-labs/registry/pull/121).
- **Connector registry APIs** – Public REST endpoints expose connector metadata, docs, schemas, and versions for catalogs or automation. [Docs: Registry API docs](https://registry.moosestack.com/docs/api) | PRs [registry#120](https://github.com/514-labs/registry/pull/120).
- **Onboarding & docs polish** – Quickstart, auth, materialized view, and config guides now call out install checkpoints, nullable column behavior, and when to prefer `moose.config.toml` over Docker overrides. [Docs: Quickstart](/moose/getting-started/quickstart) | PRs [#2903](https://github.com/514labs/moose/pull/2903), [#2894](https://github.com/514labs/moose/pull/2894), [#2893](https://github.com/514labs/moose/pull/2893), [#2890](https://github.com/514labs/moose/pull/2890).
- **Integer validation parity** – The ingest API enforces every ClickHouse integer range (Int8–UInt256) with clear errors, preventing silent overflows. [Docs: Ingest API](/moose/apis/ingest-api) | PRs [#2861](https://github.com/514labs/moose/pull/2861).
- **MCP watcher stability** – The MCP server now waits for file-system changes to settle before responding so IDE bots always read consistent artifacts. [Docs: MCP server](/moose/mcp-dev-server) | PRs [#2884](https://github.com/514labs/moose/pull/2884).
- **Release + schema compiler hardening** – Version detection ignores CI-only tags, and ClickHouse parsing handles ORDER BY around `PARTITION BY`, `TTL`, `SAMPLE BY`, and secondary indexes even when optional arguments are omitted. PRs [#2902](https://github.com/514labs/moose/pull/2902), [#2898](https://github.com/514labs/moose/pull/2898), [#2897](https://github.com/514labs/moose/pull/2897), [#2889](https://github.com/514labs/moose/pull/2889).
- **Proxy request fidelity** – Consumption APIs now preserve headers/body metadata end-to-end, keeping auth tokens and content negotiation intact. PRs [#2881](https://github.com/514labs/moose/pull/2881).
## Boreal
- **Navigation slug fix** – Visiting a 404 no longer strips the org ID from subsequent links, so multi-tenant operators stay on the right workspace. ([commercial#1014](https://github.com/514-labs/commercial/pull/1014))
---
## November 1, 2025
Source: release-notes/2025-11-01.mdx
Release notes for November 1, 2025
# November 1, 2025
- **New:** Support for ClickHouse table engines:
- [Buffer table engine](/moose/olap/model-table#in-memory-buffer-buffer): smooth ingest spikes before data lands in MergeTree ([ClickHouse docs](https://clickhouse.com/docs/en/engines/table-engines/special/buffer))
- [S3 table engine](/moose/olap/model-table#direct-s3-access-s3): keep data in S3 while ClickHouse reads and writes it on demand ([ClickHouse docs](https://clickhouse.com/docs/en/engines/table-engines/integrations/s3))
- Beta (self-hosted only): [Distributed table engine](/moose/olap/model-table#distributed-tables-distributed): serve cluster-wide queries through Moose-managed tables ([ClickHouse docs](https://clickhouse.com/docs/en/engines/table-engines/special/distributed))
- **Improved:** Serverless migrations gain Redis-backed state storage plus per-table databases.
- **Feature that will make a small number of people very happy:** Moose now has a `flake.nix` that lets you install the CLI via `nix run github:514-labs/moosestack` (it also provides a dev shell to make contributing easier!)
## Buffer tables for burst protection
You can now model ClickHouse Buffer engines in MooseOLAP TypeScript and Python projects.
```ts filename="bufferTable.ts" copy
interface PaymentEvent {
eventId: Key<string>;
amount: number;
capturedAt: Date;
}
);
```
PR: [#2908](https://github.com/514-labs/moosestack/pull/2908) | Docs: [Buffer engine](/moose/olap/model-table#in-memory-buffer-buffer) • [ClickHouse Buffer engine docs](https://clickhouse.com/docs/en/engines/table-engines/special/buffer)
## S3 tables for object storage
You can now model ClickHouse S3 engines in MooseOLAP TypeScript and Python projects. The CLI serializes engine settings, resolves runtime credentials, and enforces the S3 rule set (`PARTITION BY` allowed, `ORDER BY` rejected) so you can read or write datasets that live entirely in S3.
```ts filename="s3Archive.ts" copy
interface ArchivedOrder {
orderId: string;
status: string;
processedAt: Date;
}
);
```
PR: [#2908](https://github.com/514-labs/moosestack/pull/2908) | Docs: [S3 engine](/moose/olap/model-table#direct-s3-access-s3) • [ClickHouse S3 docs](https://clickhouse.com/docs/en/engines/table-engines/integrations/s3)
## Distributed tables for cluster fan-out
Beta (self-hosted only): Not supported on Boreal or ClickHouse Cloud.
You can now model ClickHouse Distributed engines in MooseOLAP TypeScript and Python projects. Plans capture the cluster, target database/table, and optional sharding key, while validation checks that the referenced local tables exist on every node before executing migrations.
```ts filename="distributedEvents.ts" copy
interface UserEvent {
userId: Key<string>;
eventType: string;
occurredAt: Date;
}
);
```
PR: [#2908](https://github.com/514-labs/moosestack/pull/2908) | Docs: [Distributed engine](/moose/olap/model-table#distributed-tables-distributed) • [ClickHouse Distributed docs](https://clickhouse.com/docs/en/engines/table-engines/special/distributed)
## Redis state storage for serverless migrations
`moose generate migration` and `moose migrate` accept a Redis URL (flag or `MOOSE_REDIS_CONFIG__URL`) whenever `state_config.storage = "redis"`. The CLI resolves ClickHouse + Redis endpoints, acquires migration locks in Redis, and reuses the same builder across serverless tooling.
```bash filename="Terminal" copy
export MOOSE_CLICKHOUSE_CONFIG__URL="https://user:pass@ch.serverless.dev/main"
export MOOSE_REDIS_CONFIG__URL="redis://redis.example.com:6379"
moose migrate --clickhouse-url "$MOOSE_CLICKHOUSE_CONFIG__URL" --redis-url "$MOOSE_REDIS_CONFIG__URL"
```
PR: [#2907](https://github.com/514-labs/moosestack/pull/2907) | Docs: [Redis state storage](/moose/migrate#state-storage-options)
## Multi-database tables
Tables now carry a `database` field through the CLI, codegen, and infrastructure map. Moose will create any `additional_databases`, validate plans that attempt to move an existing table, and surface fully qualified names in `moose ls`.
```ts filename="auditLogs.ts" copy
interface AuditLog {
id: Key<string>;
recordedAt: Date;
message: string;
}
// The `database` field places this table outside the default database
// (sketch: the database name here is illustrative)
const auditLogs = new OlapTable<AuditLog>("audit_logs", {
  database: "audit",
});
```
PR: [#2876](https://github.com/514-labs/moosestack/pull/2876) | Docs: [Multi-database setup](/moose/olap/model-table#multi-database-setup)
## Moose
- **Buffer tables** – Burst-friendly Buffer engines ship with typed config, CLI validation, and template coverage. Docs: [Buffer engine](/moose/olap/model-table#in-memory-buffer-buffer) • [ClickHouse Buffer docs](https://clickhouse.com/docs/en/engines/table-engines/special/buffer) | PR [#2908](https://github.com/514-labs/moosestack/pull/2908)
- **S3 tables** – Direct object storage workflows stay in code with S3 engine support and credential handling. Docs: [S3 engine](/moose/olap/model-table#direct-s3-access-s3) • [ClickHouse S3 docs](https://clickhouse.com/docs/en/engines/table-engines/integrations/s3) | PR [#2908](https://github.com/514-labs/moosestack/pull/2908)
- **Distributed tables** – Cluster fan-out models emit the correct ClickHouse DDL and guard against missing local tables. Docs: [Distributed engine](/moose/olap/model-table#distributed-tables-distributed) • [ClickHouse Distributed docs](https://clickhouse.com/docs/en/engines/table-engines/special/distributed) | PR [#2908](https://github.com/514-labs/moosestack/pull/2908)
- **Serverless migrations stay coordinated** – Redis-backed locks and state storage plug into the existing `moose migrate` flow with env-var overrides for CI/CD. Docs: [Redis state storage](/moose/migrate#state-storage-options) | PR [#2907](https://github.com/514-labs/moosestack/pull/2907)
- **Per-table databases** – The migration planner now respects `database` overrides, auto-creates configured databases, and blocks accidental moves between them. Docs: [Multi-database setup](/moose/olap/model-table#multi-database-setup) | PR [#2876](https://github.com/514-labs/moosestack/pull/2876)
- **Runtime S3Queue credentials** – Environment variable markers resolve at deploy time for S3Queue sources, keeping AWS keys out of source. Docs: [Streaming from S3](/moose/olap/model-table#streaming-from-s3-s3queue) | PR [#2875](https://github.com/514-labs/moosestack/pull/2875)
## Boreal
- Blog redesign.
- Fixed the redirect loop after deleting an organization so users land back on the create-org screen instead of bouncing between routes.
## Nix development environment
`flake.nix` now bootstraps a full MooseStack build environment: `nix develop` drops you into a shell with everything you need to build all of the MooseStack components. If you use Nix, let us know!
Give it a go if you have Nix installed on your machine:
```bash
nix run github:514-labs/moosestack   # append "-- <args>" to pass arguments to the CLI
```
PR: [#2920](https://github.com/514-labs/moosestack/pull/2920)
---
## Release Notes
Source: release-notes/index.mdx
Moose / Sloan / Boreal Release Notes
# Release Notes
Welcome to the Moose, Sloan, and Boreal release notes. Here you'll find information about new features, improvements, and bug fixes for all our products.
## Latest Updates
* [November 1, 2025](/release-notes/2025-11-01)
* [October 24, 2025](/release-notes/2025-10-24)
## Installation
To get the latest versions of Moose and Sloan:
```bash
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) moose,sloan
```
## Products
Our release notes cover updates across three main products:
**Moose** - Build analytical backends with code-first infrastructure, including OLAP tables, streaming pipelines, and APIs.
**Sloan** - AI-powered MCP tools for automated data engineering and project setup.
**Boreal** - Our hosting platform for deploying and managing Moose applications.
Select a date from the sidebar to view detailed release notes for that period.
---
## release-notes/upcoming
Source: release-notes/upcoming.mdx
### Highlights
* **Moose and Sloan users** can now embed metadata into their Moose primitives, for use by users, their agents, and their metadata tools.
* **Sloan users** can read and write from **Databricks** (more coming here soon).
### Sloan MCP
#### [Experimental] Wrangler Agent—Databricks tools.
We've had a bunch of our users (especially in the enterprise) request deeper integration with Databricks. We've created MCP tooling to allow you to read from Databricks, and create new derivative Databricks managed tables.
Turn it on by running `sloan config tools` and adding `experimental-databricks-tools`. [Docs](https://docs.fiveonefour.com/sloan/tool-reference). To connect to your Databricks instance, you'll need to modify the relevant `MCP.json` file to add:
```json
"DATABRICKS_HOST": "[-].databricks.com",
"DATABRICKS_PATH": "/sql/1.0/warehouses/[-]",
"DATABRICKS_TOKEN": "[-]",
```
**This allows you to:**
* Read from Databricks tables
* Create queries and run them against Databricks tables
* Create new Databricks managed tables
**Future of the feature:**
* **Workflows V2**: We're working on bringing schemas into our workflow creation tools; our Databricks integration will be able to leverage these when interacting with Databricks.
* **DatabricksTable**: We're working on a new primitive that will allow you to create a Databricks table from a Moose primitive.
### Moose + Sloan
#### Descriptions in your Moose primitives
**Store context about why you are building what you are building, for you and your agents.**
Moose primitives can now include descriptions.
```ts
const acPipeline = new IngestPipeline("AircraftTrackingProcessed", {
  table: true,
  stream: true,
  ingestApi: false,
  metadata: { description: "Pipeline for ingesting raw aircraft data" }, // new description field!
});
```
**Where is this used?** Sloan tools that create Moose primitives will now write the intention of the primitive into its description field, giving tools and agents that work with that primitive in the future more context. In practice, every description is also served to the tools as context when the infra map is loaded.
**Future of the feature:** Two main extensions of this feature are planned:
* Embedding the descriptions into the underlying infrastructure (e.g. as table level comments in ClickHouse)
* Extending the metadata:
* To be on a field level as well as a primitive level
* To include any arbitrary key value pair, not just a description (use this for managing labels like PII!)
---
## Data Collection Policy
Source: sloan/data-collection-policy.mdx
Data Collection Policy for Sloan
# Sloan Data Collection Policy
Sloan is in research preview. Accordingly, we collect usage data to improve the product. By default, we collect the most granular data on "ready to use" templates (`comprehensive` data collection), and we collect less information from the other templates (`standard` data collection).
You can change the data you share with us by setting the following:
```toml filename="sloan.config.toml"
enable_telemetry = "standard"
```
The available options are:
- `standard` - collects tool usage and success or error status.
- `comprehensive` - collects tool usage, success or error status, and parameters used by the tools.
- `off` - disables telemetry.
Example standard data collection:
```javascript filename="standard data collection" copy
{
distinctId: distinctIdForEvent,
event: 'sloan_mcp_tool_usage',
properties: {
tool_name: toolName,
success: !error,
telemetry_level: 'standard',
source: 'sloan_mcp_execution',
timestamp: new Date().toISOString()
}
}
```
Example comprehensive data collection:
```javascript filename="comprehensive data collection" copy
{distinctId: distinctIdForEvent,
event: 'sloan_mcp_tool_usage',
properties: {
tool_name: toolName,
success: !error,
telemetry_level: 'comprehensive',
source: 'sloan_mcp_execution',
timestamp: new Date().toISOString(),
parameters: args, // Full tool parameters
error_message: error?.message, // Detailed error information
error_stack: error?.stack, // Error stack trace
machine_id: machineId, // Machine identifier
os: process.platform, // Operating system
arch: process.arch, // Architecture
node_version: process.version // Node.js version
}
}
```
Our privacy policy is available [here](https://www.fiveonefour.com/legal/privacy.pdf).
---
## Sloan Demos
Source: sloan/demos.mdx
Templates and instructions for achieving common tasks with Sloan
# Sloan Demos
This page will provide a series of templates and instructions for achieving common tasks with Sloan.
All demo flows will assume that you have already installed the Sloan CLI and Moose CLI.
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
---
## Context Demo -- Aircraft Metrics Definition
Source: sloan/demos/context.mdx
Learn how to use Sloan's context management to build knowledge-driven data applications
# Aircraft Metrics Definition
## Airspeed Metrics
**Ground Speed vs True Airspeed**: Ground speed (`gs`) represents the aircraft's speed relative to the ground, while true airspeed (`tas`) accounts for air density and temperature conditions. True airspeed calculation requires outside air temperature (`oat`) and pressure altitude data not currently available in our model.
**Indicated Airspeed (IAS)**: The airspeed reading from the aircraft's pitot-static system (`ias`), which differs from true airspeed based on altitude and atmospheric conditions. This metric requires direct airspeed sensor data not present in our current ADS-B feed.
## Climb/Descent Performance Metrics
**Vertical Speed**: Calculated using `baro_rate` (barometric rate) and `geom_rate` (geometric rate) to determine climb or descent performance. Positive values indicate climb, negative values indicate descent.
**Climb Efficiency**: Ratio of altitude gained to ground distance covered, calculated using altitude change (`alt_baro` or `alt_geom`) and position changes (`lat`, `lon`).
## Flight Phase Detection Metrics
**Takeoff Phase**: Identified by rapid altitude gain (`alt_baro` increasing) combined with increasing ground speed (`gs`) and high climb rate (`baro_rate` > 500 ft/min).
**Cruise Phase**: Characterized by stable altitude (minimal `baro_rate`), consistent ground speed (`gs`), and straight track (`track` changes < 5°).
**Approach Phase**: Detected by decreasing altitude (`baro_rate` < -300 ft/min), decreasing ground speed, and altitude below typical cruise levels.
**Landing Phase**: Final approach with very low altitude (`alt_baro` < 1000 ft), decreasing speed, and stable track toward runway.
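To make the thresholds above concrete, here is a minimal TypeScript sketch of a phase classifier. The field names mirror the ADS-B fields referenced in this document; the ordering of the checks and the exact cutoffs are illustrative assumptions, not a vetted heuristic.

```typescript
// Illustrative sketch only: classify a flight phase from a single ADS-B sample
// using the thresholds described above. Detecting *increasing* altitude or speed
// properly requires comparing sequential samples; this sketch uses rates as a proxy.
interface AircraftSample {
  alt_baro: number;  // barometric altitude (ft)
  baro_rate: number; // barometric climb/descent rate (ft/min)
  gs: number;        // ground speed (knots)
}

type FlightPhase = "takeoff" | "cruise" | "approach" | "landing" | "unknown";

function classifyPhase(s: AircraftSample): FlightPhase {
  if (s.alt_baro < 1000 && s.baro_rate <= 0) return "landing"; // very low and not climbing
  if (s.baro_rate > 500) return "takeoff";                     // high climb rate
  if (s.baro_rate < -300) return "approach";                   // sustained descent
  if (Math.abs(s.baro_rate) < 100) return "cruise";            // stable altitude
  return "unknown";
}

// Example: a sample descending through 800 ft classifies as "landing".
console.log(classifyPhase({ alt_baro: 800, baro_rate: -400, gs: 140 }));
```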
## Signal Quality Metrics
**Signal Strength**: Direct measurement using `rssi` (Received Signal Strength Indicator) to assess reception quality.
**Data Freshness**: Calculated using `seen` (seconds since last message) and `seen_pos` (seconds since last position update) to determine data reliability.
**Message Frequency**: Messages per minute calculated from `messages` count and time window to assess tracking consistency.
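A short TypeScript sketch of these two checks follows; the 30-second freshness cutoff is an arbitrary example value, not something defined in this document.

```typescript
// Sketch: basic signal-quality helpers built from ADS-B bookkeeping fields.
// `seen` / `seen_pos` are seconds since the last message / last position update.
function isFresh(sample: { seen: number; seen_pos?: number }, maxAgeSec = 30): boolean {
  return sample.seen <= maxAgeSec && (sample.seen_pos ?? 0) <= maxAgeSec;
}

// Messages per minute over an observation window of `windowSeconds`.
function messagesPerMinute(messageCount: number, windowSeconds: number): number {
  return windowSeconds > 0 ? (messageCount / windowSeconds) * 60 : 0;
}
```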
## Position Accuracy Metrics
**Navigation Accuracy**: Composite score using `nic` (Navigation Integrity Category), `nac_p` (Navigation Accuracy Category - Position), and `nac_v` (Navigation Accuracy Category - Velocity) to determine positional reliability.
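One way to turn these categories into a single score, as a rough sketch: normalize each value and average them. The maxima used for normalization and the equal weighting are assumptions, not part of this metric definition.

```typescript
// Sketch: normalize each accuracy/integrity category to 0..1 and average them.
// Assumed maxima: NIC 0-11, NACp 0-11, NACv 0-4; equal weighting is arbitrary.
function navigationAccuracyScore(sample: { nic: number; nac_p: number; nac_v: number }): number {
  const normalized = [sample.nic / 11, sample.nac_p / 11, sample.nac_v / 4];
  const clamped = normalized.map((v) => Math.min(Math.max(v, 0), 1));
  return clamped.reduce((sum, v) => sum + v, 0) / clamped.length;
}
```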
**Surveillance Accuracy**: Assessment using `sil` (Surveillance Integrity Level) and `sda` (System Design Assurance) to evaluate overall tracking quality.
## Flight Efficiency Metrics
**Great Circle Deviation**: Comparison of actual flight path (derived from sequential `lat`, `lon` coordinates) against the shortest great circle distance between origin and destination.
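The great-circle baseline in this comparison can be computed with the standard haversine formula. A self-contained TypeScript sketch (the Earth radius in miles is the only assumed constant):

```typescript
// Sketch: great-circle (haversine) distance between two lat/lon points in miles.
function haversineMiles(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const R = 3958.8; // mean Earth radius in miles
  const toRad = (deg: number) => (deg * Math.PI) / 180;
  const dLat = toRad(lat2 - lat1);
  const dLon = toRad(lon2 - lon1);
  const a =
    Math.sin(dLat / 2) ** 2 +
    Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) ** 2;
  return 2 * R * Math.asin(Math.sqrt(a));
}

// Deviation ratio: actual path length over the direct great-circle distance (>= 1).
function greatCircleDeviation(path: Array<{ lat: number; lon: number }>): number {
  let flown = 0;
  for (let i = 1; i < path.length; i++) {
    flown += haversineMiles(path[i - 1].lat, path[i - 1].lon, path[i].lat, path[i].lon);
  }
  const direct = haversineMiles(
    path[0].lat, path[0].lon,
    path[path.length - 1].lat, path[path.length - 1].lon,
  );
  return direct > 0 ? flown / direct : 1;
}
```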
**Altitude Optimization**: Analysis of altitude profile against optimal flight levels for given aircraft type and distance.
**Speed Consistency**: Variance in ground speed (`gs`) throughout different flight phases to assess flight smoothness.
**Fuel Efficiency**: Calculated using fuel flow rate (`fuel_flow`) and ground speed to determine nautical miles per gallon. Requires engine performance data not available in our current dataset.
## Environmental & Weather Metrics
**Wind Speed & Direction**: Calculated by comparing true airspeed (`tas`) with ground speed (`gs`) and track changes. Requires true airspeed data and wind vector information (`wind_speed`, `wind_direction`) not present in our model.
**Turbulence Detection**: Identified through rapid changes in altitude (`alt_baro`) and track (`track`) combined with accelerometer data (`vertical_g_force`, `lateral_g_force`) not available in ADS-B transmissions.
**Weather Avoidance**: Analysis of flight path deviations around weather systems using onboard weather radar data (`weather_radar_returns`) and precipitation intensity (`precip_intensity`) not included in our current data model.
## Traffic Density & Separation Metrics
**Aircraft Density**: Count of aircraft within defined geographical boundaries using `lat`, `lon` coordinates and configurable radius.
**Separation Metrics**: Minimum distances between aircraft calculated using position data and altitude differences.
**Airspace Utilization**: Percentage of available airspace occupied by tracked aircraft at different altitude bands.
## Operational Metrics
**Emergency Detection**: Identification of emergency situations using `emergency` codes, `squawk` codes (7500, 7600, 7700), and `alert` flags.
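As a simple illustration: squawk codes 7500/7600/7700 are the standard hijack, radio-failure, and general-emergency codes; how the `emergency` and `alert` fields are encoded in a given feed is an assumption in this sketch.

```typescript
// Sketch: flag emergency situations from ADS-B fields.
const EMERGENCY_SQUAWKS = new Set(["7500", "7600", "7700"]);

function isEmergency(sample: { squawk?: string; emergency?: string; alert?: number }): boolean {
  if (sample.squawk && EMERGENCY_SQUAWKS.has(sample.squawk)) return true;
  if (sample.emergency && sample.emergency !== "none") return true; // assumed encoding
  return sample.alert === 1; // assumed encoding
}
```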
**Autopilot Usage**: Analysis of autopilot engagement using navigation modes (`nav_modes`) and flight path consistency.
**Communication Quality**: Assessment based on transponder performance, message consistency, and data completeness across all available fields.
---
## Egress Demo
Source: sloan/demos/egress.mdx
Learn how to create data egress APIs using Sloan with a step-by-step TypeScript example
# Data Egress
## Create analytics APIs to serve your data
This demo will walk you through using Sloan tools to prompt your way to creating analytics APIs that serve the aircraft telemetry data you've ingested.
[Skip to prompt](#prompt-the-llm-to-create-a-geolocation-api) if you started with the ads-b template.
- **Sloan and Moose CLIs**: [Install them here](/sloan/getting-started/cursor)
- **OS**: macOS or Linux (WSL supported for Windows)
- **Docker Desktop/Engine**: [24.0.0+](https://docs.docker.com/get-started/get-docker/)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Anthropic API Key**: [Get one here](https://docs.anthropic.com/en/docs/initial-setup)
- **Client**: [Cursor](https://www.cursor.com/)
- **Completed**: [Ingest Demo](/sloan/demos/ingest) (or have data already in your system)
### Start with the ads-b template
```bash filename="Terminal" copy
sloan init egress-demo ads-b
```
```bash filename="Terminal" copy
cd egress-demo
npm install
```
### Run the Moose Dev Server
```bash filename="Terminal" copy
moose dev
```
### Open the project in Cursor
### Initialize the Sloan MCP
Navigate to `Cursor > Settings > Cursor Settings > Tools and Integrations` then toggle on the `Sloan MCP` tool.
### For best results, set the LLM to `claude-4-sonnet`
Gemini 2.5 and o3 are also reasonably good, but claude-4-sonnet has the most consistent results.
### Prompt the LLM to create a geolocation API
> Can you create an API that returns every aircraft within X miles of Y,Z coordinates.
### Action
The LLM should now use sloan tools to:
1. create the analytics API endpoint that calculates distance and filters aircraft
2. test the API to ensure it's working correctly with the existing aircraft data
You'll know it is succeeding if:
1. the LLM successfully tests the API
2. you can curl the generated API
3. you can see the generated openapi spec in `path/to/your/project/.moose/openapi.yaml`
### Continue this demo with:
* [Creating Materialized Views](/sloan/demos/mvs)
---
## Ingest Demo
Source: sloan/demos/ingest.mdx
Learn how to ingest data using Sloan with a step-by-step TypeScript example
# Ingest
## Ingest data periodically from an API
This demo will walk you through using Sloan tools to prompt your way to ingesting a whole bunch of aircraft telemetry data from adsb.lol.
[Skip to prompt](#prompt-the-llm-to-create-an-ingest) if you started with the ads-b template.
- **Sloan and Moose CLIs**: [Install them here](/sloan/getting-started/cursor)
- **OS**: macOS or Linux (WSL supported for Windows)
- **Docker Desktop/Engine**: [24.0.0+](https://docs.docker.com/get-started/get-docker/)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Anthropic API Key**: [Get one here](https://docs.anthropic.com/en/docs/initial-setup)
- **Client**: [Cursor](https://www.cursor.com/)
### Create and initialize a new typescript project
```bash filename="Terminal" copy
sloan init ingest-demo typescript-empty
```
```bash filename="Terminal" copy
cd ingest-demo
npm install
```
### Open the project in Cursor
### Run the Moose Dev Server
```bash filename="Terminal" copy
moose dev
```
### Initialize the Sloan MCP
Navigate to `Cursor > Settings > Cursor Settings > Tools and Integrations` then toggle on the `Sloan MCP` tool.
### For best results, set the LLM to `claude-4-sonnet`
Gemini 2.5 and o3 are also reasonably good, but claude-4-sonnet has the most consistent results.
### Prompt the LLM to create an ingest
> I want to ingest data from the following aircraft transponder data api every 5 seconds for the purpose of creating a set of visualizations.
>
> API: @https://api.adsb.lol/v2/mil
>
> Docs: @https://api.adsb.lol/docs#/v2/v2_mil_v2_mil_get (note, I think the schema here is inaccurate, check out the data before you trust the docs).
>
> Can you execute on this?
The LLM might do a planning step, in which case, you can ask it to execute on the plan.
> Go for it!
### Action
The LLM should now use sloan tools to:
1. get sample data from the API using a temporary script
2. use that sample to create a schema that's used to create an ingest pipeline (ingest API, Redpanda stream, ClickHouse table)
3. create a Moose managed temporal workflow to periodically ingest the data
You'll know it is succeeding if you see dozens of events per second hit your Moose dev console.
### Continue this demo with:
* [Creating Egress APIs](/sloan/demos/egress)
* [Creating Materialized Views](/sloan/demos/mvs)
---
## Materialized Views Demo
Source: sloan/demos/mvs.mdx
Learn how to create materialized views using Sloan with a step-by-step TypeScript example
# Materialized Views
## Create materialized views to pre-aggregate your data, based on your egress API
This demo will walk you through using Sloan tools to prompt your way to creating materialized views that pre-aggregate the aircraft telemetry data you've ingested.
[Skip to prompt](#prompt-the-llm-to-create-a-materialized-view) if you started with the ads-b template.
- **Sloan and Moose CLIs**: [Install them here](/sloan/getting-started/cursor)
- **OS**: macOS or Linux (WSL supported for Windows)
- **Docker Desktop/Engine**: [24.0.0+](https://docs.docker.com/get-started/get-docker/)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Anthropic API Key**: [Get one here](https://docs.anthropic.com/en/docs/initial-setup)
- **Client**: [Cursor](https://www.cursor.com/)
- **Completed**: [Ingest Demo](/sloan/demos/ingest) (or have data already in your system)
### Start with the ads-b template
```bash filename="Terminal" copy
sloan init mvs-demo ads-b
```
```bash filename="Terminal" copy
cd mvs-demo
npm install
```
### Run the Moose Dev Server
```bash filename="Terminal" copy
moose dev
```
### Open the project in Cursor
### Initialize the Sloan MCP
Navigate to `Cursor > Settings > Cursor Settings > Tools and Integrations` then toggle on the `Sloan MCP` tool.
### For best results, set the LLM to `claude-4-sonnet`
Gemini 2.5 and o3 are also reasonably good, but claude-4-sonnet has the most consistent results.
### Create an egress API
You can skip this step if you've already completed the [Egress Demo](/sloan/demos/egress).
> Can you create an API that returns every aircraft within X miles of Y,Z coordinates.
If you prefer to implement the egress API manually, you can create the following analytics API:
```typescript filename="app/index.ts"
...
export * from './apis/getAircraftWithinRadius';
```
```typescript filename="app/apis/getAircraftWithinRadius.ts"
import { Api } from "@514labs/moose-lib"; // Moose's analytics (consumption) API primitive
import { tags } from "typia";

/**
 * Parameters for the getAircraftWithinRadius API
 */
interface AircraftRadiusParams {
  /** Latitude of the center point */
  center_lat: number;
  /** Longitude of the center point */
  center_lon: number;
  /** Radius in miles from the center point */
  radius_miles: number;
  /** Maximum number of results to return (optional) */
  limit?: number & tags.Type<"int64"> & tags.Minimum<1> & tags.Maximum<1000>;
}

/**
 * API to retrieve aircraft within a specified radius from given coordinates
 * Uses ClickHouse's geoDistance function to calculate great circle distance
 */
export const getAircraftWithinRadius = new Api<AircraftRadiusParams, any>(
  "getAircraftWithinRadius",
  async (params, { client, sql }) => {
    // Execute the query with proper parameter handling
    const result = await client.query.execute(sql`
      SELECT
        hex,
        flight,
        aircraft_type,
        lat,
        lon,
        alt_baro,
        gs,
        track,
        timestamp,
        round(geoDistance(lon, lat, ${params.center_lon}, ${params.center_lat}) * 0.000621371, 2) as distance_miles
      FROM AircraftTrackingProcessed
      WHERE geoDistance(lon, lat, ${params.center_lon}, ${params.center_lat}) * 0.000621371 <= ${params.radius_miles}
      ORDER BY distance_miles ASC
      LIMIT ${params.limit || 100}
    `);
    return result;
  },
  {
    metadata: {
      description: "Returns all aircraft within a specified distance (in miles) from given coordinates"
    }
  }
);
```
### Prompt the LLM to create a materialized view
> Given the egress API, can you create a materialized view that improves the performance of the query?
### Action
The LLM should now use sloan tools to:
1. analyze the Moose project and the egress API
2. create a Materialized View that pre-aggregates the data
You'll know it is succeeding if:
1. the LLM successfully creates the materialized view
2. the LLM sees that the materialized view was created in the ClickHouse database
### Optional further prompts
> Can you create a new egress API that leverages the materialized view to improve the performance of the query?
> Can you test both APIs to see what the performance difference is?
> I want to see the difference in speed, number of rows read, amount of data read, compute, and other vectors you think are pertinent.
---
## Getting Started
Source: sloan/getting-started.mdx
Getting started guide for Sloan
## Getting Started
These guides will walk you through setting up Sloan MCP with your client of choice, using the CLI and using the MCP.JSON configuration file.
---
## Getting Started
Source: sloan/getting-started/claude.mdx
Getting started guide for Sloan
## Claude Desktop
### Install Claude Desktop
[Install the Claude Desktop application here](https://claude.ai/download). Note, the Pro version appears to work much more stably with MCPs.
### Install Moose and Sloan CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
### Configure Sloan MCP
Create a new project with Claude Desktop MCP preconfigured:
```bash filename="Terminal" copy
sloan init --mcp claude-desktop
```
For other options, see [Sloan CLI docs](/sloan/getting-started/sloan-cli).
```json filename="~/Library/Application Support/Claude/claude_desktop_config.json" copy
{
"mcpServers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
```
### Adding other toolsets
For more information on available toolsets, see [Sloan MCP toolsets](/sloan/reference/tool-reference). For Claude Desktop, we recommend the following toolsets:
* [Moose Read Tools](/sloan/reference/tool-reference#moose-read-tools): gives you chat access to your Moose project and the data within it (enabled by default)
* [Remote ClickHouse Tools](/sloan/reference/tool-reference#remote-clickhouse) (read only): gives you chat access to your remote ClickHouse data
### Using the MCP
1. Open the Claude Desktop application (note, you often have to reload the application after adding a new MCP)
2. If you are using Moose tools, you will need to run your moose dev server
### Warnings / Peculiarities
* You shouldn't use "write"/generative tools with Claude Desktop.
* Every time you add an MCP or change its configuration, you will need to reload the application.
* If you want to change the Moose Project that the Sloan MCP is referring to, manually edit the MCP.JSON file or run `sloan config focus` and select a new project.
### Common issues / troubleshooting
* The MCP is running, but you aren't able to get your data? Look at the tool call response, it will tell you if your Moose dev server is running. If it is not, run `moose dev` in your Moose project directory.
* The MCP is not running. Check your configuration and then restart the application.
## Reference
---
## Getting Started
Source: sloan/getting-started/cursor.mdx
Getting started guide for Sloan
## Cursor
### Install
[Install Cursor here](https://www.cursor.com/).
### Install Moose and Sloan CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
### Configure Sloan MCP
Create a new project with Cursor MCP preconfigured:
```bash filename="Terminal" copy
sloan init --mcp cursor-project
```
If you want to use this as a global Cursor MCP, use `cursor-global` instead of `cursor-project`.
For other options, see [Sloan CLI docs](/sloan/getting-started/sloan-cli).
```json filename="/path/to/your/project/.cursor/mcp.json" copy
{
"mcpServers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
```
### Adding other toolsets
For more information on available toolsets, see [Sloan MCP toolsets](/sloan/reference/tool-reference). All toolsets are available for Cursor.
### Using the MCP
1. Open Cursor
2. You will see a popup saying that an MCP is detected, and can be enabled. Our experience is that this is not always reliable, and the MCP is more stably launched if you go to `cursor > settings > cursor settings > tools and integrations` and enable the MCP there.
3. If you are using Moose tools, you will need to run your moose dev server with `moose dev`.
### Warnings / Peculiarities
* Every time you add an MCP or change its configuration, you will need to reload the MCP. You can do this by going to `cursor > settings > cursor settings > tools and integrations` and toggling the MCP off and on. If this doesn't work, you can also restart the Cursor application.
* If you have configured the MCP globally, and want to change the Moose Project that the Sloan MCP is referring to, manually edit the MCP.JSON file or run `sloan config focus` and select a new project.
### Common issues / troubleshooting
* The MCP is running, but you aren't able to get your data? Look at the tool call response, it will tell you if your Moose dev server is running. If it is not, run `moose dev` in your Moose project directory.
* The MCP is not running. Check your configuration and then restart the application.
## Reference
---
## Getting Started
Source: sloan/getting-started/other-clients.mdx
Getting started guide for Sloan
## Other clients
We are working on adding MCP support for other clients. If you are interested in using other clients, please [contact us](mailto:sloan@fiveonefour.com).
To set up your client with Sloan MCP, use the MCP.JSON file for your client of choice.
### Configure Sloan MCP
**Configuration Object Naming**: Different MCP clients use different naming conventions for the server configuration object:
- **Cursor, Windsurf**: use `"mcpServers"`
- **Claude Desktop, VS Code**: use `"servers"` instead
Make sure to check your specific client's documentation for the correct naming convention.
```json filename="MCP configuration" copy
{
"mcpServers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
```
```json filename="MCP configuration" copy
{
"servers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
```
### Adding other toolsets
For more information on available toolsets, see [Sloan MCP toolsets](/sloan/reference/tool-reference). If the client you are using is a chat client, we recommend the following toolsets:
* [Moose Read Tools](/sloan/reference/tool-reference#moose-read-tools): gives you chat access to your Moose project and the data within it (enabled by default)
* [Remote ClickHouse Tools](/sloan/reference/tool-reference#remote-clickhouse) (read only): gives you chat access to your remote ClickHouse data
If it is an IDE type client, all toolsets are available.
### Using the MCP
1. Open your client application (note, you often have to reload the application after adding a new MCP)
2. If you are using Moose tools, you will need to run your moose dev server
### Common issues / troubleshooting
* The MCP is running, but you aren't able to get your data? Look at the tool call response, it will tell you if your Moose dev server is running. If it is not, run `moose dev` in your Moose project directory.
* The MCP is not running. Check your configuration and then restart the application.
## Reference
---
## Getting Started
Source: sloan/getting-started/vs-code.mdx
Getting started guide for Sloan
## VS Code
- **VS Code**: [Install VS Code here](https://code.visualstudio.com/).
- **GitHub Copilot in VS Code**: [See VS Code docs](https://code.visualstudio.com/docs/copilot/setup)
### Install VS Code and enable MCPs
[Install VS Code here](https://code.visualstudio.com/).
[Enable MCPs](vscode://settings/chat.mcp.enabled) by toggling on the `chat.mcp.enabled` setting.
### Install Moose and Sloan CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
### Configure Sloan MCP
Create a new project with VS Code MCP preconfigured:
```bash filename="Terminal" copy
sloan init --mcp vscode-project
```
If you want to use this as a global VS Code MCP, use `vscode-global` instead of `vscode-project`.
For other options, see [Sloan CLI docs](/sloan/getting-started/sloan-cli).
```json filename="/path/to/your/project/.vscode/settings.json" copy
{
"mcp": {
"input": [],
"servers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
}
```
### Adding other toolsets
For more information on available toolsets, see [Sloan MCP toolsets](/sloan/reference/tool-reference). All toolsets are available for VS Code.
### Using the MCP
1. Open VS Code and make sure MCP support is enabled (the `chat.mcp.enabled` setting from the step above).
2. Start the `sloan` MCP server from the Copilot Chat agent mode tools list; after changing the configuration you may need to restart the server.
3. If you are using Moose tools, you will need to run your moose dev server with `moose dev`.
### Warnings / Peculiarities
**Recommended Configuration Method**: While VS Code has a feature that allows you to use MCPs from other clients (like Claude Desktop), we strongly recommend using either the **Sloan CLI** or the **settings.json file** method shown above instead. These methods provide better reliability and configuration control specifically for VS Code environments.
### Common issues / troubleshooting
* The MCP is running, but you aren't able to get your data? Look at the tool call response, it will tell you if your Moose dev server is running. If it is not, run `moose dev` in your Moose project directory.
* The MCP is not running. Check your configuration and then restart the application.
## Reference
---
## Getting Started
Source: sloan/getting-started/windsurf.mdx
Getting started guide for Sloan
## Windsurf
### Install Windsurf
[Install Windsurf here](https://windsurf.com/).
### Install Moose and Sloan CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
### Configure Sloan MCP
Create a new project with Windsurf MCP preconfigured:
```bash filename="Terminal" copy
sloan init --mcp windsurf-global
```
For other options, see [Sloan CLI docs](/sloan/getting-started/sloan-cli).
```json filename="~/.codeium/windsurf/mcp_config.json" copy
{
"mcpServers": {
"sloan": {
"args": [
"@514labs/sloan-mcp@latest",
"--moose-read-tools",
"path/to/your/moose/project"
],
"command": "npx",
"env": {
"ANTHROPIC_API_KEY": "",
"MOOSE_PATH": "path/to/your/moose/installation",
"NODE_PATH": "path/to/your/node/installation",
"PYTHON_PATH": "path/to/your/python/installation"
}
}
}
}
```
### Adding other toolsets
For more information on available toolsets, see [Sloan MCP toolsets](/sloan/reference/tool-reference). All toolsets are available for Windsurf.
### Using the MCP
1. Open Windsurf
2. Run the MCP by going to `windsurf > settings > windsurf settings > cascade > Model Context Protocol (MCP) Servers` and enabling the MCP there.
3. If you are using Moose tools, you will need to run your moose dev server with `moose dev`.
### Warnings / Peculiarities
* Every time you add an MCP or change its configuration, you will need to reload the MCP. You can do this by going to `windsurf > settings > windsurf settings > cascade > Model Context Protocol (MCP) Servers` and toggling the MCP off and on or refreshing the server. If this doesn't work, you can also restart the Windsurf application.
* If you have configured the MCP globally, and want to change the Moose Project that the Sloan MCP is referring to, manually edit the MCP.JSON file or run `sloan config focus` and select a new project.
### Common issues / troubleshooting
* The MCP is running, but you aren't able to get your data? Look at the tool call response, it will tell you if your Moose dev server is running. If it is not, run `moose dev` in your Moose project directory.
* The MCP is not running. Check your configuration and then restart the application.
## Reference
---
## Quickstart
Source: sloan/guides.mdx
Quickstart guide for Sloan
# Sloan Quickstart Guides
Sloan exposes tools to your LLM client for reading, exploring, and building on ClickHouse data—locally or in production. Get started with one of our quickstart guides below:
---
## sloan/guides/clickhouse-chat
Source: sloan/guides/clickhouse-chat.mdx
## Quickstart: AI Chat with ClickHouse
*Use your LLM client to explore your ClickHouse with Sloan MCP tools.*
This will walk you through using Sloan CLI to connect Sloan MCP tools to your ClickHouse database, allowing you to chat with your data in your client of choice. We'll use the ClickHouse Playground as our example database, but you can use any ClickHouse database.
- **OS**: macOS or Linux (WSL supported for Windows)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Client**: [Cursor](https://www.cursor.com/) or [Claude Desktop](https://claude.ai/download) or [Windsurf](https://windsurf.ai/download). For this particular use-case, we recommend Claude Desktop.
### Install Sloan CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan
```
### Configure your Sloan MCP
```bash filename="Terminal" copy
sloan connect clickhouse --connection-string "https://explorer:@play.clickhouse.com:443/?database=default" --mcp claude-desktop
```
You need a ClickHouse connection URL. Format looks like this:
```
http://username:password@host:port/?database=database_name
```
Want to test without your own ClickHouse? Use the [ClickHouse Playground](https://clickhouse.com/docs/getting-started/playground) with the connection string above. It has sample datasets (read-only) you can experiment with.
```txt copy
https://explorer:@play.clickhouse.com:443/?database=default
```
**ClickHouse Cloud:**
1. Log into your [ClickHouse Cloud console](https://clickhouse.cloud/)
2. Go to your service details page
3. Find the "Connect" or "Connection Details" section
4. Copy the HTTPS endpoint and your username/password

**Self-hosted ClickHouse:**
- Check your ClickHouse config file (usually `/etc/clickhouse-server/config.xml`)
- Look for `http_port` (default: 8123) and `https_port` (default: 8443)
- Check the users config in `/etc/clickhouse-server/users.xml` or the `users.d/` directory
- The default user is often `default` with no password

**ClickHouse in Docker:**
- Check your docker-compose.yml or docker run command for environment variables
- Look for `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB`
- The default is usually `http://default:@localhost:8123/?database=default`

**Troubleshooting:**
- **Can't connect?** Try `curl http://your-host:8123/ping` to test connectivity
- **Authentication failed?** Verify username/password with `clickhouse-client --user=username --password=password`
- **Database not found?** Run `SHOW DATABASES` to see available databases
- **Permission denied?** Check user permissions with `SHOW GRANTS FOR username`
**Still stuck?** Check the [ClickHouse documentation](https://clickhouse.com/docs/en/getting-started/install) for your specific deployment method.
### Chat
Open your Claude Desktop client. We recommend starting the chat with a context setting question like "tell me about the data I have available to me in ClickHouse".
You can check that the MCP is correctly configured by looking at `claude > settings > developer > sloan`. It should say "running".
You can also look at `search and tools` beneath the chat window; you should see `sloan` in the list of MCPs, and if you click into it, you should see the tools that are enabled.
### What's next?
Try [creating a Moose Project from your ClickHouse database](https://docs.fiveonefour.com/sloan/quickstart/clickhouse-proj). That way, you can use Sloan MCP tools to create new primitives, like ingestion paths, data models, egress APIs, and more!
Or try [deploying your project to Boreal](https://www.fiveonefour.com/boreal), our hosting platform for Moose projects.
## Other Quickstart Guides
---
## sloan/guides/clickhouse-proj
Source: sloan/guides/clickhouse-proj.mdx
## Quickstart: AI analytics engineering from your ClickHouse
*Generate a local OLAP project from your ClickHouse deployment; Sloan MCP pre-configured for analytics engineering.*
This will walk you through creating a new local Moose project reflecting the structure of your ClickHouse database. It will allow you to add data to your local dev environment from your remote ClickHouse database, and use Sloan MCP tools to enrich your project with metadata or create new Moose primitives that you can use in your project (e.g. egress APIs). We'll use the ClickHouse Playground as our example database, but you can use any ClickHouse database.
- **OS**: macOS or Linux (WSL supported for Windows)
- **Docker Desktop/Engine**: [24.0.0+](https://docs.docker.com/get-started/get-docker/)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Anthropic API Key**: [Get one here](https://docs.anthropic.com/en/docs/initial-setup)
- **Client**: [Cursor](https://www.cursor.com/) or [Claude Desktop](https://claude.ai/download) or [Windsurf](https://windsurf.ai/download). For this particular use-case, we recommend Claude Desktop.
### Install Moose and Sloan CLIs
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) moose,sloan
```
We'll be using generative MCP tools here, so make sure you add your Anthropic API key during install. If you installed without adding it, you can add it later with `sloan config keys anthropic <your-api-key>`. If you need to create one, see: https://docs.anthropic.com/en/docs/initial-setup.
### Create a new Moose project from your ClickHouse database
```bash filename="Terminal" copy
sloan init my_project_name --from-remote 'https://explorer:@play.clickhouse.com:443/?database=default' --language typescript --mcp cursor-project
```
Want to test without your own ClickHouse? Use the [ClickHouse Playground](https://clickhouse.com/docs/getting-started/playground) with the connection string above. It has sample datasets (read-only) you can experiment with.
```txt copy
https://explorer:@play.clickhouse.com:443/?database=default
```
**ClickHouse Cloud:**
1. Log into your [ClickHouse Cloud console](https://clickhouse.cloud/)
2. Go to your service details page
3. Find the "Connect" or "Connection Details" section
4. Copy the HTTPS endpoint and your username/password

**Self-hosted ClickHouse:**
- Check your ClickHouse config file (usually `/etc/clickhouse-server/config.xml`)
- Look for `http_port` (default: 8123) and `https_port` (default: 8443)
- Check the users config in `/etc/clickhouse-server/users.xml` or the `users.d/` directory
- The default user is often `default` with no password

**ClickHouse in Docker:**
- Check your docker-compose.yml or docker run command for environment variables
- Look for `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB`
- The default is usually `http://default:@localhost:8123/?database=default`

**Troubleshooting:**
- **Can't connect?** Try `curl http://your-host:8123/ping` to test connectivity
- **Authentication failed?** Verify username/password with `clickhouse-client --user=username --password=password`
- **Database not found?** Run `SHOW DATABASES` to see available databases
- **Permission denied?** Check user permissions with `SHOW GRANTS FOR username`
**Still stuck?** Check the [ClickHouse documentation](https://clickhouse.com/docs/en/getting-started/install) for your specific deployment method.
This will create a new Moose project from your ClickHouse database. [See Moose docs](https://docs.fiveonefour.com/moose) for more information about the project structure, and how it spins up your local development environment (including a local ClickHouse database).
The new project is called "my_project_name" and is created in the current directory. The string after `--from-remote` is the connection string to your ClickHouse database, structured as `clickhouse://<user>:<password>@<host>:<port>/<database>` (note, the ClickHouse Playground has no password).
### Install dependencies and run the dev server
Before you can run Moose's local dev server, Docker Desktop must be running.
Navigate into the project directory:
```bash filename="Terminal" copy
cd my_project_name
```
Install the dependencies:
```bash filename="Terminal" copy
npm i
```
Run the dev server:
```bash filename="Terminal" copy
moose dev
```
### Get sample data
```bash filename="Terminal" copy
moose seed clickhouse --connection-string clickhouse://explorer:@play.clickhouse.com:9440/default --limit 100
```
This will seed your local ClickHouse database with 100 rows of sample data from your remote ClickHouse database—here, the ClickHouse Playground. You can change the number of rows with the `--limit` flag.
This will improve the context provided to Sloan's MCP tools, and make it easier to validate analytic engineering tasks.
### Set up your Client
The `sloan init` command above configured Cursor to use Sloan MCP tools. You can check this by opening Cursor and looking at `cursor > settings > cursor settings > MCP` menu. You should see `sloan` in the list of MCPs, alongside a list of tools.
You may need to enable the MCP. Once you do so, you should see a green 🟢 status indicator next to it.
If you would like to use a different client, you can use the following command from within the project directory:
```bash filename="Terminal" copy
sloan setup --mcp <mcp-host>
```
### Enrich project with metadata [coming soon]
Since we have a Moose project with sample data and some metadata, we can use this to create more metadata!
If we ask our client "Can you add a description to each Moose primitive in this project?", the LLM will use the `write_metadata` tool to add a description to each Moose primitive.
```ts filename="my_project_name/index.ts"
const acPipeline = new IngestPipeline("AircraftTrackingProcessed", {
  table: true,
  stream: true,
  ingestApi: false,
  metadata: {
    description: "Pipeline for ingesting raw aircraft data", // new description field!
  },
});
```
### Chat with your data
You can also now just chat with your client about your data! Try asking "Look at my MTA data in ClickHouse, tell me about the trains that ran in the last 24 hours."
The client will use `read_moose_project`, `read_clickhouse_tables` and maybe `read_production_clickhouse` to answer your question.
### Create new Egress APIs with Sloan MCP tools
If you find a thread that you find interesting enough to want to productionize, try asking the client "can you create an egress API to furnish that data?"
The client will use `create_egress_api` and `test_egress_api` to create an egress API primitive in Moose that will automatically deploy in your local dev environment when you save.
### What's next?
Try adding new [ingestion scripts, data models, or materialized views to your project using Sloan's experimental tools](https://docs.fiveonefour.com/sloan/reference/tool-reference#experimental-moose-tools)!
## Other Quickstart Guides
---
## sloan/guides/from-template
Source: sloan/guides/from-template.mdx
## Quickstart: AI powered OLAP templates
*Bootstrap a complete OLAP pipeline with a Moose template; with Sloan's AI tools already set up for you.*
This will get you started with a Moose data engineering project ingesting Aircraft Transponder data that you can use to learn about Sloan's Analytics Engineering MCP toolset.
- **OS**: macOS or Linux (WSL supported for Windows)
- **Docker Desktop/Engine**: [24.0.0+](https://docs.docker.com/get-started/get-docker/)
- **Node**: [version 20+](https://nodejs.org/en/download) (LTS recommended)
- **Anthropic API Key**: [Get one here](https://docs.anthropic.com/en/docs/initial-setup)
- **Client**: [Cursor](https://www.cursor.com/) or [Claude Desktop](https://claude.ai/download) or [Windsurf](https://windsurf.ai/download). For this particular use-case, we recommend Claude Desktop.
### Install Sloan and Moose CLIs
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
We'll be using generative MCP tools here, so make sure you add your Anthropic API key during install. If you installed without adding it, you can add it later with `sloan config keys anthropic <your-api-key>`. If you need to create one, see: https://docs.anthropic.com/en/docs/initial-setup.
### Create a new Moose project from the ADS-B template
```bash filename="Terminal" copy
sloan init ads-b
```
This will create a new Moose project using the ADS-B template to gather ADS-B (aircraft transponder) data that you can use to explore Sloan's MCP offerings. By default, it will create the project configured for use with Cursor (by creating `~/.cursor/mcp.json`), but if you would like to use Claude Desktop, append `--mcp claude-desktop`.
If you want to create an empty project, and build your own Data Models and ingestion, try `sloan init typescript-empty` or `sloan init python-empty`
### Install dependencies and run the dev server
Navigate into the created project directory:
```bash filename="Terminal" copy
cd <your-project-name>
```
Install the dependencies:
```bash filename="Terminal" copy
npm i
```
Run the dev server:
```bash filename="Terminal" copy
moose dev
```
### Set up your client: open Cursor and enable the MCP
Then open your code editor (e.g. by running `cursor .`).
Cursor should prompt you to enable the MCP. If it doesn't, go to `cursor > settings > cursor settings > MCP` and enable the MCP called "sloan". Note that the tools will not all work until the dev server is running locally, and you might need to refresh the MCP until its status indicator shows 🟢.
### Start Ingesting Data
Run the command to start ingesting data with the configured ingest scripts: `moose workflow run military_aircraft_tracking`
You should start to see hundreds of live datapoints ingesting instantly!
### Enrich project with metadata [coming soon]
Since we have a Moose project with sample data and some metadata, we can use this to create more metadata!
If we ask our client "Can you add a description to each Moose primitive in this project?", the LLM will use the `write_metadata` tool to add a description to each Moose primitive.
```ts filename="my_project_name/index.ts"
const acPipeline = new IngestPipeline("AircraftTrackingProcessed", {
  table: true,
  stream: true,
  ingestApi: false,
  metadata: {
    description: "Pipeline for ingesting raw aircraft data", // new description field!
  },
});
```
### Chat with your data
You can also now just chat with your client about your data! Try asking "What aircraft are listed in the data I have available."
The client will use `read_moose_project`, `read_clickhouse_tables` and maybe `read_production_clickhouse` to answer your question.
### Create new Egress APIs with Sloan MCP tools
If you find a thread that you find interesting enough to want to productionize, try asking the client "can you create an egress API to furnish that data?"
The client will use `create_egress_api` and `test_egress_api` to create an egress API primitive in Moose that will automatically deploy in your local dev environment when you save.
## Other Quickstart Guides
---
## Sloan - Automated Data Engineering
Source: sloan/index.mdx
AI-powered tools and agents exposed through an easy to configure MCP server for data engineering and infrastructure
# Welcome to Sloan
Install [Sloan](/sloan/reference/cli-reference) and [Moose](/moose/reference/cli-reference) CLIs:
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan,moose
```
## What is Sloan?
Sloan is a set of tools to make your chat, copilot or BI fluent in data engineering. Use the CLI to set up MCPs with the tools you need in the clients you use. Create new data engineering projects with a moose-managed ClickHouse based infrastructure or use these agents with your existing data infrastructure.
## Quickstart Guides
## Core features
Sloan offers a comprehensive suite of MCP tools and agents designed to streamline data engineering workflows, enabling faster and more efficient deployment and management.
- 3 steps in CLI to chat with ClickHouse
- 5 minutes to build a full stack OLAP project
- Template based new projects for building your own infrastructure

- Data engineering in your IDE
- Data analytics and analytics engineering in your chat client
- BI in your BI tool of choice

- Opinionated OLAP deployments with Moose: optimized ClickHouse development and deployment
- Direct integration with your architecture: DuckDB, Snowflake, Databricks
- Integration with your enterprise: metadata, CICD, logging and more

- Full-stack context: code, logs, data, docs
- Self-improving feedback loops
- Embedded metadata for continuity

- Each agent: context gathering → implementation → testing → doc workflows
- Governance defaults, easily configurable: SDLC, data quality, reporting, privacy and security default practices
- Learns your policies with minimal context, enforces them automatically
## Why Sloan exists
### The DIY Approach
### The Sloan Approach
## What jobs can you do with Sloan?
- **Ad hoc analytics**: Give your LLM client a way to chat with your data, in ClickHouse, Databricks, Snowflake, or wherever it lives
- **Analytics Engineering**: Agents that can build new data products, materialized views and egress methods
- **Data Engineering**: Have agents build and test end to end data pipelines
- **Data Wrangling**: Agents that interact with your data systems like DuckDB, Databricks, Snowflake and more to create scripts, clean data, and prepare data for use
- **Data Migration**: Automated creation of data pipelines to migrate data from legacy systems to a modern data backend
- **Data quality, governance and reporting**: Agents that can help you enforce data quality, governance and reporting during development and in run time
## Getting Involved / Giving Feedback / Community
---
## CLI Reference
Source: sloan/reference/cli-reference.mdx
CLI Reference for Sloan
# CLI Reference
The Sloan CLI is a tool we created to facilitate the easy setup of Sloan MCPs. It is not required, and we hope that LLM clients will eventually support all these features natively, so that we can remove the CLI.
It allows you to:
- set up your Sloan MCP with a variety of clients
- manage multiple projects
- manage multiple toolsets
- update multiple MCP configurations at once
## Install CLI
```bash filename="Terminal" copy
bash -i <(curl -fsSL https://fiveonefour.com/install.sh) sloan
```
## Commands
### init
Creates a data engineering project with Moose, with Sloan MCP preconfigured.
```bash filename="Terminal" copy
sloan init <name> [--mcp <mcp-host>] [--location <location>]
```
Arguments and options:

- `<name>` (required): Name of your application. Example: `my-app`
- `--mcp <mcp-host>`: Choice of which MCP host to use. Default: `cursor-project`; other options: `cursor-global`, `claude-desktop`, `windsurf-global`
- `--location <location>`: