Creating Materialized Views | MooseStack Documentation

BasicUsage.py

from pydantic import BaseModel

from moose_lib import MaterializedView, MaterializedViewOptions, ClickHouseEngines
from source_table import source_table


# Schema of the transformed rows. It is static and must match the results of
# your SELECT. It also represents the schema of your entire destination table.
class TargetSchema(BaseModel):
    id: str
    average_rating: float
    num_reviews: int


mv = MaterializedView[TargetSchema](MaterializedViewOptions(
    # The transformation to run on the source table
    select_statement="""
        SELECT
        {source_table.columns.id},
        avg({source_table.columns.rating}) AS average_rating,
        count(*) AS num_reviews
        FROM {source_table}
        GROUP BY {source_table.columns.id}
    """,
    # Reference to the source table(s) that the SELECT reads from
    select_tables=[source_table],
    # Creates a new OlapTable named "target_table" where the transformed rows are written to.
    table_name="target_table",
    order_by_fields=["id"],
    # The name of the materialized view in ClickHouse
    materialized_view_name="mv_to_target_table",
))

ViewOptions.py

from typing import List

from pydantic import BaseModel

from moose_lib import ClickHouseEngines, MaterializedView, OlapTable, View, sql
from source_table import source_table


# Reference: the fields accepted by MaterializedViewOptions.
class MaterializedViewOptions(BaseModel):
    # The SELECT transformation to run on the source table(s)
    select_statement: str
    # Name of the destination table the transformed rows are written to
    table_name: str
    # Name of the materialized view in ClickHouse
    materialized_view_name: str
    # The source table(s) or view(s) that the SELECT reads from
    select_tables: List[OlapTable | View]
    # Engine of the destination table; MergeTree unless overridden
    engine: ClickHouseEngines = ClickHouseEngines.MergeTree
    # ORDER BY fields of the destination table; empty unless overridden
    order_by_fields: List[str] = []

Denormalization.py

from pydantic import BaseModel

from moose_lib import MaterializedView, MaterializedViewOptions
from source_table import source_table


# Schema of the destination table; it must match the columns the SELECT returns.
class Dest(BaseModel):
    id: str
    value: int
    created_at: str


# Projects, casts and filters rows from source_table into "proj_table".
mv = MaterializedView[Dest](MaterializedViewOptions(
    select_statement="""
        SELECT {source_table.columns.id}, toInt32({source_table.columns.value}) AS value, {source_table.columns.created_at} AS created_at FROM {source_table} WHERE active = 1
    """,
    select_tables=[source_table],
    table_name="proj_table",
    order_by_fields=["id"],
    materialized_view_name="mv_to_proj_table",
))

FanIn.py

from pydantic import BaseModel

from moose_lib import (
    MaterializedView,
    MaterializedViewOptions,
    OlapConfig,
    OlapTable,
    ClickHouseEngines,
)

# NOTE(review): webEvents and mobileEvents are not defined in this snippet;
# presumably they are source tables declared elsewhere - confirm and import them.


# Schema shared by both materialized views and the destination table.
class DailyCounts(BaseModel):
    day: str
    user_id: str
    events: int


# Create the destination table explicitly
daily = OlapTable[DailyCounts]("daily_counts", OlapConfig(
    engine=ClickHouseEngines.SummingMergeTree,
    order_by_fields=["day", "user_id"],
))

# MV 1 - write to the daily_counts table
mv1 = MaterializedView[DailyCounts](MaterializedViewOptions(
    select_statement="SELECT toDate(ts) AS day, user_id, 1 AS events FROM {webEvents}",
    select_tables=[webEvents],
    materialized_view_name="mv_web_to_daily_counts",
), target_table=daily)

# MV 2 - write to the daily_counts table
mv2 = MaterializedView[DailyCounts](MaterializedViewOptions(
    select_statement="SELECT toDate(ts) AS day, user_id, 1 AS events FROM {mobileEvents}",
    select_tables=[mobileEvents],
    materialized_view_name="mv_mobile_to_daily_counts",
), target_table=daily)

You can use f-strings to interpolate table and column identifiers into your queries. Since these identifiers are static, you don't need to worry about SQL injection.

Transformation.py

from pydantic import BaseModel

from moose_lib import MaterializedView, MaterializedViewOptions, OlapConfig

# NOTE(review): events and users are not defined in this snippet; presumably
# they are source tables declared elsewhere - confirm and import them.


# Schema of the destination table; one field per column returned by the SELECT.
class Dest(BaseModel):
    id: str
    name: str
    day: str


# Joins events with users, keeps active events only, and writes the result
# to "user_activity_by_day".
mv = MaterializedView[Dest](MaterializedViewOptions(
    select_statement="""
        SELECT
            {events.columns.id}        AS id,
            {events.columns.name}      AS name,
            toDate({events.columns.ts}) AS day
        FROM {events}
        JOIN {users} ON {events.columns.user_id} = {users.columns.id}
        WHERE {events.columns.active} = 1
    """,
    select_tables=[events, users],
    order_by_fields=["id"],
    table_name="user_activity_by_day",
    materialized_view_name="mv_user_activity_by_day",
))

AggTransform.py

from typing import Annotated, TypedDict

from pydantic import BaseModel

from moose_lib import MaterializedView, AggregateFunction, Key, MaterializedViewOptions

# NOTE(review): events is not defined in this snippet; presumably it is a
# source table declared elsewhere - confirm and import it.


# Destination schema. A pydantic BaseModel is used here, like every other
# model in these examples.
class MetricsById(BaseModel):
    id: Key[str]

    # avg_rating stores the result of avgState(events.rating)
    #   - Annotated[float, ...] models the read-time result type
    #   - The aggregate function name is "avg"
    #   - param_types is [float]: the type of the column being aggregated
    avg_rating: Annotated[float, AggregateFunction(agg_func="avg", param_types=[float])]

    # daily_uniques stores the result of uniqExactState(events.user_id)
    #   - uniqExact returns an integer; Annotated[int, ...] models this result type
    #   - The aggregate function name is "uniqExact"
    #   - The column being aggregated (events.user_id) is a string, so param_types is [str]
    daily_uniques: Annotated[int, AggregateFunction(agg_func="uniqExact", param_types=[str])]


# The SELECT must output aggregate states
STMT = """
  SELECT
    id,
    avgState({events.columns.rating})        AS avg_rating,
    uniqExactState({events.columns.user_id}) AS daily_uniques
  FROM {events}
  GROUP BY {events.columns.id}
"""

# Create the MV (engine configuration is not set in this example)
mv = MaterializedView[MetricsById](MaterializedViewOptions(
    select_statement=STMT,
    table_name="metrics_by_id",
    materialized_view_name="mv_metrics_by_id",
    select_tables=[events],
))

Pattern: Annotated[U, AggregateFunction(agg_func="avg", param_types=[float])]
U is the read-time type (e.g., float, int)
agg_func is the aggregation name (e.g., avg, uniqExact)
param_types are the argument types. These are the types of the columns that are being aggregated.

FunctionToTypeMapping.py

from datetime import datetime
from typing import Annotated

from moose_lib import AggregateFunction

# Each line pairs a read-time type with the aggregate function whose state the
# column stores. The trailing comment shows the matching ...State call.

# avg yields a floating-point result even for integer input, so the read-time type is float.
Annotated[float, AggregateFunction(agg_func="avg", param_types=[int])]  # avgState(col: int)
Annotated[int, AggregateFunction(agg_func="uniqExact", param_types=[str])]  # uniqExactState(col: str)
Annotated[int, AggregateFunction(agg_func="count", param_types=[])]  # countState(col: any)

Annotated[str, AggregateFunction(agg_func="argMax", param_types=[str, datetime])]  # argMaxState(col: str, value: datetime)
Annotated[str, AggregateFunction(agg_func="argMin", param_types=[str, datetime])]  # argMinState(col: str, value: datetime)

Annotated[float, AggregateFunction(agg_func="corr", param_types=[float, float])]  # corrState(col1: float, col2: float)

Annotated[float, AggregateFunction(agg_func="quantiles", param_types=[float])]  # quantilesState(levels: float, value: float)

AggTransform.py

from typing import Annotated

from pydantic import BaseModel

from moose_lib import MaterializedView, ClickHouseEngines, AggregateFunction, MaterializedViewOptions

# NOTE(review): reviews is not defined in this snippet; presumably it is a
# source table declared elsewhere - confirm and import it.


# Destination schema: one row of aggregate states per id.
class MetricsById(BaseModel):
    id: str
    # Stores the state written by avgState(reviews.rating)
    avg_rating: Annotated[float, AggregateFunction(agg_func="avg", param_types=[float])]
    # Stores the state written by countState(reviews.id). The function must be
    # "count" to match the countState(...) in the SELECT and the countMerge(...)
    # used when querying; the aggregated column (id) is a str.
    total_reviews: Annotated[int, AggregateFunction(agg_func="count", param_types=[str])]


# The SELECT writes aggregate states (...State), not finalized values.
agg_stmt = '''
  SELECT
    {reviews.columns.id} AS id,
    avgState({reviews.columns.rating}) AS avg_rating,
    countState({reviews.columns.id})   AS total_reviews
  FROM {reviews}
  GROUP BY {reviews.columns.id}
'''

mv = MaterializedView[MetricsById](MaterializedViewOptions(
    select_statement=agg_stmt,
    select_tables=[reviews],
    table_name="metrics_by_id",
    engine=ClickHouseEngines.AggregatingMergeTree,
    order_by_fields=["id"],
    materialized_view_name="mv_metrics_by_id",
))

QueryAgg.py

# Manual finalization using ...Merge: each aggregate-state column is read back
# with the -Merge form of the function that wrote it.
QUERY = """
SELECT
  avgMerge(avg_rating)   AS avg_rating,
  countMerge(total_reviews) AS total_reviews
FROM metrics_by_id
WHERE id = '123'
"""

Modeling Materialized Views

Overview

Materialized Views: ClickHouse vs. Other Databases

Requires two parts: a SELECT transformation and a destination table schema

Transformation is write-time: runs on INSERT into source table(s) and writes to the destination table

SQL‑native pipelines: MV 'triggers' live inside ClickHouse, so pipelines are defined and executed entirely in the database — no external orchestrator needed. Cascading MVs require correct DDL order.

On change, Moose Migrate:

Generates and applies destination table DDL when you update the schema in code

Applies DDL in dependency order across views and tables

Backfills or rewires when the SELECT changes

Hot‑reloads the view and destination table locally and keeps APIs in sync

Roadmap: inferring schema from SELECT

Dependency awareness

Basic Usage

A materialized view is like a trigger

Quick Reference

Modeling the Target Table

Basic Transformation, Cleaning, Filtering, Denormalization

Aggregations

Fan-in Patterns

Blue/green schema migrations

Blue/green table migrations

Defining the transformation

Recommended: ClickHouse SQL Reference

Advanced: Writing SELECT statements to Aggregated tables

Backfill Destination Tables

S3Queue sources are not backfilled

Query Destination Tables

Advanced: Querying Aggregated tables

Advanced: Aggregations + Materialized Views

Target Tables with `AggregatingMergeTree`

Common mistakes

Modeling columns with `AggregateFunction`

Writing SELECT statements to Aggregated tables

Warning:

Querying Aggregated Tables

Choosing the right engine

Overview: Which engine should I use?