Schema Optimization

AvoidNullable.py

from moose_lib import OlapTablefrom pydantic import BaseModel, Field # ❌ Bad: Using nullable columnsclass UserEvent(BaseModel):    id: str    user_id: str    event_type: str    description: str | None = None  # Nullable    metadata: dict | None = None    # Nullable    created_at: Date # ✅ Good: Use default values insteadclass UserEvent(BaseModel):    id: str    user_id: str    event_type: str    description: str = ""  # Default empty string    metadata: dict = Field(default_factory=dict)  # Default empty dict    created_at: Date user_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["id", "created_at"]})

LowCardinality.py

from moose_lib import OlapTable, LowCardinalityfrom pydantic import BaseModelfrom typing import Literal class UserEvent(BaseModel):    id: str    user_id: str    event_type: Annotated[str, "LowCardinality"]  # ✅ Good for limited values    status: Literal["active", "inactive", "pending"]# ✅ Literals become LowCardinality automatically    country: Annotated[str, "LowCardinality"]      # ✅ Good for country codes    user_agent: str                   # ❌ Keep as String for high cardinality    created_at: Date user_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["id", "created_at"]})

IntegerTypes.py

from typing import Annotatedfrom moose_lib import OlapTable, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64from pydantic import BaseModel class UserEvent(BaseModel):    id: str    user_id: str    age: Annotated[int, "uint8"]                     # ✅ 0-255 (1 byte)    score: Annotated[int, "int16"]                   # ✅ -32,768 to 32,767 (2 bytes)    view_count: Annotated[int, "int32"]             # ✅ 0 to ~4 billion (4 bytes)    timestamp: Annotated[int, "int64"]              # ✅ Unix timestamp (8 bytes)    event_type: str    created_at: Date # Integer type ranges:# UInt8:  0 to 255# UInt16: 0 to 65,535# UInt32: 0 to 4,294,967,295# UInt64: 0 to 18,446,744,073,709,551,615# Int8:   -128 to 127# Int16:  -32,768 to 32,767# Int32:  -2,147,483,648 to 2,147,483,647# Int64:  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807

DateTimePrecision.py

from moose_lib import OlapTable, DateTime, DateTime64from pydantic import BaseModelfrom datetime import datetime class UserEvent(BaseModel):    id: str    user_id: str    event_type: str    created_at: datetime            # ✅ Second precision (default)    updated_at: clickhouse_datetime(3)      # ✅ Millisecond precision    processed_at: clickhouse_datetime(6)    # ✅ Microsecond precision    logged_at: clickhouse_datetime(9)       # ✅ Nanosecond precision user_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["id", "created_at"]})

DecimalOverFloat.py

from moose_lib import OlapTable, Decimalfrom pydantic import BaseModel class Order(BaseModel):    id: str    user_id: str    amount: clickhouse_decimal(10, 2)         # ✅ 10 total digits, 2 decimal places    tax: clickhouse_decimal(8, 2)             # ✅ 8 total digits, 2 decimal places    discount: clickhouse_decimal(5, 2)        # ✅ 5 total digits, 2 decimal places    total: clickhouse_decimal(12, 2)          # ✅ 12 total digits, 2 decimal places    created_at: Date # ❌ Bad: Using float for financial dataclass BadOrder(BaseModel):    id: str    amount: float  # Float - can cause precision issues    tax: float     # Float - can cause precision issues orders_table = OlapTable[Order]("orders", {    "orderByFields": ["id", "created_at"]})

NamedTupleOverNested.py

from moose_lib import OlapTable, NamedTuplefrom pydantic import BaseModel class Location(BaseModel):    latitude: float    longitude: float    city: str    country: str class Metadata(BaseModel):    version: str    source: str    priority: int class UserEvent(BaseModel):    id: str    user_id: str    event_type: str    location: Annotated[Location, "ClickHouseNamedTuple"]  # lat, lon, city, country    metadata: Annotated[Metadata, "ClickHouseNamedTuple"]           # version, source, priority    created_at: Date # ❌ Bad: Using nested objects (less efficient)class BadUserEvent(BaseModel):    id: str    location: Location  # Nested - less efficient    metadata: Metadata  # Nested - less efficient user_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["id", "created_at"]})

OrderByOptimization.py

from moose_lib import OlapTablefrom pydantic import BaseModel class UserEvent(BaseModel):    id: str    user_id: str    event_type: str    status: str    created_at: Date    country: str # ✅ Good: Optimized for common query patternsuser_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["user_id", "created_at", "event_type"]  # Most common filters first}) # Common queries this optimizes for:# - WHERE user_id = ? AND created_at > ?# - WHERE user_id = ? AND event_type = ?# - GROUP BY user_id, event_type

LowCardinalityOrdering.py

from moose_lib import OlapTable, LowCardinalityfrom pydantic import BaseModel class UserEvent(BaseModel):    id: str    user_id: str    event_type: LowCardinality[str]     # ✅ Low cardinality    status: LowCardinality[str]          # ✅ Low cardinality    country: LowCardinality[str]         # ✅ Low cardinality    created_at: Date                     # High cardinality    session_id: str                      # High cardinality # ✅ Good: LowCardinality columns firstuser_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["event_type", "status", "country", "created_at", "session_id"]}) # ❌ Bad: High cardinality columns firstbad_user_events_table = OlapTable[UserEvent]("user_events", {    "orderByFields": ["created_at", "session_id", "event_type", "status"]  # Less efficient})

LowCardinalityOrdering.ts

import { OlapTable, LowCardinality } from "@514labs/moose-lib"; interface UserEvent {  id: string;  userId: string;  eventType: LowCardinality<string>;    // ✅ Low cardinality  status: LowCardinality<string>;       // ✅ Low cardinality  country: LowCardinality<string>;      // ✅ Low cardinality  createdAt: Date;                      // High cardinality  sessionId: string;                    // High cardinality} // ✅ Good: LowCardinality columns firstconst userEventsTable = new OlapTable<UserEvent>("user_events", {  orderByFields: ["eventType", "status", "country", "createdAt", "sessionId"]}); // ❌ Bad: High cardinality columns firstconst badUserEventsTable = new OlapTable<UserEvent>("user_events", {  orderByFields: ["createdAt", "sessionId", "eventType", "status"]  // Less efficient});

Schema Optimization Matters

Data Types

Avoid Nullable Columns

Use ClickHouse defaults instead of nulls

Use `LowCardinality` where possible

Pick the right Integer types

Use the right precision for `DateTime`

Use Decimal over Float

Use `NamedTuple` over `Nested`

Ordering

Choose columns that you will use in WHERE and GROUP BY clauses

`ORDER BY` should prioritize LowCardinality columns first