Choosing the right data types and column ordering for your tables is crucial for ClickHouse performance and storage efficiency. Poor schema design can lead to 10-100x slower queries and 2-5x larger storage requirements.
Keep the following best practices in mind when defining your column types:
Nullable columns in ClickHouse have significant performance overhead.
Instead of using ? or | undefined (TypeScript) or | None or Optional[type] (Python), add the & ClickHouseDefault<"..."> (TypeScript) or Annotated[type, clickhouse_default("...")] (Python) to your column type.
import { OlapTable } from "@514labs/moose-lib"; // ❌ Bad: Using nullable columnsinterface UserEvent { id: string; userId: string; eventType: string; description?: string; // Nullable createdAt: Date;} // ✅ Good: Use default values insteadinterface UserEvent { id: string; userId: string; eventType: string; description: string & ClickHouseDefault<"''"> // DEFAULT '' createdAt: Date;} const userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["id", "createdAt"]});LowCardinality is ClickHouse's most efficient string type for columns with limited unique values.
import { OlapTable, LowCardinality } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: string & LowCardinality; // ✅ Good for limited values status: "active" | "inactive" | "pending"; // ✅ Literals become LowCardinality automatically country: string & LowCardinality; // ✅ Good for country codes userAgent: string; // ❌ Keep as String for high cardinality createdAt: Date;} const userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["id", "createdAt"]});Choose the smallest integer type that fits your data range to save storage and improve performance.
import { OlapTable, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64 } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; age: UInt8; // ✅ 0-255 (1 byte) score: Int16; // ✅ -32,768 to 32,767 (2 bytes) viewCount: UInt32; // ✅ 0 to ~4 billion (4 bytes) timestamp: UInt64; // ✅ Unix timestamp (8 bytes) eventType: string; createdAt: Date;} // Integer type ranges:// UInt8: 0 to 255// UInt16: 0 to 65,535// UInt32: 0 to 4,294,967,295// UInt64: 0 to 18,446,744,073,709,551,615// Int8: -128 to 127// Int16: -32,768 to 32,767// Int32: -2,147,483,648 to 2,147,483,647// Int64: -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807Choose appropriate DateTime precision based on your use case to balance storage and precision.
import { OlapTable, DateTime, DateTime64 } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: string; createdAt: DateTime; // ✅ Second precision (default) updatedAt: DateTime64<3>; // ✅ Millisecond precision processedAt: DateTime64<6>; // ✅ Microsecond precision loggedAt: DateTime64<9>; // ✅ Nanosecond precision} const userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["id", "createdAt"]});Use Decimal for financial and precise calculations to avoid floating-point precision issues.
import { OlapTable, Decimal } from "@514labs/moose-lib"; interface Order { id: string; userId: string; amount: Decimal<10, 2>; // ✅ 10 total digits, 2 decimal places tax: Decimal<8, 2>; // ✅ 8 total digits, 2 decimal places discount: Decimal<5, 2>; // ✅ 5 total digits, 2 decimal places total: Decimal<12, 2>; // ✅ 12 total digits, 2 decimal places createdAt: Date;} // ❌ Bad: Using float for financial datainterface BadOrder { id: string; amount: number; // Float - can cause precision issues tax: number; // Float - can cause precision issues} const ordersTable = new OlapTable<Order>("orders", { orderByFields: ["id", "createdAt"]});NamedTuple is more efficient than Nested for structured data in ClickHouse.
import { OlapTable, ClickHouseNamedTuple } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: string; location: { latitude: number; longitude: number; city: string; country: string; } & ClickHouseNamedTuple; metadata: { version: string; source: string; priority: number; } & ClickHouseNamedTuple; createdAt: Date;} // ❌ Bad: Using Nested (less efficient)interface BadUserEvent { id: string; location: { latitude: number; longitude: number; };} const userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["id", "createdAt"]});Optimize your orderByFields (or orderByExpression) for your most common query patterns.
import { OlapTable } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: string; status: string; createdAt: Date; country: string;} // ✅ Good: Optimized for common query patternsconst userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["userId", "createdAt", "eventType"] // Most common filters first}); // Common queries this optimizes for:// - WHERE userId = ? AND createdAt > ?// - WHERE userId = ? AND eventType = ?// - GROUP BY userId, eventTypePlace LowCardinality columns first in your orderByFields (TypeScript) or order_by_fields (Python) (or reflect this priority in your orderByExpression (TypeScript) or order_by_expression (Python)) for better compression and query performance.
import { OlapTable, LowCardinality } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: LowCardinality<string>; // ✅ Low cardinality status: LowCardinality<string>; // ✅ Low cardinality country: LowCardinality<string>; // ✅ Low cardinality createdAt: Date; // High cardinality sessionId: string; // High cardinality} // ✅ Good: LowCardinality columns firstconst userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["eventType", "status", "country", "createdAt", "sessionId"]}); // ❌ Bad: High cardinality columns firstconst badUserEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["createdAt", "sessionId", "eventType", "status"] // Less efficient});import { OlapTable, LowCardinality } from "@514labs/moose-lib"; interface UserEvent { id: string; userId: string; eventType: LowCardinality<string>; // ✅ Low cardinality status: LowCardinality<string>; // ✅ Low cardinality country: LowCardinality<string>; // ✅ Low cardinality createdAt: Date; // High cardinality sessionId: string; // High cardinality} // ✅ Good: LowCardinality columns firstconst userEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["eventType", "status", "country", "createdAt", "sessionId"]}); // ❌ Bad: High cardinality columns firstconst badUserEventsTable = new OlapTable<UserEvent>("user_events", { orderByFields: ["createdAt", "sessionId", "eventType", "status"] // Less efficient});