diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index a8c6af00ed..4d3ea6f7af 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -49,7 +49,6 @@ function getDefaultClickhouseSettings(): ClickHouseSettings { ), // Safety settings - allow_experimental_object_type: 1, format_csv_allow_double_quotes: 0, readonly: "1", // Ensure queries are read-only }; diff --git a/internal-packages/tsql/src/query/printer.test.ts b/internal-packages/tsql/src/query/printer.test.ts index 5612b6e257..6b82c08b07 100644 --- a/internal-packages/tsql/src/query/printer.test.ts +++ b/internal-packages/tsql/src/query/printer.test.ts @@ -429,15 +429,15 @@ describe("ClickHousePrinter", () => { }); it("should handle IS NULL syntax", () => { - const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NULL"); + const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NULL"); - expect(sql).toContain("isNull(error)"); + expect(sql).toContain("isNull(started_at)"); }); it("should handle IS NOT NULL syntax", () => { - const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NOT NULL"); + const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NOT NULL"); - expect(sql).toContain("isNotNull(error)"); + expect(sql).toContain("isNotNull(started_at)"); }); }); @@ -2172,6 +2172,152 @@ describe("Column metadata", () => { }); }); +describe("Unknown column blocking", () => { + /** + * These tests verify that unknown columns are blocked at compile time, + * preventing access to internal ClickHouse columns that aren't exposed in the schema. + */ + + describe("should block unknown columns in SELECT", () => { + it("should throw error for unknown column in SELECT list", () => { + expect(() => { + printQuery("SELECT id, unknown_column FROM task_runs"); + }).toThrow(QueryError); + expect(() => { + printQuery("SELECT id, unknown_column FROM task_runs"); + }).toThrow(/unknown.*column.*unknown_column/i); + }); + + it("should throw error for internal ClickHouse column name not in schema", () => { + // The schema exposes 'created' but the internal ClickHouse column is 'created_at' + // Using the internal name directly should be blocked + const schema = createSchemaRegistry([runsSchema]); + const ctx = createPrinterContext({ + organizationId: "org_test", + projectId: "proj_test", + environmentId: "env_test", + schema, + }); + + // 'created_at' is not in runsSchema - only 'created' which maps to 'created_at' + expect(() => { + printQuery("SELECT id, created_at FROM runs", ctx); + }).toThrow(QueryError); + }); + + it("should suggest TSQL column name when user types ClickHouse column name", () => { + // The schema exposes 'created' but the internal ClickHouse column is 'created_at' + // When user types 'created_at', we should suggest 'created' + const schema = createSchemaRegistry([runsSchema]); + const ctx = createPrinterContext({ + organizationId: "org_test", + projectId: "proj_test", + environmentId: "env_test", + schema, + }); + + expect(() => { + printQuery("SELECT id, created_at FROM runs", ctx); + }).toThrow(/Did you mean "created"/); + }); + + it("should throw error for unknown qualified column (table.column)", () => { + expect(() => { + printQuery("SELECT task_runs.unknown_column FROM task_runs"); + }).toThrow(QueryError); + }); + }); + + describe("should block unknown columns in WHERE", () => { + it("should throw error for unknown column in WHERE clause", () => { + expect(() => { + printQuery("SELECT id FROM task_runs WHERE unknown_column = 'test'"); + }).toThrow(QueryError); + }); + + it("should throw error for unknown column in complex WHERE", () => { + expect(() => { + printQuery("SELECT id FROM task_runs WHERE status = 'completed' AND unknown_column > 10"); + }).toThrow(QueryError); + }); + }); + + describe("should block unknown columns in ORDER BY", () => { + it("should throw error for unknown column in ORDER BY", () => { + expect(() => { + printQuery("SELECT id, status FROM task_runs ORDER BY unknown_column DESC"); + }).toThrow(QueryError); + }); + }); + + describe("should block unknown columns in GROUP BY", () => { + it("should throw error for unknown column in GROUP BY", () => { + expect(() => { + printQuery("SELECT count(*) FROM task_runs GROUP BY unknown_column"); + }).toThrow(QueryError); + }); + }); + + describe("should allow SELECT aliases", () => { + it("should allow ORDER BY to reference aliased columns", () => { + // This should NOT throw - 'cnt' is a valid alias from SELECT + const { sql } = printQuery( + "SELECT status, count(*) AS cnt FROM task_runs GROUP BY status ORDER BY cnt DESC" + ); + expect(sql).toContain("ORDER BY cnt DESC"); + }); + + it("should allow HAVING to reference aliased columns", () => { + const { sql } = printQuery( + "SELECT status, count(*) AS cnt FROM task_runs GROUP BY status HAVING cnt > 10" + ); + expect(sql).toContain("HAVING"); + }); + + it("should allow ORDER BY to reference implicit aggregation names", () => { + // COUNT() without alias gets implicit name 'count' + const { sql } = printQuery( + "SELECT status, count() FROM task_runs GROUP BY status ORDER BY count DESC" + ); + expect(sql).toContain("ORDER BY count DESC"); + }); + }); + + // Note: CTE support is limited - CTEs are not added to the table context, + // so CTE column references are treated as unknown. This is a pre-existing limitation. + // The tests below verify that unknown column blocking doesn't break existing behavior. + + describe("should allow subquery alias references", () => { + it("should allow referencing columns from subquery in FROM clause", () => { + const { sql } = printQuery(` + SELECT sub.status_name, sub.total + FROM ( + SELECT status AS status_name, count(*) AS total + FROM task_runs + GROUP BY status + ) AS sub + ORDER BY sub.total DESC + `); + expect(sql).toContain("status_name"); + expect(sql).toContain("total"); + }); + + it("should allow unqualified references to subquery columns", () => { + const { sql } = printQuery(` + SELECT status_name, total + FROM ( + SELECT status AS status_name, count(*) AS total + FROM task_runs + GROUP BY status + ) + WHERE total > 10 + `); + expect(sql).toContain("status_name"); + expect(sql).toContain("total"); + }); + }); +}); + describe("Field Mapping Value Transformation", () => { // Test schema with a field mapping column const fieldMappingSchema: TableSchema = { @@ -2278,3 +2424,191 @@ describe("Field Mapping Value Transformation", () => { // But the column metadata should use the exposed name }); }); + +describe("Internal-only column blocking", () => { + /** + * These tests verify that internal columns (tenant columns, required filter columns) + * that are NOT exposed in tableSchema.columns are blocked from user queries in + * SELECT, ORDER BY, GROUP BY, and HAVING clauses, but allowed in system-generated + * tenant isolation guards. + */ + + // Schema with hidden tenant columns (not exposed in public columns) + const hiddenTenantSchema: TableSchema = { + name: "hidden_tenant_runs", + clickhouseName: "trigger_dev.task_runs_v2", + columns: { + // Public columns - these are exposed to users + id: { name: "id", ...column("String") }, + status: { name: "status", ...column("String") }, + task_identifier: { name: "task_identifier", ...column("String") }, + created_at: { name: "created_at", ...column("DateTime64") }, + }, + // Tenant columns are NOT in the public columns above + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + }; + + // Schema with hidden required filter column + const hiddenFilterSchema: TableSchema = { + name: "hidden_filter_runs", + clickhouseName: "trigger_dev.task_runs_v2", + columns: { + id: { name: "id", ...column("String") }, + status: { name: "status", ...column("String") }, + created_at: { name: "created_at", ...column("DateTime64") }, + }, + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + requiredFilters: [ + { column: "engine", value: "V2" }, // 'engine' is not in public columns + ], + }; + + function createHiddenTenantContext(): PrinterContext { + const schema = createSchemaRegistry([hiddenTenantSchema]); + return createPrinterContext({ + organizationId: "org_test", + projectId: "proj_test", + environmentId: "env_test", + schema, + }); + } + + function createHiddenFilterContext(): PrinterContext { + const schema = createSchemaRegistry([hiddenFilterSchema]); + return createPrinterContext({ + organizationId: "org_test", + projectId: "proj_test", + environmentId: "env_test", + schema, + }); + } + + describe("should block internal-only columns in SELECT", () => { + it("should throw error when selecting hidden tenant column (organization_id)", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery("SELECT id, organization_id FROM hidden_tenant_runs", ctx); + }).toThrow(QueryError); + expect(() => { + printQuery("SELECT id, organization_id FROM hidden_tenant_runs", ctx); + }).toThrow(/not available for querying/i); + }); + + it("should throw error when selecting hidden tenant column (project_id)", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery("SELECT project_id FROM hidden_tenant_runs", ctx); + }).toThrow(/not available for querying/i); + }); + + it("should throw error when selecting hidden tenant column (environment_id)", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery("SELECT environment_id FROM hidden_tenant_runs", ctx); + }).toThrow(/not available for querying/i); + }); + + it("should throw error when selecting hidden required filter column", () => { + const ctx = createHiddenFilterContext(); + expect(() => { + printQuery("SELECT id, engine FROM hidden_filter_runs", ctx); + }).toThrow(/not available for querying/i); + }); + }); + + describe("should block internal-only columns in ORDER BY", () => { + it("should throw error when ordering by hidden tenant column", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery("SELECT id, status FROM hidden_tenant_runs ORDER BY organization_id", ctx); + }).toThrow(/not available for querying/i); + }); + + it("should throw error when ordering by hidden required filter column", () => { + const ctx = createHiddenFilterContext(); + expect(() => { + printQuery("SELECT id, status FROM hidden_filter_runs ORDER BY engine", ctx); + }).toThrow(/not available for querying/i); + }); + }); + + describe("should block internal-only columns in GROUP BY", () => { + it("should throw error when grouping by hidden tenant column", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery("SELECT count(*) FROM hidden_tenant_runs GROUP BY organization_id", ctx); + }).toThrow(/not available for querying/i); + }); + + it("should throw error when grouping by hidden required filter column", () => { + const ctx = createHiddenFilterContext(); + expect(() => { + printQuery("SELECT count(*) FROM hidden_filter_runs GROUP BY engine", ctx); + }).toThrow(/not available for querying/i); + }); + }); + + describe("should block internal-only columns in HAVING", () => { + it("should throw error when using hidden column in HAVING", () => { + const ctx = createHiddenTenantContext(); + expect(() => { + printQuery( + "SELECT status, count(*) as cnt FROM hidden_tenant_runs GROUP BY status HAVING organization_id = 'x'", + ctx + ); + }).toThrow(/not available for querying/i); + }); + }); + + describe("should allow tenant guard in WHERE clause", () => { + it("should successfully execute query with tenant isolation (hidden tenant columns work internally)", () => { + const ctx = createHiddenTenantContext(); + // This should succeed - the tenant guard is injected internally and should work + const { sql } = printQuery("SELECT id, status FROM hidden_tenant_runs", ctx); + + // Verify tenant guards are present in WHERE clause + expect(sql).toContain("organization_id"); + expect(sql).toContain("project_id"); + expect(sql).toContain("environment_id"); + // But NOT in SELECT + expect(sql).toMatch(/SELECT\s+id,\s*status\s+FROM/i); + }); + + it("should successfully execute query with required filter (hidden filter columns work internally)", () => { + const ctx = createHiddenFilterContext(); + const { sql, params } = printQuery("SELECT id, status FROM hidden_filter_runs", ctx); + + // Verify required filter is present in WHERE clause + expect(sql).toContain("engine"); + // The value V2 is parameterized, check that it's in the params + expect(Object.values(params)).toContain("V2"); + }); + }); + + describe("should allow exposed tenant columns", () => { + // In task_runs schema, tenant columns ARE exposed in the public columns + it("should allow selecting exposed tenant column", () => { + // task_runs schema exposes organization_id in its columns + const { sql } = printQuery("SELECT id, organization_id FROM task_runs"); + expect(sql).toContain("SELECT id, organization_id"); + }); + + it("should allow ordering by exposed tenant column", () => { + const { sql } = printQuery("SELECT id, status FROM task_runs ORDER BY organization_id"); + expect(sql).toContain("ORDER BY organization_id"); + }); + + it("should allow grouping by exposed tenant column", () => { + const { sql } = printQuery("SELECT organization_id, count(*) FROM task_runs GROUP BY organization_id"); + expect(sql).toContain("GROUP BY organization_id"); + }); + }); +}); diff --git a/internal-packages/tsql/src/query/printer.ts b/internal-packages/tsql/src/query/printer.ts index 734b4dc87c..dcdc82d69b 100644 --- a/internal-packages/tsql/src/query/printer.ts +++ b/internal-packages/tsql/src/query/printer.ts @@ -116,6 +116,27 @@ export class ClickHousePrinter { private inGroupByContext = false; /** Columns hidden when SELECT * is expanded to core columns only */ private hiddenColumns: string[] = []; + /** + * Set of column aliases defined in the current SELECT clause. + * Used to allow ORDER BY/HAVING to reference aliased columns. + */ + private selectAliases: Set = new Set(); + /** + * Set of internal ClickHouse column names that are allowed (e.g., tenant columns). + * These are populated from tableSchema.tenantColumns when processing joins. + */ + private allowedInternalColumns: Set = new Set(); + /** + * Set of internal-only column names that are NOT user-queryable. + * These are tenant columns and required filter columns that are not exposed in tableSchema.columns. + * Used to block user queries from accessing internal columns in SELECT, ORDER BY, GROUP BY, HAVING. + */ + private internalOnlyColumns: Set = new Set(); + /** + * Whether we're currently processing user projection clauses (SELECT, ORDER BY, GROUP BY, HAVING). + * When true, internal-only columns will be rejected. + */ + private inProjectionContext = false; constructor( private context: PrinterContext, @@ -310,9 +331,21 @@ export class ClickHousePrinter { const isTopLevelQuery = this.stack.length <= 1 || (this.stack.length === 2 && partOfSelectUnion); - // Clear table contexts for top-level queries (subqueries inherit parent context) + // Save and clear table contexts + // Top-level queries clear contexts; subqueries save parent context and create fresh context + const savedTableContexts = new Map(this.tableContexts); + const savedInternalColumns = new Set(this.allowedInternalColumns); + const savedInternalOnlyColumns = new Set(this.internalOnlyColumns); if (isTopLevelQuery) { this.tableContexts.clear(); + this.allowedInternalColumns.clear(); + this.internalOnlyColumns.clear(); + } else { + // Subqueries get fresh contexts - they don't inherit parent tables + // (the parent will restore its context after the subquery is processed) + this.tableContexts = new Map(); + this.allowedInternalColumns = new Set(); + this.internalOnlyColumns = new Set(); } // Build WHERE clause starting with any existing where @@ -349,17 +382,30 @@ export class ClickHousePrinter { nextJoin = nextJoin.next_join; } + // Extract SELECT column aliases BEFORE visiting columns + // This allows ORDER BY/HAVING to reference aliased columns + const savedAliases = this.selectAliases; + this.selectAliases = new Set(); + if (node.select) { + for (const col of node.select) { + this.extractSelectAlias(col); + } + } + // Process SELECT columns and collect metadata // Using flatMap because asterisk expansion can return multiple columns + // Set inProjectionContext to block internal-only columns in user projections let columns: string[]; if (node.select && node.select.length > 0) { // Only collect metadata for top-level queries (not subqueries) if (isTopLevelQuery) { this.outputColumns = []; } + this.inProjectionContext = true; columns = node.select.flatMap((col) => this.visitSelectColumnWithMetadata(col, isTopLevelQuery) ); + this.inProjectionContext = false; } else { columns = ["1"]; } @@ -377,16 +423,33 @@ export class ClickHousePrinter { const prewhere = node.prewhere ? this.visit(node.prewhere) : null; const whereStr = where ? this.visit(where) : null; - // Process GROUP BY with context flag to use raw columns for whereTransform columns + // Process GROUP BY with context flags: + // - inGroupByContext: use raw columns for whereTransform columns + // - inProjectionContext: block internal-only columns let groupBy: string[] | null = null; if (node.group_by) { this.inGroupByContext = true; + this.inProjectionContext = true; groupBy = node.group_by.map((col) => this.visit(col)); + this.inProjectionContext = false; this.inGroupByContext = false; } - const having = node.having ? this.visit(node.having) : null; - const orderBy = node.order_by ? node.order_by.map((col) => this.visit(col)) : null; + // Process HAVING with inProjectionContext to block internal-only columns + let having: string | null = null; + if (node.having) { + this.inProjectionContext = true; + having = this.visit(node.having); + this.inProjectionContext = false; + } + + // Process ORDER BY with inProjectionContext to block internal-only columns + let orderBy: string[] | null = null; + if (node.order_by) { + this.inProjectionContext = true; + orderBy = node.order_by.map((col) => this.visit(col)); + this.inProjectionContext = false; + } // Process ARRAY JOIN let arrayJoin = ""; @@ -478,9 +541,57 @@ export class ClickHousePrinter { response = this.pretty ? `(${response.trim()})` : `(${response})`; } + // Restore saved contexts (for nested queries) + this.selectAliases = savedAliases; + this.tableContexts = savedTableContexts; + this.allowedInternalColumns = savedInternalColumns; + this.internalOnlyColumns = savedInternalOnlyColumns; + return response; } + /** + * Extract column aliases from a SELECT expression. + * Handles explicit aliases (AS name) and implicit names from aggregations/functions. + * + * NOTE: We intentionally do NOT add field names as aliases here. + * Field names (e.g., SELECT status) are columns from the table, not aliases. + * Only explicit aliases (SELECT x AS name) and implicit function names + * (SELECT COUNT() → 'count') should be added. + */ + private extractSelectAlias(expr: Expression): void { + // Handle explicit Alias: SELECT ... AS name + if ((expr as Alias).expression_type === "alias") { + this.selectAliases.add((expr as Alias).alias); + return; + } + + // Handle implicit names from function calls (e.g., COUNT() → 'count') + if ((expr as Call).expression_type === "call") { + const call = expr as Call; + // Aggregations and functions get implicit lowercase names + this.selectAliases.add(call.name.toLowerCase()); + return; + } + + // Handle implicit names from arithmetic operations (e.g., a + b → 'plus') + if ((expr as ArithmeticOperation).expression_type === "arithmetic_operation") { + const op = expr as ArithmeticOperation; + const opNames: Record = { + [ArithmeticOperationOp.Add]: "plus", + [ArithmeticOperationOp.Sub]: "minus", + [ArithmeticOperationOp.Mult]: "multiply", + [ArithmeticOperationOp.Div]: "divide", + [ArithmeticOperationOp.Mod]: "modulo", + }; + this.selectAliases.add(opNames[op.op]); + return; + } + + // Field references (e.g., SELECT status) are NOT aliases - they're columns + // that will be validated against the table schema. Don't add them here. + } + /** * Visit a SELECT column expression with metadata collection * @@ -1338,6 +1449,43 @@ export class ClickHousePrinter { // Register this table context for column name resolution this.tableContexts.set(effectiveAlias, tableSchema); + // Register tenant columns as allowed internal columns + // These are ClickHouse column names used for tenant isolation + // Also mark them as internal-only if they're not exposed in tableSchema.columns + if (tableSchema.tenantColumns) { + const { organizationId, projectId, environmentId } = tableSchema.tenantColumns; + if (organizationId) { + this.allowedInternalColumns.add(organizationId); + if (!this.isColumnExposedInSchema(tableSchema, organizationId)) { + this.internalOnlyColumns.add(organizationId); + } + } + if (projectId) { + this.allowedInternalColumns.add(projectId); + if (!this.isColumnExposedInSchema(tableSchema, projectId)) { + this.internalOnlyColumns.add(projectId); + } + } + if (environmentId) { + this.allowedInternalColumns.add(environmentId); + if (!this.isColumnExposedInSchema(tableSchema, environmentId)) { + this.internalOnlyColumns.add(environmentId); + } + } + } + + // Register required filter columns as allowed internal columns + // These are ClickHouse columns used for internal filtering (e.g., engine = 'V2') + // Also mark them as internal-only if they're not exposed in tableSchema.columns + if (tableSchema.requiredFilters) { + for (const filter of tableSchema.requiredFilters) { + this.allowedInternalColumns.add(filter.column); + if (!this.isColumnExposedInSchema(tableSchema, filter.column)) { + this.internalOnlyColumns.add(filter.column); + } + } + } + // Add tenant isolation guard extraWhere = this.createTenantGuard(tableSchema, effectiveAlias); } else if ( @@ -2081,6 +2229,8 @@ export class ClickHousePrinter { /** * Resolve field chain to use ClickHouse column names where applicable * Handles both qualified (table.column) and unqualified (column) references + * + * @throws QueryError if the column is not found in any table schema and is not a SELECT alias */ private resolveFieldChain(chain: Array): Array { if (chain.length === 0) { @@ -2101,11 +2251,11 @@ export class ClickHousePrinter { // This is a table.column reference const columnName = chain[1]; if (typeof columnName === "string") { - const resolvedColumn = this.resolveColumnName(tableSchema, columnName); + const resolvedColumn = this.resolveColumnName(tableSchema, columnName, tableAlias); return [tableAlias, resolvedColumn, ...chain.slice(2)]; } } - // Not a known table alias, might be a nested field - return as-is + // Not a known table alias, might be a nested field or JSON path - return as-is return chain; } @@ -2119,20 +2269,128 @@ export class ClickHousePrinter { } } - // Column not found in any table context - return as-is (might be a function, subquery alias, etc.) + // Check if it's a SELECT alias (e.g., from COUNT() or explicit AS) + if (this.selectAliases.has(columnName)) { + return chain; // Valid alias reference + } + + // Check if this is an internal-only column being accessed in a user projection context + // (SELECT, ORDER BY, GROUP BY, HAVING). These columns are used internally for + // tenant isolation but should not be user-queryable unless explicitly exposed. + if (this.inProjectionContext && this.internalOnlyColumns.has(columnName)) { + const availableColumns = this.getAvailableColumnNames(); + throw new QueryError( + `Column "${columnName}" is not available for querying. Available columns: ${availableColumns.join(", ")}` + ); + } + + // Check if this is an allowed internal column (e.g., tenant columns) + // These are ClickHouse column names that are allowed for internal use only + // (e.g., in WHERE clauses for tenant isolation) + if (this.allowedInternalColumns.has(columnName)) { + return chain; + } + + // Column not found in any table context and not a SELECT alias + // This is a security issue - block access to unknown columns + if (this.tableContexts.size > 0) { + // Only throw if we have tables in context (otherwise might be subquery) + // Check if the user typed a ClickHouse column name instead of the TSQL name + const suggestion = this.findTSQLNameForClickHouseName(columnName); + if (suggestion) { + throw new QueryError(`Unknown column "${columnName}". Did you mean "${suggestion}"?`); + } + + const availableColumns = this.getAvailableColumnNames(); + throw new QueryError( + `Unknown column "${columnName}". Available columns: ${availableColumns.join(", ")}` + ); + } + + // No tables in context (might be FROM subquery without alias) - return as-is return chain; } + /** + * Get list of available column names from all tables in context + */ + private getAvailableColumnNames(): string[] { + const columns: string[] = []; + for (const tableSchema of this.tableContexts.values()) { + columns.push(...Object.keys(tableSchema.columns)); + } + return [...new Set(columns)].sort(); + } + + /** + * Check if a ClickHouse column name is exposed in the table schema's public columns. + * A column is considered exposed if: + * - It exists as a column name in tableSchema.columns, OR + * - It is the clickhouseName of a column in tableSchema.columns + * + * @param tableSchema The table schema to check + * @param clickhouseColumnName The ClickHouse column name to check + * @returns true if the column is exposed to users, false if it's internal-only + */ + private isColumnExposedInSchema(tableSchema: TableSchema, clickhouseColumnName: string): boolean { + for (const [tsqlName, columnSchema] of Object.entries(tableSchema.columns)) { + // Check if the ClickHouse column name matches either: + // 1. The TSQL name (if no clickhouseName mapping exists) + // 2. The explicit clickhouseName mapping + const actualClickhouseName = columnSchema.clickhouseName || tsqlName; + if (actualClickhouseName === clickhouseColumnName) { + return true; + } + } + return false; + } + + /** + * Find the TSQL column name for a given ClickHouse column name. + * This is used to provide helpful suggestions when users accidentally + * use internal ClickHouse column names instead of TSQL names. + * + * @returns The TSQL column name if found, null otherwise + */ + private findTSQLNameForClickHouseName(clickhouseName: string): string | null { + for (const tableSchema of this.tableContexts.values()) { + for (const [tsqlName, columnSchema] of Object.entries(tableSchema.columns)) { + if (columnSchema.clickhouseName === clickhouseName) { + return tsqlName; + } + } + } + return null; + } + /** * Resolve a column name to its ClickHouse name using the table schema + * + * @throws QueryError if the column is not found in the table schema */ - private resolveColumnName(tableSchema: TableSchema, columnName: string): string { + private resolveColumnName( + tableSchema: TableSchema, + columnName: string, + tableAlias?: string + ): string { const columnSchema = tableSchema.columns[columnName]; if (columnSchema) { return columnSchema.clickhouseName || columnSchema.name; } - // Column not in schema - return as-is (might be a computed column, etc.) - return columnName; + + // Column not in schema - this is a security issue, block access + // Check if the user typed a ClickHouse column name instead of the TSQL name + for (const [tsqlName, colSchema] of Object.entries(tableSchema.columns)) { + if (colSchema.clickhouseName === columnName) { + throw new QueryError(`Unknown column "${columnName}". Did you mean "${tsqlName}"?`); + } + } + + const availableColumns = Object.keys(tableSchema.columns).sort().join(", "); + const tableName = tableAlias || tableSchema.name; + throw new QueryError( + `Unknown column "${columnName}" on table "${tableName}". Available columns: ${availableColumns}` + ); } private visitPlaceholder(node: Placeholder): string { diff --git a/internal-packages/tsql/src/query/schema.test.ts b/internal-packages/tsql/src/query/schema.test.ts index 3d1b8b8430..29656b1cb2 100644 --- a/internal-packages/tsql/src/query/schema.test.ts +++ b/internal-packages/tsql/src/query/schema.test.ts @@ -615,6 +615,17 @@ describe("Error message sanitization", () => { expect(sanitized).toBe("SELECT run_id, triggered_at FROM table"); }); + it("should NOT replace column names in 'Did you mean' error messages", () => { + // When a user types an internal ClickHouse column name, we show "Did you mean X?" + // The sanitizer should NOT replace the column name in this case, as it would + // turn "Unknown column 'created_at'. Did you mean 'triggered_at'?" into + // "Unknown column 'triggered_at'. Did you mean 'triggered_at'?" which is confusing + const error = 'Unknown column "created_at". Did you mean "triggered_at"?'; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + // Should preserve both the original column name AND the suggestion + expect(sanitized).toBe('Unknown column "created_at". Did you mean "triggered_at"?'); + }); + it("should prioritize longer matches (table.column before standalone column)", () => { // This tests that we replace "trigger_dev.task_runs_v2.friendly_id" as a unit, // not "trigger_dev.task_runs_v2" and then "friendly_id" separately diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts index 3f9b5944dc..a5153597ad 100644 --- a/internal-packages/tsql/src/query/schema.ts +++ b/internal-packages/tsql/src/query/schema.ts @@ -857,10 +857,17 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s } // Step 5: Replace standalone column names (for unqualified references) - // Sort by length descending to replace longer names first - const sortedColumnNames = [...columnNameMap.entries()].sort((a, b) => b[0].length - a[0].length); - for (const [clickhouseName, { tsqlName }] of sortedColumnNames) { - result = replaceAllOccurrences(result, clickhouseName, tsqlName); + // Skip column replacement for "Did you mean" errors - these already have the correct names + // and replacing would turn "Unknown column 'created_at'. Did you mean 'triggered_at'?" + // into "Unknown column 'triggered_at'. Did you mean 'triggered_at'?" which is confusing + if (!result.includes('Did you mean "')) { + // Sort by length descending to replace longer names first + const sortedColumnNames = [...columnNameMap.entries()].sort( + (a, b) => b[0].length - a[0].length + ); + for (const [clickhouseName, { tsqlName }] of sortedColumnNames) { + result = replaceAllOccurrences(result, clickhouseName, tsqlName); + } } // Step 6: Remove redundant column aliases like "run_id AS run_id" diff --git a/internal-packages/tsql/src/query/validator.test.ts b/internal-packages/tsql/src/query/validator.test.ts index ff54891d3d..11e90c70ad 100644 --- a/internal-packages/tsql/src/query/validator.test.ts +++ b/internal-packages/tsql/src/query/validator.test.ts @@ -47,13 +47,14 @@ describe("validateQuery", () => { expect(result.issues).toHaveLength(0); }); - it("should still report unknown columns that are not aliases", () => { + it("should report unknown columns that are not aliases as errors", () => { const result = validateSQL( "SELECT status, count(*) as count FROM runs GROUP BY status ORDER BY unknown_col DESC" ); - expect(result.valid).toBe(true); // unknown column is a warning, not error + expect(result.valid).toBe(false); // unknown column is now an error expect(result.issues).toHaveLength(1); expect(result.issues[0].type).toBe("unknown_column"); + expect(result.issues[0].severity).toBe("error"); expect(result.issues[0].columnName).toBe("unknown_col"); }); @@ -97,11 +98,12 @@ describe("validateQuery", () => { expect(result.issues).toHaveLength(0); }); - it("should warn about unknown columns", () => { + it("should error on unknown columns", () => { const result = validateSQL("SELECT id, unknown_column FROM runs LIMIT 10"); - expect(result.valid).toBe(true); // warnings don't affect validity + expect(result.valid).toBe(false); // unknown columns are now errors expect(result.issues).toHaveLength(1); expect(result.issues[0].type).toBe("unknown_column"); + expect(result.issues[0].severity).toBe("error"); expect(result.issues[0].columnName).toBe("unknown_column"); }); }); diff --git a/internal-packages/tsql/src/query/validator.ts b/internal-packages/tsql/src/query/validator.ts index 6235a58e41..5d708500c2 100644 --- a/internal-packages/tsql/src/query/validator.ts +++ b/internal-packages/tsql/src/query/validator.ts @@ -425,7 +425,7 @@ function validateField(field: Field, context: ValidationContext): void { const availableColumns = Object.keys(tableSchema.columns).join(", "); context.issues.push({ message: `Unknown column "${columnName}" on table "${tableAlias}". Available columns: ${availableColumns}`, - severity: "warning", + severity: "error", type: "unknown_column", columnName, tableName: tableAlias, @@ -464,7 +464,7 @@ function validateField(field: Field, context: ValidationContext): void { message: `Unknown column "${columnName}". Available columns: ${Array.from(allColumns).join( ", " )}`, - severity: "warning", + severity: "error", type: "unknown_column", columnName, });