sql-parser/src/autocomplete/suggestion-builder.ts at 48fe0177f3a7f742c71af16cad00c3dd067cb9fd · questdb/sql-parser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
// =============================================================================
// Suggestion Builder
// =============================================================================
// Converts parser token types to autocomplete suggestions.
// This is the SINGLE SOURCE OF TRUTH for suggestion building - the UI should
// NOT duplicate this logic.
// =============================================================================

import { TokenType } from "chevrotain"
import {
  Suggestion,
  SuggestionKind,
  SuggestionPriority,
  SchemaInfo,
  ColumnInfo,
} from "./types"
import {
  SKIP_TOKENS,
  PUNCTUATION_TOKENS,
  EXPRESSION_OPERATORS,
  tokenNameToKeyword,
} from "./token-classification"
import { functions } from "../grammar/index"
import type { TableRef } from "./content-assist"

/**
 * A schema column annotated with the table it was resolved from, so that
 * suggestions can show where a column comes from and, when necessary,
 * qualify it.
 */
interface ColumnWithTable extends ColumnInfo {
  /**
   * Table the column belongs to: the table reference's `table` value for
   * in-scope lookups, or the `schema.columns` key when listing every column.
   */
  tableName: string
  /**
   * Copied from the table reference's `alias` for in-scope lookups; not set
   * when columns are listed from the whole schema.
   */
  tableAlias?: string
}

/**
 * Collect the columns of every table referenced in the query.
 *
 * Each table reference is looked up in `schema.columns` by its lower-cased
 * name. References with no entry in the schema (CTEs, table functions,
 * unknown tables) contribute no columns.
 *
 * @param tablesInScope - Table references found in the query
 * @param schema - Schema information; `columns` is read by lower-cased table name
 * @returns One entry per column per table reference, each tagged with the
 *          reference's table name and alias
 */
function getColumnsInScope(
  tablesInScope: TableRef[],
  schema: SchemaInfo,
): ColumnWithTable[] {
  const columns: ColumnWithTable[] = []

  for (const tableRef of tablesInScope) {
    const lookupKey = tableRef.table.toLowerCase()

    // Only honour the schema's own entries. A plain property read would also
    // resolve inherited members such as "constructor" or "toString", which
    // are not iterable and would make the loop below throw.
    if (!Object.prototype.hasOwnProperty.call(schema.columns, lookupKey)) {
      continue
    }
    const tableColumns = schema.columns[lookupKey] ?? []

    for (const col of tableColumns) {
      columns.push({
        ...col,
        tableName: tableRef.table,
        tableAlias: tableRef.alias,
      })
    }
  }

  return columns
}

/**
 * Flatten every column of every table in the schema into a single list.
 * Used as the fallback when the query has no tables in scope.
 *
 * @param schema - Schema information; `columns` maps table name to its columns
 * @returns All columns, each tagged with the `schema.columns` key it was
 *          listed under (no alias is set)
 */
function getAllColumns(schema: SchemaInfo): ColumnWithTable[] {
  return Object.entries(schema.columns).flatMap(([tableName, tableColumns]) =>
    tableColumns.map((col) => ({ ...col, tableName })),
  )
}

/**
 * Maps a join-prefix token name to the compound keyword offered for it.
 *
 * A prefix such as LEFT is incomplete on its own, so whenever "Join" is one
 * of the valid next tokens the prefix is suggested together with JOIN
 * (e.g., token "Left" → "LEFT JOIN") rather than as a bare keyword.
 */
const JOIN_COMPOUND_MAP: Map<string, string> = new Map(
  Object.entries({
    Left: "LEFT JOIN",
    Inner: "INNER JOIN",
    Cross: "CROSS JOIN",
    Asof: "ASOF JOIN",
    Lt: "LT JOIN",
    Splice: "SPLICE JOIN",
    Window: "WINDOW JOIN",
    Horizon: "HORIZON JOIN",
    Outer: "OUTER JOIN",
  }),
)

/**
 * Turn the columns available at the cursor into column suggestions.
 *
 * Columns are grouped by name. When one name is provided by several distinct
 * qualifiers and at least one of them is an explicit alias (e.g., a self-join
 * `trades t1 JOIN trades t2`), one alias-qualified suggestion is produced per
 * source ("t1.amount", "t2.amount"). Otherwise a single bare column name is
 * produced, so columns that merely share a name across unaliased tables or
 * CTEs are not qualified.
 *
 * The result is returned in reverse emission order: the last column
 * processed comes first. This keeps the ordering callers already receive.
 */
function buildColumnSuggestions(columns: ColumnWithTable[]): Suggestion[] {
  type ColumnSource = { qualifier: string; type: string; hasAlias: boolean }

  // Group by column name, keeping one source per distinct qualifier.
  const sourcesByColumn = new Map<string, ColumnSource[]>()
  for (const col of columns) {
    const qualifier = col.tableAlias ?? col.tableName
    const source: ColumnSource = {
      qualifier,
      type: col.type,
      hasAlias: !!col.tableAlias,
    }
    const sources = sourcesByColumn.get(col.name)
    if (sources === undefined) {
      sourcesByColumn.set(col.name, [source])
    } else if (!sources.some((s) => s.qualifier === qualifier)) {
      // Same column reached through a new alias/table: remember it.
      sources.push(source)
    }
  }

  const columnSuggestions: Suggestion[] = []
  for (const [colName, sources] of sourcesByColumn) {
    const needsQualification =
      sources.length > 1 && sources.some((s) => s.hasAlias)

    if (needsQualification) {
      // Ambiguous: one qualified suggestion per source. filterText stays the
      // bare column name so typing "amo" still matches "t1.amount".
      for (const source of sources) {
        const qualified = `${source.qualifier}.${colName}`
        columnSuggestions.push({
          label: qualified,
          kind: SuggestionKind.Column,
          insertText: qualified,
          detail: ` (${source.qualifier})`,
          description: source.type,
          filterText: colName,
          priority: SuggestionPriority.High,
        })
      }
    } else {
      // Unambiguous: bare column name, described by its first source.
      const source = sources[0]
      columnSuggestions.push({
        label: colName,
        kind: SuggestionKind.Column,
        insertText: colName,
        detail: ` (${source.qualifier})`,
        description: source.type,
        priority: SuggestionPriority.High,
      })
    }
  }

  // A single reverse replaces per-item prepending, which shifted the whole
  // list on every insert and was quadratic for large schemas.
  return columnSuggestions.reverse()
}

/**
 * Build suggestions from parser's nextTokenTypes
 *
 * The result is ordered as: columns, keywords, functions, tables. When
 * nothing at all can be suggested and no identifier is expected, the
 * punctuation tokens the parser accepts are suggested instead.
 *
 * @param tokenTypes - Valid next tokens from parser.computeContentAssist()
 * @param schema - Schema information (tables, columns)
 * @param tablesInScope - Tables found in the query
 * @param options - Optional switches:
 *   `includeColumns` (default true) adds column suggestions,
 *   `includeTables` (default true) adds table suggestions,
 *   `isMidWord` (default false) adds function suggestions, provided columns
 *   or tables are also included.
 * @returns Array of suggestions
 */
export function buildSuggestions(
  tokenTypes: TokenType[],
  schema: SchemaInfo,
  tablesInScope: TableRef[],
  options?: {
    includeColumns?: boolean
    includeTables?: boolean
    isMidWord?: boolean
  },
): Suggestion[] {
  const includeColumns = options?.includeColumns ?? true
  const includeTables = options?.includeTables ?? true
  const isMidWord = options?.isMidWord ?? false

  const keywordSuggestions: Suggestion[] = []
  const seenKeywords = new Set<string>()
  let expectsIdentifier = false

  // Detect join context: when "Join" is a valid next token, join prefix
  // keywords (LEFT, ASOF, etc. — see JOIN_COMPOUND_MAP) are suggested as
  // compounds.
  const isJoinContext = tokenTypes.some((t) => t.name === "Join")

  // Process each token type from the parser
  for (const tokenType of tokenTypes) {
    const name = tokenType.name

    // Skip internal tokens (operators, literals, punctuation)
    if (SKIP_TOKENS.has(name)) {
      continue
    }

    // IdentifierKeyword means the parser's `identifier` rule is active,
    // so column/table names are expected. Bare Identifier/QuotedIdentifier
    // alone (e.g., for custom type names) should not trigger schema suggestions.
    if (name === "IdentifierKeyword") {
      expectsIdentifier = true
      continue
    }
    if (name === "Identifier" || name === "QuotedIdentifier") {
      continue
    }

    // In join context, join prefix tokens become compound keywords
    // (e.g., "Left" → "LEFT JOIN"); everything else is displayed as-is.
    const compound = isJoinContext ? JOIN_COMPOUND_MAP.get(name) : undefined
    const keyword = compound ?? tokenNameToKeyword(name)

    // Skip duplicates
    if (seenKeywords.has(keyword)) {
      continue
    }
    seenKeywords.add(keyword)

    // All parser keyword tokens are keywords (not functions); functions are
    // suggested separately below. Expression operators rank slightly lower,
    // compound join keywords always keep the regular keyword priority.
    const priority =
      compound === undefined && EXPRESSION_OPERATORS.has(name)
        ? SuggestionPriority.MediumLow
        : SuggestionPriority.Medium

    keywordSuggestions.push({
      label: keyword,
      kind: SuggestionKind.Keyword,
      insertText: keyword,
      filterText: keyword.toLowerCase(),
      priority,
    })
  }

  if (!expectsIdentifier) {
    if (keywordSuggestions.length > 0) {
      return keywordSuggestions
    }

    // Fallback: no keyword/identifier suggestions were produced, so check
    // whether the parser expected punctuation (e.g., "(" after
    // "VALUES (1), "). Suggest it so Monaco doesn't fall back to junk
    // word-based completions. The same token type can be listed more than
    // once, so de-duplicate just like the keyword path does.
    const punctuationSuggestions: Suggestion[] = []
    const seenPunctuation = new Set<string>()
    for (const tokenType of tokenTypes) {
      const name = tokenType.name
      if (!PUNCTUATION_TOKENS.has(name)) continue
      const display = tokenNameToKeyword(name)
      if (seenPunctuation.has(display)) continue
      seenPunctuation.add(display)
      punctuationSuggestions.push({
        label: display,
        kind: SuggestionKind.Keyword,
        insertText: display,
        filterText: display.toLowerCase(),
        priority: SuggestionPriority.Low,
      })
    }
    return punctuationSuggestions
  }

  // An identifier is expected: add columns, functions and tables.
  let columnSuggestions: Suggestion[] = []
  if (includeColumns) {
    // Prefer tables in scope (FROM, JOIN), fall back to all columns.
    // Also fall back when tables are in scope but none have known columns
    // (e.g., FROM read_parquet(...) — function call, not a schema table).
    const scopedColumns =
      tablesInScope.length > 0 ? getColumnsInScope(tablesInScope, schema) : []
    columnSuggestions = buildColumnSuggestions(
      scopedColumns.length > 0 ? scopedColumns : getAllColumns(schema),
    )
  }

  // Columns (HIGH priority) go in front of the keywords.
  const suggestions: Suggestion[] = [
    ...columnSuggestions,
    ...keywordSuggestions,
  ]

  // Add functions when the user is mid-word (typing a prefix).
  // This avoids flooding the list with ~300 functions when the user
  // just typed "SELECT " with no prefix. Functions are valid in both
  // expression context (SELECT md5(...)) and table context (FROM long_sequence(...)).
  // Skip functions in post-expression position (includeColumns=false,
  // includeTables=false) — e.g., after "SELECT *" the user is typing a
  // keyword (FROM) or alias, not a function call.
  if (isMidWord && (includeColumns || includeTables)) {
    for (const fn of functions) {
      if (seenKeywords.has(fn.toUpperCase())) continue
      suggestions.push({
        label: fn,
        kind: SuggestionKind.Function,
        insertText: fn,
        priority: SuggestionPriority.Low,
      })
    }
  }

  // Add tables with MEDIUM-LOW priority (lower than columns).
  if (includeTables) {
    for (const table of schema.tables) {
      suggestions.push({
        label: table.name,
        kind: SuggestionKind.Table,
        insertText: table.name,
        priority: SuggestionPriority.MediumLow,
      })
    }

    // Add tables from query scope (CTEs, etc.) that aren't in the schema
    const seenTableNames = new Set(
      schema.tables.map((t) => t.name.toLowerCase()),
    )
    for (const ref of tablesInScope) {
      const lower = ref.table.toLowerCase()
      if (!seenTableNames.has(lower)) {
        seenTableNames.add(lower)
        suggestions.push({
          label: ref.table,
          kind: SuggestionKind.Table,
          insertText: ref.table,
          priority: SuggestionPriority.MediumLow,
        })
      }
    }
  }

  return suggestions
}