@@ -39,6 +39,23 @@ def _enum_value_suggestion(text_before_cursor: str, full_text: str) -> dict[str,
3939 }
4040
4141
42+ def _charset_suggestion (tokens : list [Token ]) -> list [dict [str , str ]] | None :
43+ token_values = [token .value .lower () for token in tokens if token .value ]
44+
45+ if len (token_values ) >= 2 and token_values [- 1 ] == 'set' and token_values [- 2 ] == 'character' :
46+ return [{'type' : 'character_set' }]
47+ if len (token_values ) >= 3 and token_values [- 2 ] == 'set' and token_values [- 3 ] == 'character' :
48+ return [{'type' : 'character_set' }]
49+ if len (token_values ) >= 5 and token_values [- 1 ] == 'using' and token_values [- 4 ] == 'convert' :
50+ return [{'type' : 'character_set' }]
51+ if len (token_values ) >= 6 and token_values [- 2 ] == 'using' and token_values [- 5 ] == 'convert' :
52+ return [{'type' : 'character_set' }]
53+ if len (token_values ) >= 1 and token_values [- 1 ] == 'collate' :
54+ return [{'type' : 'collation' }]
55+
56+ return None
57+
58+
def _is_where_or_having(token: Token | None) -> bool:
    """Return True when *token* carries the WHERE or HAVING keyword."""
    # Guard against a missing token or one with an empty value.
    if not token or not token.value:
        return False
    return token.value.lower() in ("where", "having")
4461
@@ -261,6 +278,7 @@ def suggest_based_on_last_token(
261278
262279 # don't suggest anything inside a string or number
263280 if word_before_cursor :
281+ # todo: example where this fails: completing on COLLATE with string "0900"
264282 if re .match (r'^[\d\.]' , word_before_cursor [0 ]):
265283 return []
266284 # more efficient if no space was typed yet in the string
@@ -272,6 +290,14 @@ def suggest_based_on_last_token(
272290 if is_inside_quotes (text_before_cursor , - 1 ) in ['single' , 'double' ]:
273291 return []
274292
293+ try :
294+ # todo: pass in the complete list of tokens to avoid multiple parsing passes
295+ parsed = sqlparse .parse (text_before_cursor )[0 ]
296+ tokens_wo_space = [x for x in parsed .tokens if x .ttype != sqlparse .tokens .Token .Text .Whitespace ]
297+ except (AttributeError , IndexError , ValueError , sqlparse .exceptions .SQLParseError ):
298+ parsed = sqlparse .sql .Statement ()
299+ tokens_wo_space = []
300+
275301 if isinstance (token , str ):
276302 token_v = token .lower ()
277303 elif isinstance (token , Comparison ):
@@ -286,7 +312,15 @@ def suggest_based_on_last_token(
286312 # sqlparse groups all tokens from the where clause into a single token
287313 # list. This means that token.value may be something like
288314 # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
289- # suggestions in complicated where clauses correctly
315+ # suggestions in complicated where clauses correctly.
316+ #
317+ # This logic also needs to look even deeper in to the WHERE clause.
318+ # We recapitulate some transcoding suggestions here, but cannot
319+ # recapitulate the entire logic of this function.
320+ where_tokens = [x for x in token .tokens if x .ttype != sqlparse .tokens .Token .Text .Whitespace ]
321+ if transcoding_suggestion := _charset_suggestion (where_tokens ):
322+ return transcoding_suggestion
323+
290324 original_text = text_before_cursor
291325 prev_keyword , text_before_cursor = find_prev_keyword (text_before_cursor )
292326 enum_suggestion = _enum_value_suggestion (original_text , full_text )
@@ -303,12 +337,12 @@ def suggest_based_on_last_token(
303337
304338 if not token :
305339 return [{"type" : "keyword" }, {"type" : "special" }]
306- elif token_v == "*" :
340+
341+ if token_v == "*" :
307342 return [{"type" : "keyword" }]
308- elif token_v .endswith ("(" ):
309- p = sqlparse .parse (text_before_cursor )[0 ]
310343
311- if p .tokens and isinstance (p .tokens [- 1 ], Where ):
344+ if token_v .endswith ("(" ):
345+ if parsed .tokens and isinstance (parsed .tokens [- 1 ], Where ):
312346 # Four possibilities:
313347 # 1 - Parenthesized clause like "WHERE foo AND ("
314348 # Suggest columns/functions
@@ -323,7 +357,7 @@ def suggest_based_on_last_token(
323357 column_suggestions = suggest_based_on_last_token ("where" , text_before_cursor , None , full_text , identifier )
324358
325359 # Check for a subquery expression (cases 3 & 4)
326- where = p .tokens [- 1 ]
360+ where = parsed .tokens [- 1 ]
327361 _idx , prev_tok = where .token_prev (len (where .tokens ) - 1 )
328362
329363 if isinstance (prev_tok , Comparison ):
@@ -337,25 +371,29 @@ def suggest_based_on_last_token(
337371 return column_suggestions
338372
339373 # Get the token before the parens
340- idx , prev_tok = p .token_prev (len (p .tokens ) - 1 )
374+ idx , prev_tok = parsed .token_prev (len (parsed .tokens ) - 1 )
341375 if prev_tok and prev_tok .value and prev_tok .value .lower () == "using" :
342376 # tbl1 INNER JOIN tbl2 USING (col1, col2)
343377 tables = extract_tables (full_text )
344378
345379 # suggest columns that are present in more than one table
346380 return [{"type" : "column" , "tables" : tables , "drop_unique" : True }]
347- elif p .token_first ().value .lower () == "select" :
381+ elif parsed . tokens and parsed .token_first ().value .lower () == "select" :
348382 # If the lparen is preceeded by a space chances are we're about to
349383 # do a sub-select.
350384 if last_word (text_before_cursor , "all_punctuations" ).startswith ("(" ):
351385 return [{"type" : "keyword" }]
352- elif p .token_first ().value .lower () == "show" :
386+ elif parsed . tokens and parsed .token_first ().value .lower () == "show" :
353387 return [{"type" : "show" }]
354388
355389 # We're probably in a function argument list
356390 return [{"type" : "column" , "tables" : extract_tables (full_text )}]
357391 elif token_v in ("call" ):
358392 return [{"type" : "procedure" , "schema" : []}]
393+ elif token_v in ('set' ) and len (tokens_wo_space ) >= 3 and tokens_wo_space [- 3 ].value .lower () == 'character' :
394+ return [{'type' : 'character_set' }]
395+ elif token_v in ('set' ) and len (tokens_wo_space ) >= 2 and tokens_wo_space [- 2 ].value .lower () == 'character' :
396+ return [{'type' : 'character_set' }]
359397 elif token_v in ("set" , "order by" , "distinct" ):
360398 return [{"type" : "column" , "tables" : extract_tables (full_text )}]
361399 elif token_v == "as" :
@@ -364,13 +402,19 @@ def suggest_based_on_last_token(
364402 elif token_v in ("show" ):
365403 return [{"type" : "show" }]
366404 elif token_v in ("to" ,):
367- p = sqlparse .parse (text_before_cursor )[0 ]
368- if p .token_first ().value .lower () == "change" :
405+ if parsed .tokens and parsed .token_first ().value .lower () == "change" :
369406 return [{"type" : "change" }]
370407 else :
371408 return [{"type" : "user" }]
372409 elif token_v in ("user" , "for" ):
373410 return [{"type" : "user" }]
411+ elif token_v in ('collate' ):
412+ return [{'type' : 'collation' }]
413+ # some duplication with _charset_suggestion()
414+ elif token_v in ('using' ) and len (tokens_wo_space ) >= 5 and tokens_wo_space [- 5 ].value .lower () == 'convert' :
415+ return [{'type' : 'character_set' }]
416+ elif token_v in ('using' ) and len (tokens_wo_space ) >= 4 and tokens_wo_space [- 4 ].value .lower () == 'convert' :
417+ return [{'type' : 'character_set' }]
374418 elif token_v in ("select" , "where" , "having" ):
375419 # Check for a table alias or schema qualification
376420 parent = (identifier and identifier .get_parent_name ()) or []
@@ -399,7 +443,7 @@ def suggest_based_on_last_token(
399443 return [
400444 {"type" : "column" , "tables" : tables },
401445 {"type" : "function" , "schema" : []},
402- {"type" : "introducer" , "schema" : [] },
446+ {"type" : "introducer" },
403447 {"type" : "alias" , "aliases" : aliases },
404448 ]
405449 elif (
0 commit comments