Skip to content

Commit 724182f

Browse files
authored
Merge pull request #1711 from dbcli/RW/collation-and-charset-completion-improvements
Add collation completions and more charset completions
2 parents cbc3c0c + 63507be commit 724182f

8 files changed

Lines changed: 210 additions & 36 deletions

changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ Upcoming (TBD)
44
Features
55
---------
66
* Add prompt format string for literal backslash.
7+
* Add collation completions, and complete charsets in more positions.
78

89

910
Bug Fixes
1011
---------
1112
* Suppress warnings when `sqlglotrs` is installed.
1213

1314

14-
1515
1.64.0 (2026/03/13)
1616
==============
1717

mycli/completion_refresher.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ def refresh_character_sets(completer: SQLCompleter, executor: SQLExecute) -> Non
165165
completer.extend_character_sets(executor.character_sets())
166166

167167

168+
@refresher("collations")
def refresh_collations(completer: SQLCompleter, executor: SQLExecute) -> None:
    """Load collation completions from the server into the completer.

    Passes whatever ``executor.collations()`` produces straight to
    ``completer.extend_collations()``.
    """
    collation_rows = executor.collations()
    completer.extend_collations(collation_rows)
171+
172+
168173
@refresher("special_commands")
169174
def refresh_special(completer: SQLCompleter, executor: SQLExecute) -> None:
170175
completer.extend_special_commands(list(COMMANDS.keys()))

mycli/packages/completion_engine.py

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,23 @@ def _enum_value_suggestion(text_before_cursor: str, full_text: str) -> dict[str,
3939
}
4040

4141

42+
def _charset_suggestion(tokens: list[Token]) -> list[dict[str, str]] | None:
43+
token_values = [token.value.lower() for token in tokens if token.value]
44+
45+
if len(token_values) >= 2 and token_values[-1] == 'set' and token_values[-2] == 'character':
46+
return [{'type': 'character_set'}]
47+
if len(token_values) >= 3 and token_values[-2] == 'set' and token_values[-3] == 'character':
48+
return [{'type': 'character_set'}]
49+
if len(token_values) >= 5 and token_values[-1] == 'using' and token_values[-4] == 'convert':
50+
return [{'type': 'character_set'}]
51+
if len(token_values) >= 6 and token_values[-2] == 'using' and token_values[-5] == 'convert':
52+
return [{'type': 'character_set'}]
53+
if len(token_values) >= 1 and token_values[-1] == 'collate':
54+
return [{'type': 'collation'}]
55+
56+
return None
57+
58+
4259
def _is_where_or_having(token: Token | None) -> bool:
4360
return bool(token and token.value and token.value.lower() in ("where", "having"))
4461

@@ -261,6 +278,7 @@ def suggest_based_on_last_token(
261278

262279
# don't suggest anything inside a string or number
263280
if word_before_cursor:
281+
# todo: example where this fails: completing on COLLATE with string "0900"
264282
if re.match(r'^[\d\.]', word_before_cursor[0]):
265283
return []
266284
# more efficient if no space was typed yet in the string
@@ -272,6 +290,14 @@ def suggest_based_on_last_token(
272290
if is_inside_quotes(text_before_cursor, -1) in ['single', 'double']:
273291
return []
274292

293+
try:
294+
# todo: pass in the complete list of tokens to avoid multiple parsing passes
295+
parsed = sqlparse.parse(text_before_cursor)[0]
296+
tokens_wo_space = [x for x in parsed.tokens if x.ttype != sqlparse.tokens.Token.Text.Whitespace]
297+
except (AttributeError, IndexError, ValueError, sqlparse.exceptions.SQLParseError):
298+
parsed = sqlparse.sql.Statement()
299+
tokens_wo_space = []
300+
275301
if isinstance(token, str):
276302
token_v = token.lower()
277303
elif isinstance(token, Comparison):
@@ -286,7 +312,15 @@ def suggest_based_on_last_token(
286312
# sqlparse groups all tokens from the where clause into a single token
287313
# list. This means that token.value may be something like
288314
# 'where foo > 5 and '. We need to look "inside" token.tokens to handle
289-
# suggestions in complicated where clauses correctly
315+
# suggestions in complicated where clauses correctly.
316+
#
317+
# This logic also needs to look even deeper into the WHERE clause.
318+
# We recapitulate some transcoding suggestions here, but cannot
319+
# recapitulate the entire logic of this function.
320+
where_tokens = [x for x in token.tokens if x.ttype != sqlparse.tokens.Token.Text.Whitespace]
321+
if transcoding_suggestion := _charset_suggestion(where_tokens):
322+
return transcoding_suggestion
323+
290324
original_text = text_before_cursor
291325
prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
292326
enum_suggestion = _enum_value_suggestion(original_text, full_text)
@@ -303,12 +337,12 @@ def suggest_based_on_last_token(
303337

304338
if not token:
305339
return [{"type": "keyword"}, {"type": "special"}]
306-
elif token_v == "*":
340+
341+
if token_v == "*":
307342
return [{"type": "keyword"}]
308-
elif token_v.endswith("("):
309-
p = sqlparse.parse(text_before_cursor)[0]
310343

311-
if p.tokens and isinstance(p.tokens[-1], Where):
344+
if token_v.endswith("("):
345+
if parsed.tokens and isinstance(parsed.tokens[-1], Where):
312346
# Four possibilities:
313347
# 1 - Parenthesized clause like "WHERE foo AND ("
314348
# Suggest columns/functions
@@ -323,7 +357,7 @@ def suggest_based_on_last_token(
323357
column_suggestions = suggest_based_on_last_token("where", text_before_cursor, None, full_text, identifier)
324358

325359
# Check for a subquery expression (cases 3 & 4)
326-
where = p.tokens[-1]
360+
where = parsed.tokens[-1]
327361
_idx, prev_tok = where.token_prev(len(where.tokens) - 1)
328362

329363
if isinstance(prev_tok, Comparison):
@@ -337,25 +371,29 @@ def suggest_based_on_last_token(
337371
return column_suggestions
338372

339373
# Get the token before the parens
340-
idx, prev_tok = p.token_prev(len(p.tokens) - 1)
374+
idx, prev_tok = parsed.token_prev(len(parsed.tokens) - 1)
341375
if prev_tok and prev_tok.value and prev_tok.value.lower() == "using":
342376
# tbl1 INNER JOIN tbl2 USING (col1, col2)
343377
tables = extract_tables(full_text)
344378

345379
# suggest columns that are present in more than one table
346380
return [{"type": "column", "tables": tables, "drop_unique": True}]
347-
elif p.token_first().value.lower() == "select":
381+
elif parsed.tokens and parsed.token_first().value.lower() == "select":
348382
# If the lparen is preceded by a space, chances are we're about to
349383
# do a sub-select.
350384
if last_word(text_before_cursor, "all_punctuations").startswith("("):
351385
return [{"type": "keyword"}]
352-
elif p.token_first().value.lower() == "show":
386+
elif parsed.tokens and parsed.token_first().value.lower() == "show":
353387
return [{"type": "show"}]
354388

355389
# We're probably in a function argument list
356390
return [{"type": "column", "tables": extract_tables(full_text)}]
357391
elif token_v in ("call"):
358392
return [{"type": "procedure", "schema": []}]
393+
elif token_v in ('set') and len(tokens_wo_space) >= 3 and tokens_wo_space[-3].value.lower() == 'character':
394+
return [{'type': 'character_set'}]
395+
elif token_v in ('set') and len(tokens_wo_space) >= 2 and tokens_wo_space[-2].value.lower() == 'character':
396+
return [{'type': 'character_set'}]
359397
elif token_v in ("set", "order by", "distinct"):
360398
return [{"type": "column", "tables": extract_tables(full_text)}]
361399
elif token_v == "as":
@@ -364,13 +402,19 @@ def suggest_based_on_last_token(
364402
elif token_v in ("show"):
365403
return [{"type": "show"}]
366404
elif token_v in ("to",):
367-
p = sqlparse.parse(text_before_cursor)[0]
368-
if p.token_first().value.lower() == "change":
405+
if parsed.tokens and parsed.token_first().value.lower() == "change":
369406
return [{"type": "change"}]
370407
else:
371408
return [{"type": "user"}]
372409
elif token_v in ("user", "for"):
373410
return [{"type": "user"}]
411+
elif token_v in ('collate'):
412+
return [{'type': 'collation'}]
413+
# some duplication with _charset_suggestion()
414+
elif token_v in ('using') and len(tokens_wo_space) >= 5 and tokens_wo_space[-5].value.lower() == 'convert':
415+
return [{'type': 'character_set'}]
416+
elif token_v in ('using') and len(tokens_wo_space) >= 4 and tokens_wo_space[-4].value.lower() == 'convert':
417+
return [{'type': 'character_set'}]
374418
elif token_v in ("select", "where", "having"):
375419
# Check for a table alias or schema qualification
376420
parent = (identifier and identifier.get_parent_name()) or []
@@ -399,7 +443,7 @@ def suggest_based_on_last_token(
399443
return [
400444
{"type": "column", "tables": tables},
401445
{"type": "function", "schema": []},
402-
{"type": "introducer", "schema": []},
446+
{"type": "introducer"},
403447
{"type": "alias", "aliases": aliases},
404448
]
405449
elif (

mycli/sqlcompleter.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,10 @@ class SQLCompleter(Completer):
927927

928928
users: list[str] = []
929929

930+
character_sets: list[str] = []
931+
932+
collations: list[str] = []
933+
930934
def __init__(
931935
self,
932936
smart_completion: bool = True,
@@ -1087,31 +1091,38 @@ def extend_procedures(self, procedure_data: Generator[tuple]) -> None:
10871091
metadata[self.dbname][elt[0]] = None
10881092

10891093
def extend_character_sets(self, character_set_data: Generator[tuple]) -> None:
    """Register character-set names for completion.

    Args:
        character_set_data: rows whose first element is the character-set
            name.  Empty rows and rows with an empty first element are
            skipped.

    Each name is appended to ``self.character_sets`` and added to
    ``self.all_completions``.
    """
    for elt in character_set_data:
        if not elt or not elt[0]:
            continue
        name = elt[0]
        self.character_sets.append(name)
        # Use add(), not update(): update() iterates its argument, so a
        # string would be inserted one character at a time.
        self.all_completions.add(name)
1101+
1102+
def extend_collations(self, collation_data: Generator[tuple]) -> None:
    """Register collation names for completion.

    Args:
        collation_data: rows whose first element is the collation name.
            Empty rows and rows with an empty first element are skipped.

    Each name is appended to ``self.collations`` and added to
    ``self.all_completions``.
    """
    for elt in collation_data:
        if not elt or not elt[0]:
            continue
        name = elt[0]
        self.collations.append(name)
        # Use add(), not update(): update() iterates its argument, so a
        # string would be inserted one character at a time.
        self.all_completions.add(name)
11001110

11011111
def set_dbname(self, dbname: str | None) -> None:
    """Record the current database name, storing '' for any falsy value."""
    self.dbname = dbname if dbname else ''
11031113

11041114
def reset_completions(self) -> None:
11051115
self.databases: list[str] = []
11061116
self.users: list[str] = []
1117+
self.character_sets: list[str] = []
1118+
self.collations: list[str] = []
11071119
self.show_items: list[Completion] = []
11081120
self.dbname = ""
11091121
self.dbmetadata: dict[str, Any] = {
11101122
"tables": {},
11111123
"views": {},
11121124
"functions": {},
11131125
"procedures": {},
1114-
"character_sets": {},
11151126
"enum_values": {},
11161127
}
11171128
self.all_completions = set(self.keywords + self.functions)
@@ -1321,15 +1332,30 @@ def get_completions(
13211332
completions.extend([(*x, rank) for x in procs_m])
13221333

13231334
elif suggestion['type'] == 'introducer':
1324-
charsets = self.populate_schema_objects(suggestion['schema'], 'character_sets')
1325-
introducers = [f'_{x}' for x in charsets]
1335+
introducers = [f'_{x}' for x in self.character_sets]
13261336
introducers_m = self.find_matches(
13271337
word_before_cursor,
13281338
introducers,
13291339
text_before_cursor=document.text_before_cursor,
13301340
)
13311341
completions.extend([(*x, rank) for x in introducers_m])
13321342

1343+
elif suggestion['type'] == 'character_set':
1344+
charsets_m = self.find_matches(
1345+
word_before_cursor,
1346+
self.character_sets,
1347+
text_before_cursor=document.text_before_cursor,
1348+
)
1349+
completions.extend([(*x, rank) for x in charsets_m])
1350+
1351+
elif suggestion['type'] == 'collation':
1352+
collations_m = self.find_matches(
1353+
word_before_cursor,
1354+
self.collations,
1355+
text_before_cursor=document.text_before_cursor,
1356+
)
1357+
completions.extend([(*x, rank) for x in collations_m])
1358+
13331359
elif suggestion["type"] == "table":
13341360
# If this is a select and columns are given, parse the columns and
13351361
# then only return tables that have one or more of the given columns.

mycli/sqlexecute.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ class SQLExecute:
105105

106106
character_sets_query = '''SHOW CHARACTER SET'''
107107

108+
collations_query = '''SHOW COLLATION'''
109+
108110
table_columns_query = """select TABLE_NAME, COLUMN_NAME from information_schema.columns
109111
where table_schema = %s
110112
order by table_name,ordinal_position"""
@@ -482,6 +484,20 @@ def character_sets(self) -> Generator[tuple, None, None]:
482484
else:
483485
yield from cur
484486

487+
def collations(self) -> Generator[tuple, None, None]:
    """Yield the rows produced by ``collations_query``.

    If executing the query raises ``pymysql.DatabaseError``, the error is
    logged and a single empty tuple is yielded instead.
    """
    assert isinstance(self.conn, Connection)
    with self.conn.cursor() as cursor:
        query = self.collations_query
        _logger.debug("Collations Query. sql: %r", query)
        try:
            cursor.execute(query)
        except pymysql.DatabaseError as exc:
            _logger.error('No collations completions due to %r', exc)
            # Placeholder row so consumers still receive one item.
            yield ()
            return
        yield from cursor
500+
485501
def show_candidates(self) -> Generator[tuple, None, None]:
486502
assert isinstance(self.conn, Connection)
487503
with self.conn.cursor() as cur:

0 commit comments

Comments
 (0)