forked from cursorless-dev/cursorless
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfindSurroundingPairParseTreeBased.ts
More file actions
254 lines (231 loc) · 8.58 KB
/
findSurroundingPairParseTreeBased.ts
File metadata and controls
254 lines (231 loc) · 8.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import { Range, TextDocument, TextEditor } from "@cursorless/common";
import type { SyntaxNode } from "web-tree-sitter";
import {
SimpleSurroundingPairName,
SurroundingPairScopeType,
} from "@cursorless/common";
import { getNodeRange } from "../../../util/nodeSelectors";
import { isContainedInErrorNode } from "../../../util/treeSitterUtils";
import { extractSelectionFromSurroundingPairOffsets } from "./extractSelectionFromSurroundingPairOffsets";
import { findSurroundingPairCore } from "./findSurroundingPairCore";
import { getIndividualDelimiters } from "./getIndividualDelimiters";
import {
IndividualDelimiter,
Offsets,
PossibleDelimiterOccurrence,
} from "./types";
/**
* Implements the version of the surrounding pair finding algorithm that
* leverages the parse tree. We use this algorithm when we are in a language
* for which we have parser support, unless we are in a string or comment, where
* we revert to text-based.
*
* The approach is actually roughly the same as the approach we use when we do
* not have access to a parse tree. In both cases we create a list of
* candidate delimiters in the region of the selection, and then pass them to
* the core algorithm, implemented by findSurroundingPairCore.
*
* To generate a list of delimiters to pass to findSurroundingPairCore, we repeatedly walk up the parse tree starting at the given node. Each time, we ask for all descendant tokens whose type is that of one of the delimiters that we're looking for.
* repeatedly walk up the parse tree starting at the given node. Each time, we
* ask for all descendant tokens whose type is that of one of the delimiters
* that we're looking for, and pass this list of tokens to
* findSurroundingPairCore.
*
* Note that walking up the hierarchy one parent at a time is just an
* optimization to avoid handling the entire file if we don't need to. The
* result would be the same if we just operated on the root node of the parse
* tree, just slower if our delimiter pair is actually contained in a small
* piece of a large file.
*
* The main benefits of the parse tree-based approach over the text-based
* approach are the following:
*
* - We can leverage the lexer to ensure that we only consider proper language tokens
* - We can let the language normalize surface forms of delimiter types, so eg
* in Python the leading `f"` on an f-string just has type `"` like any other
* string.
* - We can more easily narrow the scope of our search by walking up the parse tree
* - The actual lexing is done in fast wasm code rather than using a regex
* - We can disambiguate delimiters whose opening and closing symbol is the
* same (eg `"`). Without a parse tree we have to guess whether it is an
* opening or closing quote.
*
* @param editor The text editor containing the selection
* @param selection The selection to find surrounding pair around
* @param node A parse tree node overlapping with the selection
* @param delimiters The acceptable surrounding pair names
* @returns The newly expanded selection, including editor info
*/
export function findSurroundingPairParseTreeBased(
editor: TextEditor,
selection: Range,
node: SyntaxNode,
delimiters: SimpleSurroundingPairName[],
scopeType: SurroundingPairScopeType,
) {
const document: TextDocument = editor.document;
const individualDelimiters = getIndividualDelimiters(
document.languageId,
delimiters,
);
const delimiterTextToDelimiterInfoMap = Object.fromEntries(
individualDelimiters.map((individualDelimiter) => [
individualDelimiter.text,
individualDelimiter,
]),
);
const selectionOffsets = {
start: document.offsetAt(selection.start),
end: document.offsetAt(selection.end),
};
/**
* Context to pass to nested call
*/
const context: Context = {
delimiterTextToDelimiterInfoMap,
individualDelimiters,
delimiters,
selectionOffsets,
scopeType,
};
// Walk up the parse tree from parent to parent until we find a node whose
// descendants contain an appropriate matching pair.
for (
let currentNode: SyntaxNode | null = node;
currentNode != null;
currentNode = currentNode.parent
) {
// Just bail early if the node doesn't completely contain our selection as
// it is a lost cause.
if (!getNodeRange(currentNode).contains(selection)) {
continue;
}
// Here we apply the core algorithm
const pairOffsets = findSurroundingPairContainedInNode(
context,
currentNode,
);
// And then perform postprocessing
if (pairOffsets != null) {
return extractSelectionFromSurroundingPairOffsets(
document,
0,
pairOffsets,
);
}
}
return null;
}
/**
* Context to pass to nested call
*/
interface Context {
/**
* Map from raw text to info about the delimiter at that point
*/
delimiterTextToDelimiterInfoMap: {
[k: string]: IndividualDelimiter;
};
/**
* A list of all opening / closing delimiters that we are considering
*/
individualDelimiters: IndividualDelimiter[];
/**
* The names of the delimiters that we're considering
*/
delimiters: SimpleSurroundingPairName[];
/**
* The offsets of the selection
*/
selectionOffsets: Offsets;
scopeType: SurroundingPairScopeType;
}
/**
* This function is called at each node as we walk up the ancestor hierarchy
* from our start node. It finds all possible delimiters descending from the
* node and passes them to the findSurroundingPairCore algorithm.
*
* @param context Extra context to be used by this function
* @param node The current node to consider
* @returns The offsets of the matching surrounding pair, or `null` if none is found
*/
function findSurroundingPairContainedInNode(
context: Context,
node: SyntaxNode,
) {
const {
delimiterTextToDelimiterInfoMap,
individualDelimiters,
delimiters,
selectionOffsets,
scopeType,
} = context;
/**
* A list of all delimiter nodes descending from `node`, as determined by
* their type.
* Handles the case of error nodes with no text. https://github.com/cursorless-dev/cursorless/issues/688
*/
const possibleDelimiterNodes = node
.descendantsOfType(individualDelimiters.map(({ text }) => text))
.filter((node) => !(node.text === "" && node.hasError()));
/**
* A list of all delimiter occurrences, generated from the delimiter nodes.
*/
const delimiterOccurrences: PossibleDelimiterOccurrence[] =
possibleDelimiterNodes.map((delimiterNode) => {
return {
offsets: {
start: delimiterNode.startIndex,
end: delimiterNode.endIndex,
},
get delimiterInfo() {
const delimiterInfo =
delimiterTextToDelimiterInfoMap[delimiterNode.type];
// Distinguish between a greater-than sign and an angle bracket by
// looking at its position within its parent node.
if (
delimiterInfo.delimiter === "angleBrackets" &&
inferDelimiterSide(delimiterNode) !== delimiterInfo.side &&
!isContainedInErrorNode(delimiterNode)
) {
return undefined;
}
// NB: If side is `"unknown"`, ie we cannot determine whether
// something is a left or right delimiter based on its text / type
// alone (eg `"`), we assume it is a left delimiter if it is the
// first child of its parent, and right delimiter otherwise. This
// approach might not always work, but seems to work in the
// languages we've tried.
const side =
delimiterInfo.side === "unknown" && scopeType.forceDirection == null
? inferDelimiterSide(delimiterNode)
: delimiterInfo.side;
return {
...delimiterInfo,
side,
};
},
};
});
// Just run core algorithm once we have our list of delimiters.
return findSurroundingPairCore(
scopeType,
delimiterOccurrences,
delimiters,
selectionOffsets,
// If we're not the root node of the parse tree (ie `node.parent !=
// null`), we tell `findSurroundingPairCore` to bail if it finds a
// delimiter adjacent to our selection, but doesn't find its opposite
// delimiter within our list. We do so because it's possible that the
// adjacent delimiter's opposite might be found when we run again on a
// parent node later.
node.parent != null,
);
}
function inferDelimiterSide(delimiterNode: SyntaxNode) {
return delimiterNode.parent?.firstChild?.equals(delimiterNode)
? "left"
: delimiterNode.parent?.lastChild?.equals(delimiterNode)
? "right"
: ("unknown" as const);
}