Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit 2aed314

Browse files
committed
- Changed LineFormatter:
- Made line 0-based, as it is in `Tokenizer` and LSP. - Removed `Peek` from `TokenizerWrapper` - it isn't needed, checking current token is enough. - Simplified the code by removing all usages of `SourceLocation` and `SourceSpan`: those types aren't required and conversion to `line/character` representation is needed only at the very end of `FormatLine` method. - Added `GetLineStartIndex` and `GetLineEndIndex` methods, cause `NewLineLocation` doesn't have that information. - Fixed `GrammarFile` test. It has been taking substring of length `end.character - start.character - 1`. `Range.end` is exclusive, so length should be calculated as `end.character - start.character` - Added couple of helper methods to the `Tokenizer`
1 parent 484443d commit 2aed314

4 files changed

Lines changed: 72 additions & 116 deletions

File tree

src/Analysis/Engine/Impl/Parsing/Tokenizer.cs

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -130,17 +130,9 @@ public List<TokenInfo> ReadTokens(int characterCount) {
130130
return tokens;
131131
}
132132

133-
public object CurrentState {
134-
get {
135-
return _state;
136-
}
137-
}
138-
139-
public SourceLocation CurrentPosition {
140-
get {
141-
return IndexToLocation(CurrentIndex);
142-
}
143-
}
133+
public object CurrentState => _state;
134+
public int CurrentLine => _newLineLocations.Count;
135+
public SourceLocation CurrentPosition => IndexToLocation(CurrentIndex);
144136

145137
public SourceLocation IndexToLocation(int index) {
146138
int match = _newLineLocations.BinarySearch(new NewLineLocation(index, NewLineKind.None));
@@ -2235,6 +2227,7 @@ private static void DumpToken(Token token) {
22352227
Console.WriteLine("{0} `{1}`", token.Kind, token.Image.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t"));
22362228
}
22372229

2230+
internal NewLineLocation GetNewLineLocation(int line) => _newLineLocations.Count == line ? new NewLineLocation(CurrentIndex, NewLineKind.None) : _newLineLocations[line];
22382231
internal NewLineLocation[] GetLineLocations() => _newLineLocations.ToArray();
22392232
internal SourceLocation[] GetCommentLocations() => _commentLocations.ToArray();
22402233

@@ -2564,11 +2557,7 @@ private bool AtBeginning {
25642557
}
25652558
}
25662559

2567-
private int CurrentIndex {
2568-
get {
2569-
return _tokenStartIndex + Math.Min(_position, _end) - _start;
2570-
}
2571-
}
2560+
private int CurrentIndex => _tokenStartIndex + Math.Min(_position, _end) - _start;
25722561

25732562
private void DiscardToken() {
25742563
CheckInvariants();

src/Analysis/Engine/Test/LineFormatterTests.cs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -356,11 +356,10 @@ public void LineContinuation() {
356356
[DataRow("foo.a() \\\r\n .b() \\\r\n .c()", ".c()", 2, 3, 7)]
357357
[DataTestMethod, Priority(0)]
358358
public void MultilineChainedCall(string code, string expected, int line, int characterStart, int characterEnd) {
359-
var edits = new LineFormatter(new StringReader(code), PythonLanguageVersion.V36).FormatLine(line + 1);
359+
var edits = new LineFormatter(new StringReader(code), PythonLanguageVersion.V36).FormatLine(line);
360360
edits.Should().OnlyHaveTextEdit(expected, (line, characterStart, line, characterEnd));
361361
}
362362

363-
364363
[DataRow("a[:, :, :, 1]")]
365364
[DataRow("a[x:y, x + 1 :y, :, 1]")]
366365
[DataRow("a[:, 1:3]")]
@@ -425,9 +424,7 @@ public void GrammarFile() {
425424
var lineFormatter = new LineFormatter(reader, PythonLanguageVersion.V37);
426425

427426
for (var i = 0; i < lines.Length; i++) {
428-
var lineNum = i + 1;
429-
430-
var edits = lineFormatter.FormatLine(lineNum);
427+
var edits = lineFormatter.FormatLine(i);
431428
edits.Should().NotBeNull().And.HaveCountLessOrEqualTo(1);
432429

433430
if (edits.Length == 0) {
@@ -442,7 +439,7 @@ public void GrammarFile() {
442439
end.line.Should().Be(i);
443440

444441
var lineText = lines[i];
445-
edit.newText.Should().Be(lineText.Substring(start.character, end.character - start.character - 1), $"because line {lineNum} should be unchanged");
442+
edit.newText.Should().Be(lineText.Substring(start.character, end.character - start.character), $"because line {i} should be unchanged");
446443
}
447444
}
448445
}
@@ -460,7 +457,7 @@ public static void AssertSingleLineFormat(string text, string expected, int line
460457
Check.ArgumentNull(nameof(expected), expected);
461458

462459
using (var reader = new StringReader(text)) {
463-
var edits = new LineFormatter(reader, languageVersion).FormatLine(line + 1);
460+
var edits = new LineFormatter(reader, languageVersion).FormatLine(line);
464461
edits.Should().OnlyHaveTextEdit(expected, (line, editStart, line, text.Split('\n')[line].Length));
465462
}
466463
}
@@ -469,7 +466,7 @@ public static void AssertNoEdits(string text, int line = 0, PythonLanguageVersio
469466
Check.ArgumentNull(nameof(text), text);
470467

471468
using (var reader = new StringReader(text)) {
472-
var edits = new LineFormatter(reader, languageVersion).FormatLine(line + 1);
469+
var edits = new LineFormatter(reader, languageVersion).FormatLine(line);
473470
edits.Should().BeEmpty();
474471
}
475472
}

src/LanguageServer/Impl/Implementation/LineFormatter.cs

Lines changed: 60 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -77,30 +77,20 @@ private void AddToken(TokenExt token) {
7777
/// <param name="includeToken">A function which returns true if the token should be added to the final list. If null, all tokens will be added.</param>
7878
/// <returns>A non-null list of tokens on that line.</returns>
7979
private List<TokenExt> TokenizeLine(int line, Func<TokenExt, bool> includeToken = null) {
80-
Check.Argument(nameof(line), () => line > 0);
80+
Check.Argument(nameof(line), () => line >= 0);
8181

8282
var extraToken = true;
83-
84-
var peeked = _tokenizer.Peek();
85-
while (peeked != null && (peeked.Line <= line || extraToken)) {
86-
var token = _tokenizer.Next();
87-
88-
if (includeToken == null || includeToken(token)) {
89-
AddToken(token);
83+
for (var current = _tokenizer.Current ?? _tokenizer.Next(); current != null && (current.Line <= line || extraToken); current = _tokenizer.Next()) {
84+
if (includeToken == null || includeToken(current)) {
85+
AddToken(current);
9086
}
9187

92-
peeked = _tokenizer.Peek();
93-
94-
if (token.Line > line && !token.IsIgnored) {
88+
if (current.Line > line && !current.IsIgnored) {
9589
extraToken = false;
9690
}
9791
}
9892

99-
if (!_lineTokens.TryGetValue(line, out List<TokenExt> tokens)) {
100-
return new List<TokenExt>();
101-
}
102-
103-
return tokens;
93+
return _lineTokens.TryGetValue(line, out var tokens) ? tokens : new List<TokenExt>();
10494
}
10595

10696
/// <summary>
@@ -109,11 +99,10 @@ private List<TokenExt> TokenizeLine(int line, Func<TokenExt, bool> includeToken
10999
/// <param name="line">One-indexed line number.</param>
110100
/// <returns>A list of TextEdits needed to format the line.</returns>
111101
public TextEdit[] FormatLine(int line) {
112-
if (line < 1) {
102+
if (line < 0) {
113103
return NoEdits;
114104
}
115-
116-
// Keep ExplictLineJoin because it has text associated with it.
105+
// Keep ExplicitLineJoin because it has text associated with it.
117106
var tokens = TokenizeLine(line, t => !t.IsIgnored || t.Kind == TokenKind.ExplicitLineJoin);
118107

119108
if (tokens.Count == 0) {
@@ -122,14 +111,14 @@ public TextEdit[] FormatLine(int line) {
122111

123112
var builder = new StringBuilder();
124113
var first = tokens[0];
125-
var beginCol = first.Span.Start.Column;
114+
var startIndex = first.Span.Start;
126115
var startIdx = 0;
127116

128117
if (first.IsMultilineString) {
129118
// If the first token is a multiline string, start the edit afterward,
130119
// skip looking at the first token, and ensure that there's a space
131120
// after it if needed (i.e. in the case of a following comment).
132-
beginCol = first.Span.End.Column;
121+
startIndex = first.Span.End;
133122
startIdx = 1;
134123
if (builder.Length == 0) {
135124
builder.Append(' ');
@@ -357,7 +346,7 @@ public TextEdit[] FormatLine(int line) {
357346
}
358347
}
359348

360-
var endCol = _tokenizer.EndOfLineCol(line);
349+
var endIndex = _tokenizer.GetLineEndIndex(line);
361350

362351
var afterLast = tokens.Last().Next;
363352
if (afterLast != null && afterLast.IsMultilineString) {
@@ -368,16 +357,17 @@ public TextEdit[] FormatLine(int line) {
368357
}
369358

370359
builder.TrimEnd();
371-
var newText = builder.ToString();
372-
373-
if (newText.Length == 0) {
360+
if (builder.Length == 0) {
374361
return NoEdits;
375362
}
376363

364+
var newText = builder.ToString();
365+
var lineStartIndex = _tokenizer.GetLineStartIndex(line);
366+
377367
var edit = new TextEdit {
378368
range = new Range {
379-
start = new SourceLocation(line, beginCol),
380-
end = new SourceLocation(line, endCol)
369+
start = new Position{ line = line, character = startIndex - lineStartIndex },
370+
end = new Position{ line = line, character = endIndex - lineStartIndex }
381371
},
382372
newText = newText
383373
};
@@ -391,13 +381,25 @@ private static void AppendTokenEnsureWhiteSpacesAround(StringBuilder builder, To
391381
.EnsureEndsWithWhiteSpace();
392382

393383
private class TokenExt {
394-
public Token Token { get; set; }
395-
public SourceSpan Span { get; set; }
396-
public int Line => Span.End.Line;
384+
public TokenExt(Token token, string precedingWhitespace, IndexSpan span, int line, bool isMultiLine,
385+
TokenExt prev) {
386+
Token = token;
387+
PrecedingWhitespace = precedingWhitespace;
388+
Span = span;
389+
Line = line;
390+
Prev = prev;
391+
IsMultilineString = IsString && isMultiLine;
392+
}
393+
394+
public Token Token { get; }
395+
public IndexSpan Span { get; }
396+
public int Line { get; }
397397
public TokenExt Inside { get; set; }
398-
public TokenExt Prev { get; set; }
398+
public TokenExt Prev { get; }
399399
public TokenExt Next { get; set; }
400-
public string PrecedingWhitespace { get; set; }
400+
public string PrecedingWhitespace { get; }
401+
public bool IsMultilineString { get; }
402+
401403
public TokenKind Kind => Token.Kind;
402404

403405
public override string ToString() => Token.VerbatimImage;
@@ -434,9 +436,7 @@ public bool MatchesClose(TokenExt other) {
434436
public bool IsKeyword => (Kind >= TokenKind.FirstKeyword && Kind <= TokenKind.LastKeyword) || Kind == TokenKind.KeywordAsync || Kind == TokenKind.KeywordAwait;
435437

436438
public bool IsString => Kind == TokenKind.Constant && Token != Tokens.NoneToken && (Token.Value is string || Token.Value is AsciiString);
437-
438-
public bool IsMultilineString => Span.Start.Line != Span.End.Line && IsString;
439-
439+
440440
public bool IsSimpleSliceToLeft {
441441
get {
442442
if (Kind != TokenKind.Colon) {
@@ -540,26 +540,17 @@ public TokenExt NextNonIgnored {
540540
private class TokenizerWrapper {
541541
private readonly Tokenizer _tokenizer;
542542
private readonly Stack<TokenExt> _insides = new Stack<TokenExt>();
543-
private TokenExt _peeked = null;
544-
private TokenExt _prev = null;
543+
public TokenExt Current { get; private set; }
545544

546545
public TokenizerWrapper(Tokenizer tokenizer) {
547546
_tokenizer = tokenizer;
548547
}
549548

550549
/// <summary>
551-
/// Returns the next token, and advances the tokenizer. Note that
552-
/// the returned token's Next will not be set until the tokenizer
553-
/// actually reads that next token.
550+
/// Returns the next token, and advances the tokenizer.
554551
/// </summary>
555552
/// <returns>The next token</returns>
556553
public TokenExt Next() {
557-
if (_peeked != null) {
558-
var tmp = _peeked;
559-
_peeked = null;
560-
return tmp;
561-
}
562-
563554
if (_tokenizer.IsEndOfFile) {
564555
return null;
565556
}
@@ -571,14 +562,18 @@ public TokenExt Next() {
571562
}
572563

573564
var tokenSpan = _tokenizer.TokenSpan;
574-
var sourceSpan = new SourceSpan(_tokenizer.IndexToLocation(tokenSpan.Start), _tokenizer.IndexToLocation(tokenSpan.End));
575-
576-
var tokenExt = new TokenExt {
577-
Token = token,
578-
PrecedingWhitespace = _tokenizer.PreceedingWhiteSpace,
579-
Span = sourceSpan,
580-
Prev = _prev
581-
};
565+
var line = _tokenizer.CurrentLine;
566+
var lineStart = GetLineStartIndex(line);
567+
var isMultiLine = tokenSpan.Start < lineStart;
568+
569+
var tokenExt = new TokenExt(
570+
token,
571+
_tokenizer.PreceedingWhiteSpace,
572+
tokenSpan,
573+
line,
574+
isMultiLine,
575+
Current
576+
);
582577

583578
if (tokenExt.IsClose) {
584579
if (_insides.Count == 0 || !_insides.Peek().MatchesClose(tokenExt)) {
@@ -597,52 +592,27 @@ public TokenExt Next() {
597592
_insides.Push(tokenExt);
598593
}
599594

600-
if (_prev != null) {
601-
_prev.Next = tokenExt;
595+
if (Current != null) {
596+
Current.Next = tokenExt;
602597
}
603598

604-
_prev = tokenExt;
599+
Current = tokenExt;
605600
return tokenExt;
606601
}
607602

608603
/// <summary>
609-
/// Returns the next token without advancing the tokenizer. Note that
610-
/// the returned token's Next will not be set until the tokenizer
611-
/// actually reads that next token.
604+
/// Gets the index of the start of the line
612605
/// </summary>
613-
/// <returns>The next token</returns>
614-
public TokenExt Peek() {
615-
if (_peeked != null) {
616-
return _peeked;
617-
}
618-
619-
_peeked = Next();
620-
return _peeked;
621-
}
606+
/// <param name="line">Line number.</param>
607+
public int GetLineStartIndex(int line) => line > 0 ? _tokenizer.GetNewLineLocation(line - 1).EndIndex : 0;
622608

623609
/// <summary>
624-
/// Gets the one-indexed column number of the end of a line. The
625-
/// tokenizer must be past the line's newline (or at EOF) in order
626-
/// for this function to work.
610+
/// Gets the index of the end of the line, excluding line break
627611
/// </summary>
628-
/// <param name="line">A one-indexed line number.</param>
629-
/// <returns>One-indexed column number for the end of the line</returns>
630-
public int EndOfLineCol(int line) {
631-
if (line > _tokenizer.CurrentPosition.Line || (line == _tokenizer.CurrentPosition.Line && !_tokenizer.IsEndOfFile)) {
632-
throw new ArgumentException("tokenizer must be at EOF or past line's newline", nameof(line));
633-
}
634-
635-
var idx = line - 1;
636-
var lines = _tokenizer.GetLineLocations();
637-
638-
if (idx < lines.Length) {
639-
var nlLoc = lines[idx];
640-
641-
var sourceLocation = _tokenizer.IndexToLocation(nlLoc.EndIndex - 1);
642-
return sourceLocation.Column;
643-
}
644-
645-
return _tokenizer.CurrentPosition.Column;
612+
/// <param name="line">Line number.</param>
613+
public int GetLineEndIndex(int line) {
614+
var newLineLocation = _tokenizer.GetNewLineLocation(line);
615+
return newLineLocation.EndIndex - newLineLocation.Kind.GetSize();
646616
}
647617
}
648618

src/LanguageServer/Impl/Implementation/Server.OnTypeFormatting.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ public override async Task<TextEdit[]> DocumentOnTypeFormatting(DocumentOnTypeFo
2626

2727
switch (@params.ch) {
2828
case "\n":
29-
targetLine = @params.position.line;
29+
targetLine = @params.position.line - 1;
3030
break;
3131
case ";":
32-
targetLine = @params.position.line + 1;
32+
targetLine = @params.position.line;
3333
break;
3434
default:
3535
throw new ArgumentException("unexpected trigger character", nameof(@params.ch));

0 commit comments

Comments
 (0)