From 3389c876775d76327b557bcc8e89cfc2fef103ae Mon Sep 17 00:00:00 2001 From: kevmoo Date: Mon, 9 Mar 2026 21:20:24 -0700 Subject: [PATCH] Optimize git object parsing with StringScanner Migrates custom line-by-line reading logic in `tag.dart` and `commit.dart` to use native `StringScanner` capabilities with multiline regular expressions. This improves parsing robustness and performance. Fixes handle edge cases including: - Parsing multiline headers (e.g., PGP Signatures) correctly. - Safely handling missing trailing blank lines and empty message bodies in rev-list outputs. --- lib/src/commit.dart | 40 ++++++++++++++++++++-------------------- lib/src/tag.dart | 12 +++++------- lib/src/util.dart | 21 ++++----------------- 3 files changed, 29 insertions(+), 44 deletions(-) diff --git a/lib/src/commit.dart b/lib/src/commit.dart index b2c5dfa8..b2096490 100644 --- a/lib/src/commit.dart +++ b/lib/src/commit.dart @@ -43,6 +43,9 @@ class Commit { final commits = {}; while (!scanner.isDone) { + if (scanner.scan(RegExp(r'\r?\n'))) { + continue; + } final tuple = _parse(scanner, true); commits[tuple.sha!] = tuple.commit; } @@ -57,37 +60,34 @@ class Commit { final headers = >{}; final startSpot = scanner.position; - var lastLine = scanner.readNextLine(); - - while (lastLine != null && lastLine.isNotEmpty) { - final allHeaderMatches = headerRegExp.allMatches(lastLine); - if (allHeaderMatches.isNotEmpty) { - final match = allHeaderMatches.single; - assert(match.groupCount == 2); - final header = match.group(1)!; - final value = match.group(2)!; - headers.putIfAbsent(header, () => []).add(value); - } + while (scanner.scan(headerRegExp)) { + final match = scanner.lastMatch!; + final header = match.group(1)!; + final value = match.group(2)!; - lastLine = scanner.readNextLine()!; + headers.putIfAbsent(header, () => []).add(value); } - assert(lastLine!.isEmpty); + // consume the blank line but it might not exist if the commit has no body + // at all, or might be empty. + scanner.scan(RegExp(r'\r?\n')); - String message; + var message = ''; if (isRevParse) { final msgLines = []; - lastLine = scanner.readNextLine(); - const revParseMessagePrefix = ' '; - while (lastLine != null && lastLine.startsWith(revParseMessagePrefix)) { - msgLines.add(lastLine.substring(revParseMessagePrefix.length)); - lastLine = scanner.readNextLine(); + while (scanner.scan(RegExp(r' ([^\r\n]*)(?:\r?\n|$)'))) { + msgLines.add(scanner.lastMatch!.group(1)!); + if (!scanner.lastMatch!.group(0)!.endsWith('\n')) { + break; + } } - message = msgLines.join('\n'); + if (msgLines.isNotEmpty) { + message = msgLines.join('\n'); + } } else { message = scanner.rest; scanner.position = scanner.string.length; diff --git a/lib/src/tag.dart b/lib/src/tag.dart index 10b587cd..679d1536 100644 --- a/lib/src/tag.dart +++ b/lib/src/tag.dart @@ -22,19 +22,17 @@ class Tag { final scanner = StringScanner(content); - var lastLine = scanner.readNextLine()!; - - while (lastLine.isNotEmpty) { - final match = headerRegExp.allMatches(lastLine).single; - assert(match.groupCount == 2); + while (scanner.scan(headerRegExp)) { + final match = scanner.lastMatch!; final header = match.group(1)!; final value = match.group(2)!; headers.putIfAbsent(header, () => []).add(value); - - lastLine = scanner.readNextLine()!; } + // consume the blank line that separates headers from message + scanner.scan(RegExp(r'\r?\n')); + String objectSha; String type; String tag; diff --git a/lib/src/util.dart b/lib/src/util.dart index f1a4f2e1..c1488bbd 100644 --- a/lib/src/util.dart +++ b/lib/src/util.dart @@ -1,11 +1,12 @@ -import 'package:string_scanner/string_scanner.dart'; - import 'bot.dart'; import 'top_level.dart'; const shaRegexPattern = '[a-f0-9]{40}'; -final headerRegExp = RegExp(r'^([a-z]+) (.+)$'); +final headerRegExp = RegExp( + r'^([a-z]+) ((?:[^\r\n]|\r?\n[ \t])+)\r?\n', + multiLine: true, +); void requireArgumentValidSha1(String value, String argName) { metaRequireArgumentNotNullOrEmpty(argName); @@ -16,17 +17,3 @@ void requireArgumentValidSha1(String value, String argName) { throw ArgumentError.value(value, argName, message); } } - -extension StringScannerX on StringScanner { - String? readNextLine() { - if (isDone) return null; - if (scan(_lineRegexp)) { - return lastMatch![1]; - } - final restStr = rest; - position = string.length; - return restStr; - } -} - -final _lineRegexp = RegExp(r'([^\r\n]*)\r?\n');