33//!
44//! *leading comment* a comment that
55//! precedes an SQL Statement.
6- use std:: fmt;
6+ use std:: { fmt, path :: Path } ;
77
8- use crate :: ast:: ParsedSqlFile ;
8+ use crate :: { ast:: ParsedSqlFile , files :: { SqlFile , SqlFilesList } } ;
99
1010/// Structure for holding a location in the file. Assumes file is first split by
1111/// lines and then split by characters (column)
@@ -135,23 +135,36 @@ impl Comment {
135135/// Enum for returning errors withe Comment parsing
136136#[ derive( Debug ) ]
137137pub enum CommentError {
138- /// Found a block terminator `*/` without a matching opener `/*`
139- UnmatchedBlockCommentStart {
140- /// Returns the location of the block terminator found
138+ /// Found a multiline comment terminator `*/` without a matching opener `/*`
139+ UnmatchedMultilineCommentStart {
140+ /// Returns the location of the terminator found
141141 location : Location ,
142142 } ,
143+ /// Found a multiline comment that is not properly terminated before EOF
144+ UnterminatedMultiLineComment {
145+ /// Returns the location of where the multiline comment started
146+ start : Location
147+ }
143148}
144149
145150impl fmt:: Display for CommentError {
146151 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
147152 match self {
148- CommentError :: UnmatchedBlockCommentStart { location } => {
153+ Self :: UnmatchedMultilineCommentStart { location } => {
149154 write ! (
150155 f,
151156 "unmatched block comment start at line {}, column {}" ,
152157 location. line( ) ,
153158 location. column( )
154159 )
160+ } ,
161+ Self :: UnterminatedMultiLineComment { start } => {
162+ write ! (
163+ f,
164+ "unterminated block comment with start at line {}, column {}" ,
165+ start. line( ) ,
166+ start. column( ) ,
167+ )
155168 }
156169 }
157170 }
@@ -221,12 +234,13 @@ impl Comments {
221234 }
222235
223236 /// Build all leading comments from a parsed SQL file
224- ///
237+ ///
225238 /// # Parameters
226239 /// - `file`: the [`ParsedSqlFile`] that needs to be parsed for comments
227- ///
240+ ///
228241 /// # Errors
229- /// - Will return [`CommentError::UnmatchedBlockCommentStart`] if a comment does not have an opening `/*`
242+ /// - Will return [`CommentError::UnmatchedMultilineCommentStart`] if a comment
243+ /// does not have an opening `/*`
230244 pub fn parse_all_comments_from_file ( file : & ParsedSqlFile ) -> CommentResult < Self > {
231245 let src = file. content ( ) ;
232246 let comments = Self :: scan_comments ( src) ?;
@@ -237,76 +251,115 @@ impl Comments {
237251 ///
238252 /// # Parameters
239253 /// - `src` which is the `SQL` file content as a [`str`]
254+ ///
255+ /// # Errors
256+ /// - `UnmatchedMultilineCommentStart` : will return error if unable to find a starting `/*` for a multiline comment
257+ /// - `UnterminatedMultiLineComment` : will return error if there is an unterminated multiline comment, found at EOF
240258 pub fn scan_comments ( src : & str ) -> CommentResult < Self > {
241259 let mut comments = Vec :: new ( ) ;
242- let mut current_line = 0u64 ;
243- let mut current_column = 0u64 ;
244260
245261 let mut start_line = 0u64 ;
246- let mut start_column = 0u64 ;
247-
248- let mut single_line = String :: new ( ) ;
249- let mut multi_line = String :: new ( ) ;
250-
251- let mut src_state_machine = src. chars ( ) . peekable ( ) ;
252- while !src_state_machine. peek ( ) . is_none ( ) {
253- if let Some ( c) = src_state_machine. next ( ) {
254- match c {
255- '-' => {
256- if single_line. is_empty ( ) {
257- single_line. push ( c) ;
258- start_column = current_column;
259- start_line = current_line;
260- } else if single_line. chars ( ) . last ( ) == Some ( '-' ) {
261- single_line. push ( c) ;
262- }
263- } ,
264- '/' => {
265- if multi_line. is_empty ( ) {
266- multi_line. push ( c) ;
267- start_column = current_column;
268- start_line = current_line;
269- } else if multi_line. chars ( ) . last ( ) == Some ( '*' ) {
270- multi_line. push ( c) ;
271- comments. push ( Comment { kind : CommentKind :: MultiLine ( multi_line. clone ( ) ) , span : Span { start : Location { line : start_line, column : start_column } , end : Location { line : current_line, column : current_column+1 } } } ) ;
272- multi_line. clear ( ) ;
273- }
274- } ,
275- '*' => {
276- if !multi_line. is_empty ( ) {
277- multi_line. push ( c) ;
278- }
279- } ,
280- '\n' => {
281- if !single_line. is_empty ( ) {
282- comments. push ( Comment { kind : CommentKind :: SingleLine ( single_line. clone ( ) ) , span : Span { start : Location { line : start_line, column : start_column+1 } , end : Location { line : current_line, column : current_column } } } ) ;
283- single_line. clear ( ) ;
284- } else if !multi_line. is_empty ( ) {
285- multi_line. push ( c) ;
286- }
287- } ,
288- _ => {
289- if !single_line. is_empty ( ) {
290- single_line. push ( c) ;
291- } else if !multi_line. is_empty ( ) {
292- multi_line. push ( c) ;
293- }
262+ let mut start_col = 0u64 ;
263+
264+ let mut line = 0u64 ;
265+ let mut col = 0u64 ;
266+
267+ let mut in_single = false ;
268+ let mut in_block = false ;
269+
270+
271+ let mut buf = String :: new ( ) ;
272+
273+ let mut chars = src. chars ( ) . peekable ( ) ;
274+
275+
276+ while let Some ( c) = chars. next ( ) {
277+ match ( in_single, in_block, c) {
278+ ( false , false , '-' ) => {
279+ if chars. peek ( ) . copied ( ) == Some ( '-' ) {
280+ chars. next ( ) ;
281+ in_single = true ;
282+ start_line = line;
283+ start_col = col;
284+ buf. clear ( ) ;
285+ buf. push ( '-' ) ;
286+ buf. push ( '-' ) ;
287+ col += 1 ;
288+ }
289+ } ,
290+ ( false , false , '/' ) => {
291+ if chars. peek ( ) . copied ( ) == Some ( '*' ) {
292+ chars. next ( ) ;
293+ in_block = true ;
294+ start_line = line;
295+ start_col = col;
296+ buf. clear ( ) ;
297+ col += 1 ;
294298 }
295- }
296- if c == '\n' {
297- current_column = 0 ;
298- current_line += 1 ;
299- } else {
300- current_column += 1 ;
301- }
299+ } ,
300+ ( false , false , '*' ) => {
301+ if chars. peek ( ) . copied ( ) == Some ( '*' ) {
302+ let loc = Location :: new ( line, col) ;
303+ return Err ( CommentError :: UnmatchedMultilineCommentStart { location : loc } ) ;
304+ }
305+ } ,
306+ ( true , false , '\n' ) => {
307+ let end_loc = Location :: new ( line, col) ;
308+ comments. push ( Comment :: new (
309+ CommentKind :: SingleLine ( buf. clone ( ) ) ,
310+ Span :: new ( Location { line : start_line, column : start_col} , end_loc) ,
311+ ) ) ;
312+ in_single = false ;
313+ buf. clear ( ) ;
314+ } ,
315+ ( false , true , '*' ) => {
316+ if chars. peek ( ) . copied ( ) == Some ( '/' ) {
317+ chars. next ( ) ;
318+ buf. push ( '*' ) ;
319+ buf. push ( '/' ) ;
320+ let end_loc = Location :: new ( line, col + 1 ) ;
321+ comments. push ( Comment :: new (
322+ CommentKind :: MultiLine ( buf. trim ( ) . to_string ( ) ) ,
323+ Span :: new ( Location { line : start_line, column : start_col} , end_loc) ,
324+ ) ) ;
325+ in_block = false ;
326+ buf. clear ( ) ;
327+ col += 1 ;
328+ } else {
329+ buf. push ( '*' ) ;
330+ }
331+ } ,
332+ ( false , true , '\n' ) => {
333+ buf. push ( '\n' ) ;
334+ } ,
335+ ( false , true , ch) | ( true , false , ch) => {
336+ buf. push ( ch) ;
337+ } ,
338+ ( _, _, _) => unreachable ! ( "cannot be in single-line and block comment at once" ) ,
339+ }
340+ if c == '\n' {
341+ line += 1 ;
342+ col = 0 ;
343+ } else {
344+ col += 1 ;
302345 }
303346 }
304347
348+ // EOF: close any open comments
349+ if in_block {
350+ return Err ( CommentError :: UnterminatedMultiLineComment { start : Location { line : start_line, column : start_col} } ) ;
351+ }
352+
353+ if in_single && !buf. is_empty ( ) {
354+ let end_loc = Location :: new ( line, col) ;
355+ comments. push ( Comment :: new (
356+ CommentKind :: SingleLine ( buf. trim_end ( ) . to_string ( ) ) ,
357+ Span :: new ( Location { line : start_line, column : start_col} , end_loc) ,
358+ ) ) ;
359+ }
360+
305361 Ok ( Self { comments } )
306362 }
307- /// Parse single line comments
308-
309- /// Parse multi line comments
310363
311364 /// Getter method for retrieving the Vec of [`Comment`]
312365 #[ must_use]
@@ -316,7 +369,6 @@ impl Comments {
316369}
317370
318371#[ cfg( test) ]
319- use super :: * ;
320372
321373#[ test]
322374fn location_new_and_default ( ) {
@@ -373,3 +425,33 @@ fn multiline_comment_span() {
373425 assert_eq ! ( comment. span. start. line, 1 ) ;
374426 assert_eq ! ( comment. span. end. line, 2 ) ;
375427}
428+
/// Smoke test: every file found under `sql_files` must be scanned for
/// comments without returning an error.
#[test]
fn parse_single_line_comments() {
    use crate::ast::ParsedSqlFileSet;
    use crate::files::SqlFileSet;

    let path = Path::new("sql_files");
    let set = SqlFileSet::new(path, None).unwrap();
    let parsed_set = ParsedSqlFileSet::parse_all(set).unwrap();
    for file in parsed_set.files().iter() {
        // TODO: assert on the scanned comments once the expected content of the
        // fixture files is pinned down; for now only successful scanning is checked.
        let _parsed_comments = Comments::parse_all_comments_from_file(file).unwrap();
        match file.file().path().to_str().unwrap() {
            "sql_files/with_comments.sql" => {
                println!("succeeded!");
            }
            "sql_files/without_comments.sql" => {}
            _ => unreachable!("This shouldn't be accessible if directory parsed correctly"),
        }
    }
}
452+
453+
/// A block comment spanning two lines is scanned as a single comment, and a
/// block comment that is never closed is reported as an error.
#[test]
fn parse_multi_line_comments() {
    let parsed = Comments::scan_comments("/* first\n second */").unwrap();
    assert_eq!(parsed.comments.len(), 1);

    let comment = &parsed.comments[0];
    assert_eq!(comment.span.start.line, 0);
    assert_eq!(comment.span.end.line, 1);

    assert!(matches!(
        Comments::scan_comments("/* never closed"),
        Err(CommentError::UnterminatedMultiLineComment { .. })
    ));
}
0 commit comments