1- //! Content search using regex pattern matching .
1+ //! Content search using the ripgrep crate family .
22//!
3- //! Uses [`regex `] for pattern compilation and [`ignore `] for directory
4- //! traversal that respects `.gitignore` rules. Binary files are silently
5- //! skipped .
3+ //! Uses [`grep_regex `] for pattern compilation, [`grep_searcher `] for efficient
4+ //! line-oriented searching (with binary detection and streaming I/O), and
5+ //! [`ignore`] for directory traversal that respects `.gitignore` rules .
66
77use std:: path:: { Path , PathBuf } ;
88
9+ use grep_matcher:: Matcher ;
10+ use grep_regex:: RegexMatcherBuilder ;
11+ use grep_searcher:: sinks:: UTF8 ;
12+ use grep_searcher:: { BinaryDetection , SearcherBuilder } ;
13+
914// ---------------------------------------------------------------------------
1015// Types
1116// ---------------------------------------------------------------------------
@@ -50,14 +55,17 @@ pub struct GrepOptions {
5055
5156/// Search file contents by regex pattern.
5257///
58+ /// Uses the ripgrep crate family (`grep-regex`, `grep-searcher`, `ignore`)
59+ /// for efficient, streaming search with built-in binary detection.
60+ ///
5361/// # Errors
5462///
5563/// - Invalid regex pattern.
5664/// - `path` does not exist or is inaccessible.
5765pub fn search ( opts : & GrepOptions ) -> crab_common:: Result < Vec < GrepMatch > > {
58- let re = regex :: RegexBuilder :: new ( & opts . pattern )
66+ let matcher = RegexMatcherBuilder :: new ( )
5967 . case_insensitive ( opts. case_insensitive )
60- . build ( )
68+ . build ( & opts . pattern )
6169 . map_err ( |e| crab_common:: Error :: Other ( format ! ( "invalid regex: {e}" ) ) ) ?;
6270
6371 let file_glob = if let Some ( ref glob_pat) = opts. file_glob {
@@ -71,25 +79,23 @@ pub fn search(opts: &GrepOptions) -> crab_common::Result<Vec<GrepMatch>> {
7179 None
7280 } ;
7381
74- let mut matches = Vec :: new ( ) ;
7582 let max = if opts. max_results == 0 {
7683 usize:: MAX
7784 } else {
7885 opts. max_results
7986 } ;
8087
88+ let mut all_matches = Vec :: new ( ) ;
89+
8190 if opts. path . is_file ( ) {
82- // Single file search
83- if let Ok ( file_matches) = search_file ( & opts. path , & re, opts. context_lines ) {
84- for m in file_matches {
85- if matches. len ( ) >= max {
86- break ;
87- }
88- matches. push ( m) ;
89- }
90- }
91+ search_file_grep (
92+ & opts. path ,
93+ & matcher,
94+ opts. context_lines ,
95+ max,
96+ & mut all_matches,
97+ ) ?;
9198 } else {
92- // Directory walk
9399 let mut walker = ignore:: WalkBuilder :: new ( & opts. path ) ;
94100 walker
95101 . hidden ( true )
@@ -99,7 +105,7 @@ pub fn search(opts: &GrepOptions) -> crab_common::Result<Vec<GrepMatch>> {
99105 . parents ( opts. respect_gitignore ) ;
100106
101107 for entry in walker. build ( ) . flatten ( ) {
102- if matches . len ( ) >= max {
108+ if all_matches . len ( ) >= max {
103109 break ;
104110 }
105111
@@ -119,61 +125,122 @@ pub fn search(opts: &GrepOptions) -> crab_common::Result<Vec<GrepMatch>> {
119125 }
120126 }
121127
122- if let Ok ( file_matches ) = search_file ( path , & re , opts . context_lines ) {
123- for m in file_matches {
124- if matches . len ( ) >= max {
125- break ;
126- }
127- matches . push ( m ) ;
128- }
129- }
128+ let remaining = max - all_matches . len ( ) ;
129+ search_file_grep (
130+ path ,
131+ & matcher ,
132+ opts . context_lines ,
133+ remaining ,
134+ & mut all_matches ,
135+ ) ? ;
130136 }
131137 }
132138
133- Ok ( matches )
139+ Ok ( all_matches )
134140}
135141
136- /// Search a single file and return all matches.
137- ///
138- /// # Errors
142+ // ---------------------------------------------------------------------------
143+ // Internal: file-level search using grep-searcher
144+ // ---------------------------------------------------------------------------
145+
146+ /// Search a single file using `grep-searcher` with binary detection.
147+ fn search_file_grep (
148+ path : & Path ,
149+ matcher : & grep_regex:: RegexMatcher ,
150+ context_lines : usize ,
151+ max_matches : usize ,
152+ results : & mut Vec < GrepMatch > ,
153+ ) -> crab_common:: Result < ( ) > {
154+ // When context is requested, we need a two-pass approach:
155+ // first collect all matching line numbers, then re-read to extract context.
156+ // For the no-context case, we stream directly.
157+ if context_lines > 0 {
158+ search_file_with_context ( path, matcher, context_lines, max_matches, results)
159+ } else {
160+ search_file_no_context ( path, matcher, max_matches, results)
161+ }
162+ }
163+
164+ /// Streaming search without context lines — uses `grep_searcher::Searcher`.
165+ fn search_file_no_context (
166+ path : & Path ,
167+ matcher : & grep_regex:: RegexMatcher ,
168+ max_matches : usize ,
169+ results : & mut Vec < GrepMatch > ,
170+ ) -> crab_common:: Result < ( ) > {
171+ let mut searcher = SearcherBuilder :: new ( )
172+ . binary_detection ( BinaryDetection :: quit ( 0 ) )
173+ . line_number ( true )
174+ . build ( ) ;
175+
176+ let path_buf = path. to_path_buf ( ) ;
177+
178+ // grep_searcher errors are non-fatal (binary file quit, encoding, etc.)
179+ let _ = searcher. search_path (
180+ matcher,
181+ path,
182+ UTF8 ( |line_number, line_content| {
183+ if results. len ( ) >= max_matches {
184+ return Ok ( false ) ; // stop searching
185+ }
186+ results. push ( GrepMatch {
187+ path : path_buf. clone ( ) ,
188+ line_number : line_number as usize ,
189+ line_content : line_content. trim_end_matches ( '\n' ) . to_string ( ) ,
190+ context_before : Vec :: new ( ) ,
191+ context_after : Vec :: new ( ) ,
192+ } ) ;
193+ Ok ( true )
194+ } ) ,
195+ ) ;
196+
197+ Ok ( ( ) )
198+ }
199+
200+ /// Search with context lines. Reads the file to collect lines, then matches.
139201///
140- /// Returns an error if the file cannot be read.
141- pub ( crate ) fn search_file (
202+ /// `grep-searcher` does support context via `SearcherBuilder::after_context()`
203+ /// and `before_context()`, but the sink API for context is more complex
204+ /// (`SinkContext`). We use a simpler approach: collect matches first, then
205+ /// extract context from the line buffer.
206+ fn search_file_with_context (
142207 path : & Path ,
143- regex : & regex :: Regex ,
208+ matcher : & grep_regex :: RegexMatcher ,
144209 context_lines : usize ,
145- ) -> crab_common:: Result < Vec < GrepMatch > > {
210+ max_matches : usize ,
211+ results : & mut Vec < GrepMatch > ,
212+ ) -> crab_common:: Result < ( ) > {
213+ // Read the file — grep-searcher handles binary detection
146214 let content = std:: fs:: read ( path) ?;
147215
148- // Skip binary files (contain NUL bytes )
216+ // Quick binary check (same heuristic as grep-searcher )
149217 if content. contains ( & 0 ) {
150- return Ok ( Vec :: new ( ) ) ;
218+ return Ok ( ( ) ) ;
151219 }
152220
153221 let Ok ( text) = String :: from_utf8 ( content) else {
154- return Ok ( Vec :: new ( ) ) ; // Non-UTF8, skip
222+ return Ok ( ( ) ) ;
155223 } ;
156224
157225 let lines: Vec < & str > = text. lines ( ) . collect ( ) ;
158- let mut matches = Vec :: new ( ) ;
159226
160227 for ( i, line) in lines. iter ( ) . enumerate ( ) {
161- if regex. is_match ( line) {
162- let context_before: Vec < String > = if context_lines > 0 {
228+ if results. len ( ) >= max_matches {
229+ break ;
230+ }
231+
232+ if matcher. is_match ( line. as_bytes ( ) ) . unwrap_or ( false ) {
233+ let context_before: Vec < String > = {
163234 let start = i. saturating_sub ( context_lines) ;
164235 lines[ start..i] . iter ( ) . map ( |& s| s. to_string ( ) ) . collect ( )
165- } else {
166- Vec :: new ( )
167236 } ;
168237
169- let context_after: Vec < String > = if context_lines > 0 {
238+ let context_after: Vec < String > = {
170239 let end = ( i + 1 + context_lines) . min ( lines. len ( ) ) ;
171240 lines[ i + 1 ..end] . iter ( ) . map ( |& s| s. to_string ( ) ) . collect ( )
172- } else {
173- Vec :: new ( )
174241 } ;
175242
176- matches . push ( GrepMatch {
243+ results . push ( GrepMatch {
177244 path : path. to_path_buf ( ) ,
178245 line_number : i + 1 , // 1-based
179246 line_content : ( * line) . to_string ( ) ,
@@ -183,7 +250,7 @@ pub(crate) fn search_file(
183250 }
184251 }
185252
186- Ok ( matches )
253+ Ok ( ( ) )
187254}
188255
189256#[ cfg( test) ]
0 commit comments