1+ use datafusion:: common:: test_util:: batches_to_string;
12use datafusion:: {
23 arrow:: { error:: ArrowError , record_batch:: RecordBatch } ,
34 assert_batches_eq,
@@ -25,6 +26,15 @@ use object_store::local::LocalFileSystem;
2526use std:: sync:: Arc ;
2627use tempfile:: TempDir ;
2728
29+ async fn run_query ( query : & str , ctx : & SessionContext ) -> Vec < RecordBatch > {
30+ ctx. sql ( query)
31+ . await
32+ . expect ( "Failed to create plan for query" )
33+ . collect ( )
34+ . await
35+ . expect ( "Failed to execute query" )
36+ }
37+
2838/// Convert DuckDB's Arrow RecordBatch to DataFusion's Arrow RecordBatch
2939/// using Arrow IPC format as an interchange format.
3040/// This allows compatibility between different Arrow versions.
@@ -120,28 +130,19 @@ pub async fn test_equality_delete() {
120130
121131 ctx. register_catalog ( "warehouse" , datafusion_catalog) ;
122132
123- ctx . sql (
124- "INSERT INTO warehouse.test.orders (id, customer_id, product_id, date, amount) VALUES
133+ run_query (
134+ "INSERT INTO warehouse.test.orders (id, customer_id, product_id, date, amount) VALUES
125135 (1, 1, 1, '2020-01-01', 1),
126136 (2, 2, 1, '2020-01-01', 1),
127137 (3, 3, 1, '2020-01-01', 3),
128138 (4, 1, 2, '2020-02-02', 1),
129139 (5, 1, 1, '2020-02-02', 2),
130140 (6, 3, 3, '2020-02-02', 3);" ,
141+ & ctx,
131142 )
132- . await
133- . expect ( "Failed to create query plan for insert" )
134- . collect ( )
135- . await
136- . expect ( "Failed to insert values into table" ) ;
143+ . await ;
137144
138- let batches = ctx
139- . sql ( "select * from warehouse.test.orders order by id" )
140- . await
141- . expect ( "Failed to create plan for select" )
142- . collect ( )
143- . await
144- . expect ( "Failed to execute select query" ) ;
145+ let batches = run_query ( "select * from warehouse.test.orders order by id" , & ctx) . await ;
145146
146147 let expected = [
147148 "+----+-------------+------------+------------+--------+" ,
@@ -157,15 +158,7 @@ pub async fn test_equality_delete() {
157158 ] ;
158159 assert_batches_eq ! ( expected, & batches) ;
159160
160- let batches = ctx
161- . sql (
162- "SELECT id, customer_id, product_id, date FROM warehouse.test.orders WHERE customer_id = 1 order by id" ,
163- )
164- . await
165- . expect ( "Failed to create query plan for insert" )
166- . collect ( )
167- . await
168- . expect ( "Failed to insert values into table" ) ;
161+ let batches = run_query ( "SELECT id, customer_id, product_id, date FROM warehouse.test.orders WHERE customer_id = 1 order by id" , & ctx) . await ;
169162
170163 let expected = [
171164 "+----+-------------+------------+------------+" ,
@@ -204,13 +197,7 @@ pub async fn test_equality_delete() {
204197 . await
205198 . unwrap ( ) ;
206199
207- let batches = ctx
208- . sql ( "select * from warehouse.test.orders order by id" )
209- . await
210- . expect ( "Failed to create plan for select" )
211- . collect ( )
212- . await
213- . expect ( "Failed to execute select query" ) ;
200+ let batches = run_query ( "select * from warehouse.test.orders order by id" , & ctx) . await ;
214201
215202 let expected = [
216203 "+----+-------------+------------+------------+--------+" ,
@@ -237,25 +224,16 @@ pub async fn test_equality_delete() {
237224 assert_batches_eq ! ( expected, & duckdb_batches) ;
238225
239226 // Test that projecting a column that is not included in equality deletes works
240- ctx . sql (
227+ run_query (
241228 "INSERT INTO warehouse.test.orders (id, customer_id, product_id, date, amount) VALUES
242229 (7, 3, 2, '2020-01-01', 2),
243230 (8, 2, 1, '2020-02-02', 3),
244231 (9, 1, 3, '2020-01-01', 1);" ,
232+ & ctx,
245233 )
246- . await
247- . expect ( "Failed to create query plan for insert" )
248- . collect ( )
249- . await
250- . expect ( "Failed to insert values into table" ) ;
234+ . await ;
251235
252- let batches = ctx
253- . sql ( "select sum(amount) from warehouse.test.orders" )
254- . await
255- . expect ( "Failed to create plan for select" )
256- . collect ( )
257- . await
258- . expect ( "Failed to execute select query" ) ;
236+ let batches = run_query ( "select sum(amount) from warehouse.test.orders" , & ctx) . await ;
259237
260238 let expected = [
261239 "+-----------------------------------+" ,
@@ -265,4 +243,20 @@ pub async fn test_equality_delete() {
265243 "+-----------------------------------+" ,
266244 ] ;
267245 assert_batches_eq ! ( expected, & batches) ;
246+
247+ // Test that using a filter on a column that is not included in equality deletes works
248+ let query = "select count(*) from warehouse.test.orders where product_id = 1 and (amount = 1 or customer_id = 3)" ;
249+ let batches = run_query ( query, & ctx) . await ;
250+ let expected = [
251+ "+----------+" ,
252+ "| count(*) |" ,
253+ "+----------+" ,
254+ "| 2 |" ,
255+ "+----------+" ,
256+ ] ;
257+ assert_batches_eq ! ( expected, & batches) ;
258+
259+ // Ensure we only pushed down predicates that have matching columns with delete file schemas (i.e. amount was not pushed down).
260+ let batches = run_query ( & format ! ( "explain {query}" ) , & ctx) . await ;
261+ assert ! ( batches_to_string( & batches) . contains( "projection=[id, customer_id, product_id, date], file_type=parquet, predicate=product_id@2 = 1," ) ) ;
268262}
0 commit comments