Skip to content

Commit fee1a60

Browse files
committed
DRILL-8245: Project pushdown depends on rules order and might not happen
1 parent cea11e2 commit fee1a60

6 files changed

Lines changed: 43 additions & 34 deletions

File tree

contrib/format-httpd/src/main/java/org/apache/drill/exec/store/httpd/HttpdParser.java

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,10 @@
3333
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
3434
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
3535
import nl.basjes.parse.httpdlog.HttpdLoglineParser;
36+
3637
import org.slf4j.Logger;
3738
import org.slf4j.LoggerFactory;
3839

39-
import java.util.ArrayList;
4040
import java.util.EnumSet;
4141
import java.util.List;
4242
import java.util.Map;
@@ -190,13 +190,7 @@ public TupleMetadata setupParser()
190190
if (!isStarQuery() &&
191191
!isMetadataQuery() &&
192192
!isOnlyImplicitColumns()) {
193-
List<String> keysToRemove = new ArrayList<>();
194-
for (final String key : requestedPaths.keySet()) {
195-
if (!isRequested(key)) {
196-
keysToRemove.add(key);
197-
}
198-
}
199-
keysToRemove.forEach( key -> requestedPaths.remove(key));
193+
requestedPaths = getRequestedColumnPaths();
200194
}
201195

202196
EnumSet<Casts> allCasts;
@@ -254,6 +248,25 @@ public TupleMetadata setupParser()
254248
return builder.build();
255249
}
256250

251+
private Map<String, String> getRequestedColumnPaths() {
252+
Map<String, String> requestedColumnPaths = new TreeMap<>();
253+
for (SchemaPath requestedColumn : requestedColumns) {
254+
String columnName = requestedColumn.getRootSegmentPath();
255+
String parserPath = requestedPaths.get(columnName);
256+
if (parserPath != null) {
257+
requestedColumnPaths.put(columnName, parserPath);
258+
} else {
259+
requestedPaths.keySet()
260+
.stream()
261+
.filter(colName -> colName.endsWith(HttpdUtils.SAFE_WILDCARD)
262+
&& requestedColumn.rootName().startsWith(colName.substring(0, colName.length() - HttpdUtils.SAFE_WILDCARD.length())))
263+
.findAny()
264+
.ifPresent(colName -> requestedColumnPaths.put(colName, requestedPaths.get(colName)));
265+
}
266+
}
267+
return requestedColumnPaths;
268+
}
269+
257270
public void addFieldsToParser(RowSetLoader rowWriter) {
258271
for (final Map.Entry<String, String> entry : requestedPaths.entrySet()) {
259272
try {
@@ -266,24 +279,14 @@ public void addFieldsToParser(RowSetLoader rowWriter) {
266279
}
267280

268281
public boolean isStarQuery() {
269-
return requestedColumns.size() == 1 && requestedColumns.get(0).isDynamicStar();
282+
return requestedColumns.stream()
283+
.anyMatch(SchemaPath::isDynamicStar);
270284
}
271285

272286
public boolean isMetadataQuery() {
273287
return requestedColumns.size() == 0;
274288
}
275289

276-
public boolean isRequested(String colName) {
277-
for (SchemaPath path : requestedColumns) {
278-
if (path.isDynamicStar()) {
279-
return true;
280-
} else if (path.nameEquals(colName)) {
281-
return true;
282-
}
283-
}
284-
return false;
285-
}
286-
287290
/*
288291
This is for the edge case where a query only contains the implicit fields.
289292
*/

contrib/format-httpd/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,12 @@ public void testFlattenMap() throws Exception {
118118
"\\\"%{User-agent}i\\\"', " +
119119
"flattenWildcards => true)) WHERE `request_firstline_original_uri_query_came__from` IS NOT NULL";
120120

121+
queryBuilder()
122+
.sql(sql)
123+
.planMatcher()
124+
.include("columns=\\[`request_firstline_original_uri_query_came__from`\\]")
125+
.match();
126+
121127
RowSet results = client.queryBuilder().sql(sql).rowSet();
122128

123129
TupleMetadata expectedSchema = new SchemaBuilder()

contrib/format-httpd/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReaderUserAgent.java

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -199,8 +199,6 @@ public void testUserAgentAndTypeRemapping() throws Exception {
199199

200200
RowSet results = client.queryBuilder().sql(sql).rowSet();
201201

202-
results.print();
203-
204202
TupleMetadata expectedSchema = new SchemaBuilder()
205203
.addNullable("request_receive_time_epoch", MinorType.TIMESTAMP)
206204
.addNullable("request_user-agent", MinorType.VARCHAR)
@@ -255,8 +253,4 @@ public void testUserAgentAndTypeRemapping() throws Exception {
255253
RowSetUtilities.verify(expected, results);
256254
}
257255

258-
259-
260256
}
261-
262-

exec/java-exec/src/main/java/org/apache/drill/exec/planner/PlannerPhase.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,8 @@ static RuleSet getDrillUserConfigurableLogicalRules(OptimizerRulesContext optimi
323323
/*
324324
Filter push-down related rules
325325
*/
326-
DrillPushFilterPastProjectRule.INSTANCE,
326+
DrillPushFilterPastProjectRule.LOGICAL,
327+
DrillPushFilterPastProjectRule.DRILL_INSTANCE,
327328
// Due to infinite loop in planning (DRILL-3257/CALCITE-1271), temporarily use this rule in Hep planner
328329
// RuleInstance.FILTER_SET_OP_TRANSPOSE_RULE,
329330
DrillFilterAggregateTransposeRule.INSTANCE,

exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.drill.exec.planner.logical;
1919

20+
import org.apache.drill.exec.planner.common.DrillProjectRelBase;
2021
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
2122
import org.apache.calcite.plan.RelOptRule;
2223
import org.apache.calcite.plan.RelOptRuleCall;
@@ -39,7 +40,10 @@
3940

4041
public class DrillPushFilterPastProjectRule extends RelOptRule {
4142

42-
public final static RelOptRule INSTANCE = new DrillPushFilterPastProjectRule(DrillRelFactories.LOGICAL_BUILDER);
43+
public final static RelOptRule LOGICAL = new DrillPushFilterPastProjectRule(
44+
LogicalFilter.class, LogicalProject.class, DrillRelFactories.LOGICAL_BUILDER, "DrillPushFilterPastProjectRule:logical");
45+
public final static RelOptRule DRILL_INSTANCE = new DrillPushFilterPastProjectRule(
46+
DrillFilterRel.class, DrillProjectRelBase.class, DrillRelFactories.LOGICAL_BUILDER, "DrillPushFilterPastProjectRule:drill_logical");
4347

4448
private static final Collection<String> BANNED_OPERATORS;
4549

@@ -49,8 +53,9 @@ public class DrillPushFilterPastProjectRule extends RelOptRule {
4953
BANNED_OPERATORS.add("item");
5054
}
5155

52-
private DrillPushFilterPastProjectRule(RelBuilderFactory relBuilderFactory) {
53-
super(operand(LogicalFilter.class, operand(LogicalProject.class, any())), relBuilderFactory,null);
56+
private DrillPushFilterPastProjectRule(Class<? extends Filter> filter,
57+
Class<? extends Project> project, RelBuilderFactory relBuilderFactory, String description) {
58+
super(operand(filter, operand(project, any())), relBuilderFactory,description);
5459
}
5560

5661
//~ Methods ----------------------------------------------------------------

exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillScanRel.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,7 @@ public DrillScanRel(final RelOptCluster cluster, final RelTraitSet traits,
6060

6161
public DrillScanRel(final RelOptCluster cluster, final RelTraitSet traits,
6262
final RelOptTable table, boolean partitionFilterPushdown) {
63-
// By default, scan does not support project pushdown.
64-
// Decision whether push projects into scan will be made solely in DrillPushProjIntoScanRule.
65-
this(cluster, traits, table, table.getRowType(), getProjectedColumns(table, true), partitionFilterPushdown);
63+
this(cluster, traits, table, table.getRowType(), getProjectedColumns(table, false), partitionFilterPushdown);
6664
this.settings = PrelUtil.getPlannerSettings(cluster.getPlanner());
6765
}
6866

@@ -139,7 +137,9 @@ public double estimateRowCount(RelMetadataQuery mq) {
139137
@Override
140138
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
141139
final ScanStats stats = getGroupScan().getScanStats(settings);
142-
int columnCount = Utilities.isStarQuery(columns) ? STAR_COLUMN_COST : getRowType().getFieldCount();
140+
double columnCount = Utilities.isStarQuery(columns)
141+
? STAR_COLUMN_COST
142+
: Math.pow(getRowType().getFieldCount(), 2) / Math.max(columns.size(), 1);
143143

144144
// double rowCount = RelMetadataQuery.getRowCount(this);
145145
double rowCount = Math.max(1, stats.getRecordCount());

0 commit comments

Comments (0)