Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,31 @@ private static void validateQualifiedCommonColumn(SqlJoin join,
}
}

/** Validates that a SQL node tree does not contain qualified references
* to common columns in a JOIN USING or NATURAL JOIN context.
* This is called before identifier expansion to catch user-written
* qualified common columns in conformances where they are disallowed
* (e.g. Oracle, Presto).
*
* @param nodeList The list of SQL nodes to check
* @param join The JOIN node containing USING/NATURAL condition
* @param scope The select scope for resolving identifiers
*/
private void validateNoQualifiedCommonColumns(SqlNodeList nodeList,
SqlJoin join, SelectScope scope) {
for (SqlNode item : nodeList) {
item.accept(new SqlShuttle() {
@Override public SqlNode visit(SqlIdentifier id) {
if (!id.isSimple()) {
validateQualifiedCommonColumn(join, id, scope,
SqlValidatorImpl.this);
}
return id;
}
});
}
}

private boolean expandStar(List<SqlNode> selectItems, Set<String> aliases,
PairList<String, RelDataType> fields, boolean includeSystemVars,
SelectScope scope, SqlNode node) {
Expand Down Expand Up @@ -5253,6 +5278,17 @@ protected void validateGroupClause(SqlSelect select) {

// expand the expression in group list.
List<SqlNode> expandedList = new ArrayList<>();
// Validate that GROUP BY items do not qualify common columns
// in conformances where it is disallowed (e.g. Oracle, Presto).
// This must run before expansion, because expansion generates
// qualified identifiers that should not trigger this validation.
if (!config.conformance().allowQualifyingCommonColumn()) {
final SqlNode from = select.getFrom();
if (from instanceof SqlJoin) {
validateNoQualifiedCommonColumns(groupList,
(SqlJoin) from, getRawSelectScopeNonNull(select));
}
}
for (SqlNode groupItem : groupList) {
SqlNode expandedItem =
extendedExpand(groupItem, groupScope, select, Clause.GROUP_BY);
Expand Down Expand Up @@ -5404,6 +5440,19 @@ protected void validateHavingClause(SqlSelect select) {
}
}

/** Validates that SELECT items do not qualify common columns
* in conformances where it is disallowed (e.g. Oracle, Presto). */
private void validateSelectCommonColumns(SqlNodeList selectItems,
SqlSelect select) {
if (!config().conformance().allowQualifyingCommonColumn()) {
final SqlNode from = select.getFrom();
if (from instanceof SqlJoin) {
validateNoQualifiedCommonColumns(selectItems,
(SqlJoin) from, getRawSelectScopeNonNull(select));
}
}
}

protected RelDataType validateSelectList(final SqlNodeList selectItems,
SqlSelect select, RelDataType targetRowType) {
// First pass, ensure that aliases are unique. "*" and "TABLE.*" items
Expand All @@ -5417,6 +5466,12 @@ protected RelDataType validateSelectList(final SqlNodeList selectItems,
// Populated during select expansion when SqlConformance.isSelectAlias != UNSUPPORTED
final Map<String, SqlNode> expansions = new HashMap<>();

// Validate that SELECT items do not qualify common columns
// in conformances where it is disallowed (e.g. Oracle, Presto).
// This must run before expansion, because expansion generates
// qualified identifiers that should not trigger this validation.
validateSelectCommonColumns(selectItems, select);

for (SqlNode selectItem : selectItems) {
if (selectItem instanceof SqlSelect) {
handleScalarSubQuery(select, (SqlSelect) selectItem,
Expand Down Expand Up @@ -7672,9 +7727,10 @@ protected SqlNode expandCommonColumn(SqlSelect sqlSelect, SqlNode selectItem,

final SqlIdentifier identifier = (SqlIdentifier) selectItem;
if (!identifier.isSimple()) {
if (!validator.config().conformance().allowQualifyingCommonColumn()) {
validateQualifiedCommonColumn((SqlJoin) from, identifier, scope, validator);
}
// Qualified identifiers (e.g. t1.col) are returned unchanged.
// Validation of qualified common columns is performed before expansion,
// in validateSelectCommonColumns, where the original user-written
// identifier (with its source position) is still available.
return selectItem;
}

Expand All @@ -7700,10 +7756,14 @@ private SqlNode expandExprFromJoin(SqlJoin join, SqlIdentifier identifier, Selec
final List<SqlNode> qualifiedNode = new ArrayList<>();
for (ScopeChild child : requireNonNull(scope, "scope").children) {
if (matcher.indexOf(child.namespace.getRowType().getFieldNames(), name) >= 0) {
// These identifiers are created by the expansion logic, not
// parsed from user input, so they have no meaningful source
// position. SqlParserPos.ZERO is the standard convention for
// internally generated nodes.
final SqlIdentifier exp =
new SqlIdentifier(
ImmutableList.of(child.name, name),
identifier.getParserPosition());
SqlParserPos.ZERO);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not look like a robust way to carry information between compilation stages; in general, the parser position is best-effort in Calcite.

There must be a better way to do this. My intuition tells me that the unparser has to handle this case by stripping out the qualifying information. Alternatively, can the join type be changed to an INNER JOIN?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your suggestion. The original plan was indeed inadequate. I have now revised it to completely separate validation from expansion — validating the original AST once before expansion, and not performing validation during expansion.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But why remove the position from the identifier?

Copy link
Copy Markdown
Contributor Author

@cjj2010 cjj2010 May 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why remove the position from the identifier?

Thank you for reviewing this pull request (PR). The identifier here is synthetic — the validator fabricates EMP.DEPTNO / DEPT.DEPTNO when expanding JOIN ... USING(deptno) into COALESCE(EMP.DEPTNO, DEPT.DEPTNO) AS DEPTNO. The user never typed it.
Three reasons to use SqlParserPos.ZERO:

  1. SqlParserPos.ZERO is documented as "use this if the node doesn't correspond to a position in piece of SQL text." The surrounding COALESCE and AS in this same expansion already use ZERO; the inner identifiers should match.
  2. With identifier.getParserPosition(), the synthetic EMP.DEPTNO was indistinguishable from a user-written one. When it re-entered expandCommonColumn, conformances with allowQualifyingCommonColumn() == false (Oracle, Presto, …) raised a spurious Cannot qualify common column 'EMP.DEPTNO'. Pairing this change with the new pre-expansion check validateNoQualifiedCommonColumns cleanly separates "user wrote it" (real position, rejected) from "validator synthesized it" (ZERO, allowed).
  3. Any future error raised on this node would otherwise underline the user's deptno and report EMP.DEPTNO — misleading for SqlAdvisor.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand. Are you saying that having position on the identifier leads to an error, and not having one does not? I didn't know that's possible.

What is a "synthetic" identifier? In the end all identifiers come from the source code.

Providing errors without source position is a worse user-experience, even if the program has been optimized. Is the concern for SqlAdvisor based on a concrete example, or is it hypothetical?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the careful review — these are good questions.
On the SqlParserPos.ZERO change:
The SqlIdentifier nodes created inside expandExprFromJoin (i.e., the t1.col and t2.col sub-expressions that form the COALESCE) are not parsed from user input — they are constructed programmatically by the expansion logic. The user only writes col; the framework builds the qualified forms internally. Since validation is now performed before expansion, directly on the original user-written identifiers (which carry proper source positions), these internally generated nodes no longer need to carry a meaningful position. Using identifier.getParserPosition() on them was misleading — it implied they were the same node as the original identifier, which they are not. SqlParserPos.ZERO is the standard convention in Calcite for internally generated nodes (see also expandStar, validateFrom, etc.).
On "synthetic identifier":
I used "synthetic" informally to mean "created programmatically, not parsed from user SQL." I agree the term is not ideal — I'll remove it from the comment to avoid confusion.
On the SqlAdvisor concern:
The concern was hypothetical, not based on a concrete failing case. SqlAdvisorValidator overrides expand, expandSelectExpr, and expandOrderExpr to return expressions unchanged, so expandExprFromJoin is not reachable from that path. More importantly, error reporting quality is not degraded: validation errors are now thrown on the original user-written identifiers (before expansion), which always carry real source positions. So users will still see accurate error locations.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validation errors are not the only errors we have to be concerned about.
Our compiler carries the position information all the way to the runtime, and surfaces runtime errors using source position information. A runtime error without position information usually provides very little actionable insight to the user about what went wrong. So in general I am trying very hard to preserve any source position information that is available.

qualifiedNode.add(exp);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6092,4 +6092,45 @@ void checkUserDefinedOrderByOver(NullCollation nullCollation) {
final String sql = "select distinct deptno, deptno, empno, 1, 'a' from emp order by rand(), 1";
sql(sql).ok();
}

/** Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-7085">[CALCITE-7085]
* JOIN USING with unqualified common column fails in a conformance where
* allowQualifyingCommonColumn is false (e.g. Oracle, Presto)</a>. */
@Test void testJoinUsingWithConformanceOracle() {
final String sql = "SELECT deptno, name\n"
+ "FROM emp JOIN dept using (deptno)";
sql(sql).withConformance(SqlConformanceEnum.ORACLE_10).ok();
Comment thread
mihaibudiu marked this conversation as resolved.
}


/** Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-7085">[CALCITE-7085]
* JOIN USING with unqualified common column fails in a conformance where
* allowQualifyingCommonColumn is false (e.g. Oracle, Presto)</a>. */
@Test void testLeftJoinUsingWithConformanceOracle() {
final String sql = "SELECT deptno, name\n"
+ "FROM emp LEFT OUTER JOIN dept using (deptno)";
sql(sql).withConformance(SqlConformanceEnum.ORACLE_10).ok();
}

/** Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-7085">[CALCITE-7085]
* JOIN USING with unqualified common column fails in a conformance where
* allowQualifyingCommonColumn is false (e.g. Oracle, Presto)</a>. */
@Test void testRightJoinUsingWithConformanceOracle() {
final String sql = "SELECT deptno, name\n"
+ "FROM emp RIGHT OUTER JOIN dept using (deptno)";
sql(sql).withConformance(SqlConformanceEnum.ORACLE_10).ok();
}

/** Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-7085">[CALCITE-7085]
* JOIN USING with unqualified common column fails in a conformance where
* allowQualifyingCommonColumn is false (e.g. Oracle, Presto)</a>. */
@Test void testJoinUsingWithConformancePresto() {
final String sql = "SELECT deptno, name\n"
+ "FROM emp JOIN dept using (deptno)";
sql(sql).withConformance(SqlConformanceEnum.PRESTO).ok();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4368,6 +4368,34 @@ LogicalProject(DEPTNO=[$0], EXPR$1=[$1])
LogicalJoin(condition=[=($7, $9)], joinType=[full])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
<TestCase name="testJoinUsingWithConformanceOracle">
<Resource name="sql">
<![CDATA[SELECT deptno, name
FROM emp JOIN dept using (deptno)]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(DEPTNO=[$7], NAME=[$10])
LogicalJoin(condition=[=($7, $9)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
<TestCase name="testJoinUsingWithConformancePresto">
<Resource name="sql">
<![CDATA[SELECT deptno, name
FROM emp JOIN dept using (deptno)]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(DEPTNO=[$7], NAME=[$10])
LogicalJoin(condition=[=($7, $9)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
Expand Down Expand Up @@ -5000,6 +5028,20 @@ LogicalProject(C=[$0], N=[$3])
LogicalAggregate(group=[{0}])
LogicalProject($f2=[+($0, 1)])
LogicalValues(tuples=[[{ 4 }]])
]]>
</Resource>
</TestCase>
<TestCase name="testLeftJoinUsingWithConformanceOracle">
<Resource name="sql">
<![CDATA[SELECT deptno, name
FROM emp LEFT OUTER JOIN dept using (deptno)]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(DEPTNO=[$7], NAME=[$10])
LogicalJoin(condition=[=($7, $9)], joinType=[left])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
Expand Down Expand Up @@ -7396,6 +7438,20 @@ GROUP BY ROLLUP(deptno)]]>
LogicalAggregate(group=[{0}], groups=[[{0}, {}]], EXPR$1=[COUNT(DISTINCT $1)])
LogicalProject(DEPTNO=[$7], EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testRightJoinUsingWithConformanceOracle">
<Resource name="sql">
<![CDATA[SELECT deptno, name
FROM emp RIGHT OUTER JOIN dept using (deptno)]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(DEPTNO=[COALESCE($7, $9)], NAME=[$10])
LogicalJoin(condition=[=($7, $9)], joinType=[right])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
Expand Down
Loading