From 2841b683a0d983faa31f0dca5da34c63e99009f0 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Thu, 7 Mar 2024 02:09:05 +0000 Subject: [PATCH 1/9] =?UTF-8?q?[CALCITE-6301]=20Extend=20=E2=80=98Must-fil?= =?UTF-8?q?ter=E2=80=99=20columns=20to=20support=20a=20conditional=20bypas?= =?UTF-8?q?s=20list?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sql/validate/AbstractNamespace.java | 6 + .../sql/validate/IdentifierNamespace.java | 1 + .../calcite/sql/validate/SemanticTable.java | 6 + .../sql/validate/SqlValidatorImpl.java | 59 ++- .../sql/validate/SqlValidatorNamespace.java | 4 + .../calcite/sql/validate/TableNamespace.java | 4 + .../sql/validate/WithItemNamespace.java | 1 + .../calcite/sql/validate/WithNamespace.java | 1 + .../apache/calcite/test/SqlValidatorTest.java | 356 +++++++++++++++++- .../test/catalog/MockCatalogReader.java | 12 +- .../catalog/MustFilterMockCatalogReader.java | 5 +- 11 files changed, 440 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java index 27979e72ea0..cd310bc0b1b 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java @@ -62,6 +62,8 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { * should typically be re-assigned on validate. */ protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of(); + protected ImmutableBitSet mustFilterBypassFields = ImmutableBitSet.of(); + protected final @Nullable SqlNode enclosingNode; //~ Constructors ----------------------------------------------------------- @@ -169,6 +171,10 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { "mustFilterFields (maybe validation is not complete?)"); } + @Override public ImmutableBitSet getMustFilterBypassFields() { + return requireNonNull(mustFilterBypassFields, + "mustFilterBypassFields (maybe validation is not complete?)"); + } @Override public SqlMonotonicity getMonotonicity(String columnName) { return SqlMonotonicity.NOT_MONOTONIC; } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java index 87d088bc800..64164708b31 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java @@ -211,6 +211,7 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) { } this.mustFilterFields = resolvedNamespace.getMustFilterFields(); + this.mustFilterBypassFields = resolvedNamespace.getMustFilterBypassFields(); RelDataType rowType = resolvedNamespace.getRowType(); if (extendList != null) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java index b115dbb4ccc..9f246d081f3 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java @@ -18,6 +18,10 @@ import org.checkerframework.checker.nullness.qual.Nullable; +import java.util.List; + +import static java.util.Collections.emptyList; + /** * Extension to {@link SqlValidatorTable} with extra, optional metadata. * @@ -44,4 +48,6 @@ public interface SemanticTable { default boolean mustFilter(int column) { return getFilter(column) != null; } + + default List bypassFieldList() { return emptyList(); } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index 706c8e6be0e..ee3aef3d77b 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -4100,32 +4100,37 @@ protected void validateSelect( // Deduce which columns must be filtered. ns.mustFilterFields = ImmutableBitSet.of(); + ns.mustFilterBypassFields = ImmutableBitSet.of(); if (from != null) { final Set qualifieds = new LinkedHashSet<>(); + final Set bypassQualifieds = new LinkedHashSet<>(); for (ScopeChild child : fromScope.children) { final List fieldNames = child.namespace.getRowType().getFieldNames(); - child.namespace.getMustFilterFields() - .forEachInt(i -> - qualifieds.add( - SqlQualified.create(fromScope, 1, child.namespace, - new SqlIdentifier( - ImmutableList.of(child.name, fieldNames.get(i)), - SqlParserPos.ZERO)))); + toQualifieds(child.namespace.getMustFilterFields(), qualifieds, fromScope, child, + fieldNames); + toQualifieds(child.namespace.getMustFilterBypassFields(), bypassQualifieds, fromScope, child, fieldNames); } if (!qualifieds.isEmpty()) { if (select.getWhere() != null) { forEachQualified(select.getWhere(), getWhereScope(select), qualifieds::remove); + purgeForBypassFields(select.getWhere(), getWhereScope(select), + qualifieds, bypassQualifieds); } if (select.getHaving() != null) { forEachQualified(select.getHaving(), getHavingScope(select), qualifieds::remove); + purgeForBypassFields(select.getHaving(), getHavingScope(select), + qualifieds, bypassQualifieds); } // Each of the must-filter fields identified must be returned as a // SELECT item, which is then flagged as must-filter. final BitSet mustFilterFields = new BitSet(); + // Each of the bypass fields identified must be returned as a + // SELECT item, which is then flagged as a bypass field for the consumer. + final BitSet mustFilterBypassFields = new BitSet(); final List expandedSelectItems = requireNonNull(fromScope.getExpandedSelectList(), "expandedSelectList"); @@ -4140,6 +4145,11 @@ protected void validateSelect( // column for our consumer. mustFilterFields.set(i); } + if (bypassQualifieds.remove(qualified)) { + // SELECT item #i referenced a bypass column that was not filtered in the WHERE + // or HAVING. It becomes a bypass column for our consumer. + mustFilterBypassFields.set(i); + } } }); @@ -4154,6 +4164,7 @@ protected void validateSelect( .toString())); } ns.mustFilterFields = ImmutableBitSet.fromBitSet(mustFilterFields); + ns.mustFilterBypassFields = ImmutableBitSet.fromBitSet(mustFilterBypassFields); } } @@ -4185,6 +4196,40 @@ private static void forEachQualified(SqlNode node, SqlValidatorScope scope, }); } + /* If the supplied SqlNode when fully qualified is in the set of bypassQualifieds, then we + remove all entries in the qualifieds set that belong to the same table identifier. + */ + private static void purgeForBypassFields(SqlNode node, SqlValidatorScope scope, + Set qualifieds, Set bypassQualifieds) { + node.accept(new SqlBasicVisitor() { + @Override public Void visit(SqlIdentifier id) { + final SqlQualified qualified = scope.fullyQualify(id); + if (bypassQualifieds.contains(qualified)) { + // Clear all the must-filter qualifieds from the same table identifier + Collection sameIdentifier = qualifieds.stream() + .filter(q -> qualifiedMatchesIdentifier(q, qualified)) + .collect(Collectors.toList()); + sameIdentifier.forEach (qualifieds::remove); + } + return null; + } + }); + } + + private static void toQualifieds(ImmutableBitSet fields, Set qualifiedSet, + SelectScope fromScope, ScopeChild child, List fieldNames){ + fields.forEachInt(i -> + qualifiedSet.add( + SqlQualified.create(fromScope, 1, child.namespace, + new SqlIdentifier( + ImmutableList.of(child.name, fieldNames.get(i)), + SqlParserPos.ZERO)))); + + } + private static boolean qualifiedMatchesIdentifier(SqlQualified q1, SqlQualified q2) { + return q1.identifier.names.get(0).equals(q2.identifier.names.get(0)); + } + private void checkRollUpInSelectList(SqlSelect select) { SqlValidatorScope scope = getSelectScope(select); for (SqlNode item : SqlNonNullableAccessors.getSelectList(select)) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java index 77a8dc070e2..cad5c2f1a99 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java @@ -219,4 +219,8 @@ default boolean fieldExists(String name) { default ImmutableBitSet getMustFilterFields() { return ImmutableBitSet.of(); } + + default ImmutableBitSet getMustFilterBypassFields() { + return ImmutableBitSet.of(); + } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java index f6a0069560b..abd6ba8a755 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java @@ -68,6 +68,10 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table, .map(RelDataTypeField::getIndex) .filter(semanticTable::mustFilter) .collect(toImmutableBitSet())); + table.maybeUnwrap(SemanticTable.class) + .ifPresent(semanticTable -> + this.mustFilterBypassFields = semanticTable.bypassFieldList() + .stream().collect(toImmutableBitSet())); if (extendedFields.isEmpty()) { return table.getRowType(); diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java index 5c0a85756fa..31ec34a55b0 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java @@ -42,6 +42,7 @@ class WithItemNamespace extends AbstractNamespace { validator.getNamespaceOrThrow(getQuery()); final RelDataType rowType = childNs.getRowTypeSansSystemColumns(); mustFilterFields = childNs.getMustFilterFields(); + mustFilterBypassFields = childNs.getMustFilterBypassFields(); SqlNodeList columnList = withItem.columnList; if (columnList == null) { return rowType; diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java index bec57b715b1..61b6c5a14c9 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java @@ -65,6 +65,7 @@ public class WithNamespace extends AbstractNamespace { final RelDataType rowType = validator.getValidatedNodeType(with.body); validator.setValidatedNodeType(with, rowType); mustFilterFields = bodyNamespace.getMustFilterFields(); + mustFilterBypassFields = bodyNamespace.getMustFilterBypassFields(); return rowType; } diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java index c27e0124ab2..bd550d05ed5 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java @@ -12235,10 +12235,10 @@ private void checkCustomColumnResolving(String table) { + " where empno = 1)\n" + "where j = 'doctor'") .ok(); - // Deceitful alias #2. Filter on 'job' is a filter on the underlying - // 'ename', so the underlying 'job' is missing a filter. + // Deceitful alias #2. Filter on 'job' is a filter on the underlying + // 'slacker', so the underlying 'job' is missing a filter. fixture.withSql("^select * from (\n" - + " select job as j, ename as job\n" + + " select job as j, slacker as job\n" + " from emp\n" + " where empno = 1)\n" + "where job = 'doctor'^") @@ -12379,6 +12379,356 @@ private void checkCustomColumnResolving(String table) { + "from cte\n" + "where empno = 1") .fails(missingFilters("JOB")); + fixture.withSql("WITH cte AS (\n" + + " select * from emp)\n" + + "SELECT *\n" + + "from cte\n" + + "where empno = 1\n" + + "and job = 'doctor'") + .ok(); + fixture.withSql("WITH cte AS (\n" + + " select * from emp where empno = 1)\n" + + "SELECT *\n" + + "from cte\n" + + "where job = 'doctor'") + .ok(); + fixture.withSql("WITH cte AS (\n" + + " select empno, job from emp)\n" + + "SELECT *\n" + + "from cte\n" + + "where empno = 1\n" + + "and job = 'doctor'") + .ok(); + + // Filters are missing on EMPNO and JOB, but the error message only + // complains about JOB because EMPNO is in the SELECT clause, and could + // theoretically be filtered by an enclosing query. + fixture.withSql("^select empno\n" + + "from emp^") + .fails(missingFilters("JOB")); + fixture.withSql("^select empno,\n" + + " sum(sal) over (order by mgr)\n" + + "from emp^") + .fails(missingFilters("JOB")); + } + + + /** + * Tests validation of must-filter columns with the inclusion of bypass fields. + * + *

If a table that implements + * {@link org.apache.calcite.sql.validate.SemanticTable} tags fields as + * 'must-filter', and the SQL query does not contain a WHERE or HAVING clause + * on each of the tagged columns, the validator should throw an error. + * If any bypass field for a table is in a WHERE/HAVING clause for that SELECT statement, + * the must-filter requirements for that table are disabled. + */ + @Test void testMustFilterColumnsWithBypass() { + final SqlValidatorFixture fixture = fixture() + .withParserConfig(c -> c.withQuoting(Quoting.BACK_TICK)) + .withOperatorTable(operatorTableFor(SqlLibrary.BIG_QUERY)) + .withCatalogReader(MustFilterMockCatalogReader::create); + // Basic query + fixture.withSql("select empno\n" + + "from emp\n" + + "where job = 'doctor'\n" + + "and empno = 1") + .ok(); + fixture.withSql("^select *\n" + + "from emp\n" + + "where concat(emp.empno, ' ') = 'abc'^") + .fails(missingFilters("JOB")); + + fixture.withSql("select *\n" + + "from emp\n" + + "where concat(emp.ename, ' ') = 'abc'^") + .ok(); + // ENAME is a bypass field + + // SUBQUERIES + fixture.withSql("select * from (\n" + + " select * from emp where empno = 1)\n" + + "where job = 'doctor'") + .ok(); + // Deceitful alias #1. Filter on 'j' is a filter on the underlying 'job'. + fixture.withSql("select * from (\n" + + " select job as j, ename as job\n" + + " from emp\n" + + " where empno = 1)\n" + + "where j = 'doctor'") + .ok(); + // Deceitful alias #2. Filter on 'job' is a filter on the underlying + // 'slacker', so the underlying 'job' is missing a filter. + fixture.withSql("^select * from (\n" + + " select job as j, slacker as job\n" + + " from emp\n" + + " where empno = 1)\n" + + "where job = 'doctor'^") + .fails(missingFilters("J")); + + // Deceitful alias #3. Filter on 'job' is a filter on the underlying + // 'ename', which is a bypass field thus no exception. + fixture.withSql("select * from (\n" + + " select job as j, ename as job\n" + + " from emp\n" + + " where empno = 1)\n" + + "where job = 'doctor'^") + .ok(); + fixture.withSql("select * from (\n" + + " select * from emp where job = 'doctor')\n" + + "where empno = 1") + .ok(); + fixture.withSql("select * from (\n" + + " select empno from emp where job = 'doctor')\n" + + "where empno = 1") + .ok(); + fixture.withSql("^select * from (\n" + + " select * from emp where empno = 1)^") + .fails(missingFilters("JOB")); + fixture.withSql("select * from (\n" + + " select * from emp where ename = 1)^") + .ok(); + // ENAME is a bypass field + + fixture.withSql("^select * from (select * from `SALES`.`EMP`) as a1^ ") + .fails(missingFilters("EMPNO", "JOB")); + + fixture.withSql("select * from (select * from `SALES`.`EMP`) as a1 where ename = '1'^ ") + .ok(); + // JOINs + fixture.withSql("^select *\n" + + "from emp\n" + + "join dept on emp.deptno = dept.deptno^") + .fails(missingFilters("EMPNO", "JOB", "NAME")); + + fixture.withSql("^select *\n" + + "from emp\n" + + "join dept on emp.deptno = dept.deptno where ename = '1'^") + .fails(missingFilters("NAME")); + // ENAME is a bypass field for EMP table. + + fixture.withSql("^select *\n" + + "from emp\n" + + "join dept on emp.deptno = dept.deptno\n" + + "where emp.empno = 1^") + .fails(missingFilters("JOB", "NAME")); + fixture.withSql("select *\n" + + "from emp\n" + + "join dept on emp.deptno = dept.deptno\n" + + "where emp.empno = 1\n" + + "and emp.job = 'doctor'\n" + + "and dept.name = 'ACCOUNTING'") + .ok(); + fixture.withSql("select *\n" + + "from emp\n" + + "join dept on emp.deptno = dept.deptno\n" + + "where empno = 1\n" + + "and job = 'doctor'\n" + + "and dept.name = 'ACCOUNTING'") + .ok(); + + // Self-join + fixture.withSql("^select *\n" + + "from `SALES`.emp a1\n" + + "join `SALES`.emp a2 on a1.empno = a2.empno^") + .fails(missingFilters("EMPNO", "EMPNO0", "JOB", "JOB0")); + + fixture.withSql("^select *\n" + + "from `SALES`.emp a1\n" + + "join `SALES`.emp a2 on a1.empno = a2.empno where a1.ename = '1'^") + .fails(missingFilters("EMPNO0", "JOB0")); + // Filtering on a bypass field in a1 disables must-filter for a1, but a2 must-filters + // are still required. + + fixture.withSql("^select *\n" + + "from emp a1\n" + + "join emp a2 on a1.empno = a2.empno\n" + + "where a2.empno = 1\n" + + "and a1.empno = 1\n" + + "and a2.job = 'doctor'^") + .fails(missingFilters("JOB")); + // There are two JOB columns but only one is filtered + + fixture.withSql("select *\n" + + "from emp a1\n" + + "join emp a2 on a1.empno = a2.empno\n" + + "where a2.empno = 1\n" + + "and a1.empno = 1\n" + + "and a2.job = 'doctor'^\n" + + "and a1.ename = '1'") + .ok(); + + fixture.withSql("select *\n" + + "from emp a1\n" + + "join emp a2 on a1.empno = a2.empno\n" + + "where a1.empno = 1\n" + + "and a1.job = 'doctor'\n" + + "and a2.empno = 2\n" + + "and a2.job = 'undertaker'\n") + .ok(); + fixture.withSql("^select *\n" + + " from (select * from `SALES`.`EMP`) as a1\n" + + "join (select * from `SALES`.`EMP`) as a2\n" + + " on a1.`EMPNO` = a2.`EMPNO`^") + .fails(missingFilters("EMPNO", "EMPNO0", "JOB", "JOB0")); + + fixture.withSql("^select *\n" + + " from (select * from `SALES`.`EMP`) as a1\n" + + "join (select * from `SALES`.`EMP`) as a2\n" + + " on a1.`EMPNO` = a2.`EMPNO`\n" + + "where a1.ename = '1'^") + .fails(missingFilters("EMPNO0", "JOB0")); + // Filtering on a bypass field in a1 disables must-filter for a1, but a2 must-filters + // are still required. + + fixture.withSql("^select *\n" + + " from (select * from `SALES`.`EMP` where `ENAME` = '1') as a1\n" + + "join (select * from `SALES`.`EMP`) as a2\n" + + " on a1.`EMPNO` = a2.`EMPNO`^") + .fails(missingFilters("EMPNO0", "JOB0")); + + // USING + fixture.withSql("^select *\n" + + "from emp\n" + + "join dept using(deptno)\n" + + "where emp.empno = 1^") + .fails(missingFilters("JOB", "NAME")); + + fixture.withSql("^select *\n" + + "from emp\n" + + "join dept using(deptno)\n" + + "where emp.ename = '1'^") + .fails(missingFilters("NAME")); + // ENAME is bypass field for EMP. + + fixture.withSql("select *\n" + + "from emp\n" + + "join dept using(deptno)\n" + + "where emp.empno = 1\n" + + "and emp.job = 'doctor'\n" + + "and dept.name = 'ACCOUNTING'") + .ok(); + + // GROUP BY (HAVING) + fixture.withSql("select *\n" + + "from dept\n" + + "group by deptno, name\n" + + "having name = 'accounting_dept'") + .ok(); + fixture.withSql("^select *\n" + + "from dept\n" + + "group by deptno, name^") + .fails(missingFilters("NAME")); + + fixture.withSql("select *\n" + + "from dept\n" + + "group by deptno, name\n" + + "having deptno > '1'") + .ok(); + // DEPTNO is bypass field + + fixture.withSql("select name\n" + + "from dept\n" + + "group by name\n" + + "having name = 'accounting'") + .ok(); + fixture.withSql("^select name\n" + + "from dept\n" + + "group by name^ ") + .fails(missingFilters("NAME")); + + fixture.withSql("select sum(sal)\n" + + "from emp\n" + + "where empno > 10\n" + + "and job = 'doctor'\n" + + "group by empno\n" + + "having sum(sal) > 100") + .ok(); + fixture.withSql("^select sum(sal)\n" + + "from emp\n" + + "where empno > 10\n" + + "group by empno\n" + + "having sum(sal) > 100^") + .fails(missingFilters("JOB")); + + fixture.withSql("^select sum(sal)\n" + + "from emp\n" + + "where empno > 10\n" + + "group by empno\n" + + "having sum(sal) > 100^") + .fails(missingFilters("JOB")); + + fixture.withSql("select sum(sal), job\n" + + "from emp\n" + + "where empno > 10\n" + + "group by job\n" + + "having job = 'undertaker'") + .ok(); + + fixture.withSql("select sum(sal), ename\n" + + "from emp\n" + + "where empno > 10\n" + + "group by empno, ename\n" + + "having ename = '1'") + .ok(); + + fixture.withSql("select sum(sal)\n" + + "from emp\n" + + "where ename = '1'\n" + + "group by empno, ename\n" + + "having sum(sal) > 100") + .ok(); + + // CTE + fixture.withSql("^WITH cte AS (\n" + + " select * from emp order by empno)^\n" + + "SELECT * from cte") + .fails(missingFilters("EMPNO", "JOB")); + + fixture.withSql("WITH cte AS (\n" + + " select * from emp where ename = '1' order by empno)^\n" + + "SELECT * from cte") + .ok(); + // ENAME is a bypass field + + fixture.withSql("WITH cte AS (\n" + + " select * from emp order by empno)^\n" + + "SELECT * from cte where ename = '1'") + .ok(); + // ENAME is a bypass field + + fixture.withSql("^WITH cte AS (\n" + + " select * from emp where empno = 1)^\n" + + "SELECT * from cte") + .fails(missingFilters("JOB")); + + fixture.withSql("WITH cte AS (\n" + + " select *\n" + + " from emp\n" + + " where empno = 1\n" + + " and job = 'doctor')\n" + + "SELECT * from cte") + .ok(); + fixture.withSql("^WITH cte AS (\n" + + " select * from emp)^\n" + + "SELECT *\n" + + "from cte\n" + + "where empno = 1") + .fails(missingFilters("JOB")); + + fixture.withSql("WITH cte AS (\n" + + " select * from emp where ename = '1')^\n" + + "SELECT *\n" + + "from cte\n") + .ok(); + + fixture.withSql("WITH cte AS (\n" + + " select * from emp)^\n" + + "SELECT *\n" + + "from cte\n" + + "where ename = '1'") + .ok(); + fixture.withSql("WITH cte AS (\n" + " select * from emp)\n" + "SELECT *\n" diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java index 01aeb5b8b2c..5ee0d113e97 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java @@ -1026,15 +1026,17 @@ public static class MockDynamicTable public static class MustFilterMockTable extends MockTable implements SemanticTable { private final Map fieldFilters; + private final List bypassFieldList; MustFilterMockTable(MockCatalogReader catalogReader, String catalogName, String schemaName, String name, boolean stream, boolean temporal, double rowCount, @Nullable ColumnResolver resolver, InitializerExpressionFactory initializerExpressionFactory, - Map fieldFilters) { + Map fieldFilters, List bypassFieldList) { super(catalogReader, catalogName, schemaName, name, stream, temporal, rowCount, resolver, initializerExpressionFactory); this.fieldFilters = ImmutableMap.copyOf(fieldFilters); + this.bypassFieldList = ImmutableList.copyOf(bypassFieldList); } /** Creates a MustFilterMockTable. */ @@ -1042,11 +1044,11 @@ public static MustFilterMockTable create(MockCatalogReader catalogReader, MockSchema schema, String name, boolean stream, double rowCount, @Nullable ColumnResolver resolver, InitializerExpressionFactory initializerExpressionFactory, - boolean temporal, Map fieldFilters) { + boolean temporal, Map fieldFilters, List bypassFieldList) { MustFilterMockTable table = new MustFilterMockTable(catalogReader, schema.getCatalogName(), schema.name, name, stream, temporal, rowCount, resolver, - initializerExpressionFactory, fieldFilters); + initializerExpressionFactory, fieldFilters, bypassFieldList); schema.addTable(name); return table; } @@ -1060,6 +1062,10 @@ public static MustFilterMockTable create(MockCatalogReader catalogReader, String columnName = columnList.get(column).getKey(); return fieldFilters.containsKey(columnName); } + + @Override public List bypassFieldList() { + return bypassFieldList; + } } /** Wrapper around a {@link MockTable}, giving it a {@link Table} interface. diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java index 69cc3efaf76..222cba8c531 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java @@ -22,6 +22,7 @@ import org.apache.calcite.sql.validate.SqlValidatorCatalogReader; import org.apache.calcite.sql2rel.NullInitializerExpressionFactory; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; /** @@ -50,7 +51,7 @@ public static SqlValidatorCatalogReader create(RelDataTypeFactory typeFactory, MustFilterMockTable empTable = MustFilterMockTable.create(this, salesSchema, "EMP", false, 14, null, NullInitializerExpressionFactory.INSTANCE, - false, ImmutableMap.of("EMPNO", "10", "JOB", "JOB_1")); + false, ImmutableMap.of("EMPNO", "10", "JOB", "JOB_1"), ImmutableList.of(1)); final RelDataType integerType = typeFactory.createSqlType(SqlTypeName.INTEGER); @@ -75,7 +76,7 @@ public static SqlValidatorCatalogReader create(RelDataTypeFactory typeFactory, MustFilterMockTable deptTable = MustFilterMockTable.create(this, salesSchema, "DEPT", false, 14, null, NullInitializerExpressionFactory.INSTANCE, - false, ImmutableMap.of("NAME", "ACCOUNTING_DEPT")); + false, ImmutableMap.of("NAME", "ACCOUNTING_DEPT"), ImmutableList.of(0)); deptTable.addColumn("DEPTNO", integerType, true); deptTable.addColumn("NAME", varcharType); registerTable(deptTable); From 20847930f9264da4e2ff7983dc72b31a6b742720 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Tue, 12 Mar 2024 23:58:27 +0000 Subject: [PATCH 2/9] Add in additional test case --- .../org/apache/calcite/test/SqlValidatorTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java index bd550d05ed5..0102ce14a73 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java @@ -12450,6 +12450,21 @@ private void checkCustomColumnResolving(String table) { + " select * from emp where empno = 1)\n" + "where job = 'doctor'") .ok(); + fixture.withSql("select * from (\n" + + " ^select ename from emp where empno = 1^)") + .fails(missingFilters("JOB")); + + // //This test is failing, needs new logic handling. + // fixture.withSql("select * from (\n" + // + " select ename from emp where empno = 1)" + // + "where ename = '1'") + // .ok(); + + fixture.withSql("select * from (\n" + + " select empno, job from emp)\n" + + "where job = 'doctor' and empno = 1") + .ok(); + // Deceitful alias #1. Filter on 'j' is a filter on the underlying 'job'. fixture.withSql("select * from (\n" + " select job as j, ename as job\n" From 028c6cecbe426806ec41db4886f80fc09fcb6726 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Thu, 26 Sep 2024 16:55:22 -0700 Subject: [PATCH 3/9] Add in concept of remnant-must-filter fields to handle the case of a bypass-field being selected but not filtered on, a must-filter field not being selected (thus can no longer be defused), and filtering on the bypass field in the enclosing query, thus defusing the must-filter field Fix style --- .../sql/validate/AbstractNamespace.java | 9 +++ .../calcite/sql/validate/SemanticTable.java | 19 +++++- .../sql/validate/SqlValidatorImpl.java | 61 ++++++++++++++----- .../sql/validate/SqlValidatorNamespace.java | 5 ++ .../calcite/sql/validate/TableNamespace.java | 4 ++ .../apache/calcite/test/SqlValidatorTest.java | 13 ++-- .../test/catalog/MockCatalogReader.java | 9 +++ .../catalog/MustFilterMockCatalogReader.java | 2 + 8 files changed, 98 insertions(+), 24 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java index cd310bc0b1b..5287b52b48f 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java @@ -28,7 +28,9 @@ import org.checkerframework.checker.nullness.qual.Nullable; +import java.util.Collections; import java.util.List; +import java.util.Set; import static com.google.common.base.Preconditions.checkArgument; @@ -64,6 +66,8 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { protected ImmutableBitSet mustFilterBypassFields = ImmutableBitSet.of(); + protected Set remnantMustFilterFields = Collections.emptySet(); + protected final @Nullable SqlNode enclosingNode; //~ Constructors ----------------------------------------------------------- @@ -175,6 +179,11 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { return requireNonNull(mustFilterBypassFields, "mustFilterBypassFields (maybe validation is not complete?)"); } + + @Override public Set getRemnantMustFilterFields() { + return requireNonNull(remnantMustFilterFields, + "remnantMustFilterFields (maybe validation is not complete?"); + } @Override public SqlMonotonicity getMonotonicity(String columnName) { return SqlMonotonicity.NOT_MONOTONIC; } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java index 9f246d081f3..60eb5942e91 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java @@ -19,8 +19,10 @@ import org.checkerframework.checker.nullness.qual.Nullable; import java.util.List; +import java.util.Set; import static java.util.Collections.emptyList; +import static java.util.Collections.emptySet; /** * Extension to {@link SqlValidatorTable} with extra, optional metadata. @@ -49,5 +51,20 @@ default boolean mustFilter(int column) { return getFilter(column) != null; } - default List bypassFieldList() { return emptyList(); } + /** + * Returns a list of column ordinals (0-based) of fields that defuse + * must-filter columns when filtered on. + */ + default List bypassFieldList() { + return emptyList(); + } + + /** + * Returns a list of must-filter qualifieds that can only be defused by a selected bypass-field + * from the same table. The qualified was neither filtered on nor selected, and hence can no + * longer be defused by itself. + */ + default Set remnantMustFilterFields() { + return emptySet(); + } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index ee3aef3d77b..6a917ca84c4 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -160,6 +160,7 @@ import java.util.function.Supplier; import java.util.function.UnaryOperator; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import static com.google.common.base.Preconditions.checkArgument; @@ -177,6 +178,7 @@ import static org.apache.calcite.util.Util.first; import static java.util.Collections.emptyList; +import static java.util.Collections.emptySet; import static java.util.Objects.requireNonNull; /** @@ -1217,12 +1219,21 @@ protected void validateNamespace(final SqlValidatorNamespace namespace, // fields; these are neutralized if the consuming query filters on them. final ImmutableBitSet mustFilterFields = namespace.getMustFilterFields(); - if (!mustFilterFields.isEmpty()) { + // Remnant must filter fields are fields that are not selected and cannot + // be defused unlessa bypass field defuses it. + // A top-level namespace must not have any remnant-must-filter fields. + final Set remnantMustFilterFields = namespace.getRemnantMustFilterFields(); + if (!mustFilterFields.isEmpty() || !remnantMustFilterFields.isEmpty()) { + Stream mustFilterStream = + StreamSupport.stream(mustFilterFields.spliterator(), false) + .map(namespace.getRowType().getFieldNames()::get); + Stream remnantStream = + StreamSupport.stream(remnantMustFilterFields.spliterator(), false) + .map(q -> q.suffix().get(0)); + // Set of field names, sorted alphabetically for determinism. - Set fieldNameSet = - StreamSupport.stream(mustFilterFields.spliterator(), false) - .map(namespace.getRowType().getFieldNames()::get) - .collect(Collectors.toCollection(TreeSet::new)); + Set fieldNameSet = Stream.concat(mustFilterStream, remnantStream) + .collect(Collectors.toCollection(TreeSet::new)); throw newValidationError(node, RESOURCE.mustFilterFieldsMissing(fieldNameSet.toString())); } @@ -4101,28 +4112,33 @@ protected void validateSelect( // Deduce which columns must be filtered. ns.mustFilterFields = ImmutableBitSet.of(); ns.mustFilterBypassFields = ImmutableBitSet.of(); + ns.remnantMustFilterFields = emptySet(); if (from != null) { + final BitSet projectedNonFilteredBypassField = new BitSet(); final Set qualifieds = new LinkedHashSet<>(); final Set bypassQualifieds = new LinkedHashSet<>(); + final Set remnantQualifieds = new LinkedHashSet<>(); for (ScopeChild child : fromScope.children) { final List fieldNames = child.namespace.getRowType().getFieldNames(); toQualifieds(child.namespace.getMustFilterFields(), qualifieds, fromScope, child, fieldNames); - toQualifieds(child.namespace.getMustFilterBypassFields(), bypassQualifieds, fromScope, child, fieldNames); + toQualifieds(child.namespace.getMustFilterBypassFields(), bypassQualifieds, fromScope, + child, fieldNames); + remnantQualifieds.addAll(child.namespace.getRemnantMustFilterFields()); } - if (!qualifieds.isEmpty()) { + if (!qualifieds.isEmpty() || !bypassQualifieds.isEmpty()) { if (select.getWhere() != null) { forEachQualified(select.getWhere(), getWhereScope(select), qualifieds::remove); purgeForBypassFields(select.getWhere(), getWhereScope(select), - qualifieds, bypassQualifieds); + qualifieds, bypassQualifieds, remnantQualifieds); } if (select.getHaving() != null) { forEachQualified(select.getHaving(), getHavingScope(select), qualifieds::remove); purgeForBypassFields(select.getHaving(), getHavingScope(select), - qualifieds, bypassQualifieds); + qualifieds, bypassQualifieds, remnantQualifieds); } // Each of the must-filter fields identified must be returned as a @@ -4149,13 +4165,14 @@ protected void validateSelect( // SELECT item #i referenced a bypass column that was not filtered in the WHERE // or HAVING. It becomes a bypass column for our consumer. mustFilterBypassFields.set(i); + projectedNonFilteredBypassField.set(0); } } }); - // If there are must-filter fields that are not in the SELECT clause, - // this is an error. - if (!qualifieds.isEmpty()) { + // If there are must-filter fields that are not in the SELECT clause and there were no + // bypass-fields on this table, this is an error. + if (!qualifieds.isEmpty() && !projectedNonFilteredBypassField.get(0)) { throw newValidationError(select, RESOURCE.mustFilterFieldsMissing( qualifieds.stream() @@ -4165,6 +4182,10 @@ protected void validateSelect( } ns.mustFilterFields = ImmutableBitSet.fromBitSet(mustFilterFields); ns.mustFilterBypassFields = ImmutableBitSet.fromBitSet(mustFilterBypassFields); + // Remaining must-filter fields can be defused by a bypass-field, + // so we pass this to the consumer. + ns.remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) + .flatMap(Set::stream).collect(Collectors.toSet()); } } @@ -4197,10 +4218,12 @@ private static void forEachQualified(SqlNode node, SqlValidatorScope scope, } /* If the supplied SqlNode when fully qualified is in the set of bypassQualifieds, then we - remove all entries in the qualifieds set that belong to the same table identifier. + remove all entries in the qualifieds set as well as remnantMustFilterFields + that belong to the same table identifier. */ private static void purgeForBypassFields(SqlNode node, SqlValidatorScope scope, - Set qualifieds, Set bypassQualifieds) { + Set qualifieds, Set bypassQualifieds, + Set remnantMustFilterFields) { node.accept(new SqlBasicVisitor() { @Override public Void visit(SqlIdentifier id) { final SqlQualified qualified = scope.fullyQualify(id); @@ -4209,7 +4232,13 @@ private static void purgeForBypassFields(SqlNode node, SqlValidatorScope scope, Collection sameIdentifier = qualifieds.stream() .filter(q -> qualifiedMatchesIdentifier(q, qualified)) .collect(Collectors.toList()); - sameIdentifier.forEach (qualifieds::remove); + sameIdentifier.forEach(qualifieds::remove); + + // Clear all the remnant must-filter qualifieds from the same table identifier + Collection sameIdentifier_ = remnantMustFilterFields.stream() + .filter(q -> qualifiedMatchesIdentifier(q, qualified)) + .collect(Collectors.toList()); + sameIdentifier_.forEach(remnantMustFilterFields::remove); } return null; } @@ -4217,7 +4246,7 @@ private static void purgeForBypassFields(SqlNode node, SqlValidatorScope scope, } private static void toQualifieds(ImmutableBitSet fields, Set qualifiedSet, - SelectScope fromScope, ScopeChild child, List fieldNames){ + SelectScope fromScope, ScopeChild child, List fieldNames) { fields.forEachInt(i -> qualifiedSet.add( SqlQualified.create(fromScope, 1, child.namespace, diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java index cad5c2f1a99..696d6857d7c 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java @@ -25,7 +25,9 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.checkerframework.dataflow.qual.Pure; +import java.util.Collections; import java.util.List; +import java.util.Set; /** * A namespace describes the relation returned by a section of a SQL query. @@ -223,4 +225,7 @@ default ImmutableBitSet getMustFilterFields() { default ImmutableBitSet getMustFilterBypassFields() { return ImmutableBitSet.of(); } + default Set getRemnantMustFilterFields() { + return Collections.emptySet(); + } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java index abd6ba8a755..6a87aab22c3 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java @@ -34,6 +34,7 @@ import org.checkerframework.checker.nullness.qual.Nullable; +import java.util.HashSet; import java.util.List; import java.util.Map; @@ -72,6 +73,9 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table, .ifPresent(semanticTable -> this.mustFilterBypassFields = semanticTable.bypassFieldList() .stream().collect(toImmutableBitSet())); + table.maybeUnwrap(SemanticTable.class) + .ifPresent(semanticTable -> + this.remnantMustFilterFields = new HashSet<>(semanticTable.remnantMustFilterFields())); if (extendedFields.isEmpty()) { return table.getRowType(); diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java index 0102ce14a73..aff9cd181c8 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java @@ -12450,15 +12450,14 @@ private void checkCustomColumnResolving(String table) { + " select * from emp where empno = 1)\n" + "where job = 'doctor'") .ok(); - fixture.withSql("select * from (\n" - + " ^select ename from emp where empno = 1^)") + fixture.withSql("^select * from (\n" + + " select ename from emp where empno = 1)^") .fails(missingFilters("JOB")); - // //This test is failing, needs new logic handling. - // fixture.withSql("select * from (\n" - // + " select ename from emp where empno = 1)" - // + "where ename = '1'") - // .ok(); + fixture.withSql("select * from (\n" + + " select job, ename from emp where empno = 1)" + + "where ename = '1'") + .ok(); fixture.withSql("select * from (\n" + " select empno, job from emp)\n" diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java index 5ee0d113e97..8811ce5cb95 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java @@ -79,6 +79,7 @@ import org.apache.calcite.sql.validate.SqlMonotonicity; import org.apache.calcite.sql.validate.SqlNameMatcher; import org.apache.calcite.sql.validate.SqlNameMatchers; +import org.apache.calcite.sql.validate.SqlQualified; import org.apache.calcite.sql.validate.SqlValidatorCatalogReader; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.sql2rel.InitializerExpressionFactory; @@ -102,6 +103,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -1028,6 +1030,8 @@ public static class MustFilterMockTable private final Map fieldFilters; private final List bypassFieldList; + private final Set remnantFieldFilters; + MustFilterMockTable(MockCatalogReader catalogReader, String catalogName, String schemaName, String name, boolean stream, boolean temporal, double rowCount, @Nullable ColumnResolver resolver, @@ -1037,6 +1041,7 @@ public static class MustFilterMockTable rowCount, resolver, initializerExpressionFactory); this.fieldFilters = ImmutableMap.copyOf(fieldFilters); this.bypassFieldList = ImmutableList.copyOf(bypassFieldList); + this.remnantFieldFilters = Collections.emptySet(); } /** Creates a MustFilterMockTable. */ @@ -1066,6 +1071,10 @@ public static MustFilterMockTable create(MockCatalogReader catalogReader, @Override public List bypassFieldList() { return bypassFieldList; } + + @Override public Set remnantMustFilterFields() { + return remnantFieldFilters; + } } /** Wrapper around a {@link MockTable}, giving it a {@link Table} interface. diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java index 222cba8c531..ef9f71a616b 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MustFilterMockCatalogReader.java @@ -48,6 +48,7 @@ public static SqlValidatorCatalogReader create(RelDataTypeFactory typeFactory, registerSchema(salesSchema); // Register "EMP" table. Must-filter fields are "EMPNO", "JOB". + // Bypass field of column (1): ENAME. MustFilterMockTable empTable = MustFilterMockTable.create(this, salesSchema, "EMP", false, 14, null, NullInitializerExpressionFactory.INSTANCE, @@ -73,6 +74,7 @@ public static SqlValidatorCatalogReader create(RelDataTypeFactory typeFactory, registerTable(empTable); // Register "DEPT" table. "NAME" is a must-filter field. + // Bypass field of column (0): DEPTNO. MustFilterMockTable deptTable = MustFilterMockTable.create(this, salesSchema, "DEPT", false, 14, null, NullInitializerExpressionFactory.INSTANCE, From 0dd34617c6f5dc8636f816c03c3809fbbc5fa3e7 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Thu, 17 Oct 2024 16:21:01 -0700 Subject: [PATCH 4/9] Address style concerns and add comments --- .../sql/validate/AbstractNamespace.java | 6 ++ .../calcite/sql/validate/SemanticTable.java | 5 +- .../sql/validate/SqlValidatorImpl.java | 26 +++++---- .../apache/calcite/test/SqlValidatorTest.java | 58 +++++++------------ .../test/catalog/MockCatalogReader.java | 4 +- 5 files changed, 45 insertions(+), 54 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java index 5287b52b48f..ae1803f7c33 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java @@ -64,8 +64,13 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { * should typically be re-assigned on validate. */ protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of(); + /** Ordinals of fields that when filtered on, remove the requirement from mustFilterFields. + * Initially the empty set, but should typically be re-assigned on validate. */ protected ImmutableBitSet mustFilterBypassFields = ImmutableBitSet.of(); + /** A set of SqlQualifieds that carries over mustFilterFields that were not selected or filtered, + * but could still be defused by mustFilterBypassFields up until the top level SqlNode. + * Initially the empty set, but should typically be re-assigned on validate. */ protected Set remnantMustFilterFields = Collections.emptySet(); protected final @Nullable SqlNode enclosingNode; @@ -184,6 +189,7 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { return requireNonNull(remnantMustFilterFields, "remnantMustFilterFields (maybe validation is not complete?"); } + @Override public SqlMonotonicity getMonotonicity(String columnName) { return SqlMonotonicity.NOT_MONOTONIC; } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java index 60eb5942e91..4e9f65f251c 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java @@ -16,12 +16,13 @@ */ package org.apache.calcite.sql.validate; +import com.google.common.collect.ImmutableList; + import org.checkerframework.checker.nullness.qual.Nullable; import java.util.List; import java.util.Set; -import static java.util.Collections.emptyList; import static java.util.Collections.emptySet; /** @@ -56,7 +57,7 @@ default boolean mustFilter(int column) { * must-filter columns when filtered on. */ default List bypassFieldList() { - return emptyList(); + return ImmutableList.of(); } /** diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index 6a917ca84c4..d0dce617afe 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -1220,19 +1220,20 @@ protected void validateNamespace(final SqlValidatorNamespace namespace, final ImmutableBitSet mustFilterFields = namespace.getMustFilterFields(); // Remnant must filter fields are fields that are not selected and cannot - // be defused unlessa bypass field defuses it. + // be defused unless a bypass field defuses it. // A top-level namespace must not have any remnant-must-filter fields. final Set remnantMustFilterFields = namespace.getRemnantMustFilterFields(); if (!mustFilterFields.isEmpty() || !remnantMustFilterFields.isEmpty()) { Stream mustFilterStream = - StreamSupport.stream(mustFilterFields.spliterator(), false) - .map(namespace.getRowType().getFieldNames()::get); + StreamSupport.stream(mustFilterFields.spliterator(), false) + .map(namespace.getRowType().getFieldNames()::get); Stream remnantStream = - StreamSupport.stream(remnantMustFilterFields.spliterator(), false) - .map(q -> q.suffix().get(0)); + StreamSupport.stream(remnantMustFilterFields.spliterator(), false) + .map(q -> q.suffix().get(0)); // Set of field names, sorted alphabetically for determinism. - Set fieldNameSet = Stream.concat(mustFilterStream, remnantStream) + Set fieldNameSet = + Stream.concat(mustFilterStream, remnantStream) .collect(Collectors.toCollection(TreeSet::new)); throw newValidationError(node, RESOURCE.mustFilterFieldsMissing(fieldNameSet.toString())); @@ -4204,8 +4205,10 @@ protected void validateSelect( } } - /** For each identifier in an expression, resolves it to a qualified name - * and calls the provided action. */ + /** + * For each identifier in an expression, resolves it to a qualified name + * and calls the provided action. + */ private static void forEachQualified(SqlNode node, SqlValidatorScope scope, Consumer consumer) { node.accept(new SqlBasicVisitor() { @@ -4217,9 +4220,9 @@ private static void forEachQualified(SqlNode node, SqlValidatorScope scope, }); } - /* If the supplied SqlNode when fully qualified is in the set of bypassQualifieds, then we - remove all entries in the qualifieds set as well as remnantMustFilterFields - that belong to the same table identifier. + /** + * Removes all entries from qualifieds and remnantMustFilterFields if the supplied SqlNode + * is a bypassField. */ private static void purgeForBypassFields(SqlNode node, SqlValidatorScope scope, Set qualifieds, Set bypassQualifieds, @@ -4255,6 +4258,7 @@ private static void toQualifieds(ImmutableBitSet fields, Set quali SqlParserPos.ZERO)))); } + private static boolean qualifiedMatchesIdentifier(SqlQualified q1, SqlQualified q2) { return q1.identifier.names.get(0).equals(q2.identifier.names.get(0)); } diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java index aff9cd181c8..8fc3dec03ca 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java @@ -12412,7 +12412,6 @@ private void checkCustomColumnResolving(String table) { .fails(missingFilters("JOB")); } - /** * Tests validation of must-filter columns with the inclusion of bypass fields. * @@ -12439,11 +12438,11 @@ private void checkCustomColumnResolving(String table) { + "where concat(emp.empno, ' ') = 'abc'^") .fails(missingFilters("JOB")); + // ENAME is a bypass field fixture.withSql("select *\n" + "from emp\n" + "where concat(emp.ename, ' ') = 'abc'^") .ok(); - // ENAME is a bypass field // SUBQUERIES fixture.withSql("select * from (\n" @@ -12453,12 +12452,10 @@ private void checkCustomColumnResolving(String table) { fixture.withSql("^select * from (\n" + " select ename from emp where empno = 1)^") .fails(missingFilters("JOB")); - fixture.withSql("select * from (\n" + " select job, ename from emp where empno = 1)" + "where ename = '1'") .ok(); - fixture.withSql("select * from (\n" + " select empno, job from emp)\n" + "where job = 'doctor' and empno = 1") @@ -12471,8 +12468,9 @@ private void checkCustomColumnResolving(String table) { + " where empno = 1)\n" + "where j = 'doctor'") .ok(); - // Deceitful alias #2. Filter on 'job' is a filter on the underlying - // 'slacker', so the underlying 'job' is missing a filter. + + // Deceitful alias #2. Filter on 'job' is a filter on the underlying + // 'slacker', so the underlying 'job' is missing a filter. fixture.withSql("^select * from (\n" + " select job as j, slacker as job\n" + " from emp\n" @@ -12499,28 +12497,27 @@ private void checkCustomColumnResolving(String table) { fixture.withSql("^select * from (\n" + " select * from emp where empno = 1)^") .fails(missingFilters("JOB")); + + // Query is valid because ENAME is a bypass field fixture.withSql("select * from (\n" + " select * from emp where ename = 1)^") .ok(); - // ENAME is a bypass field - fixture.withSql("^select * from (select * from `SALES`.`EMP`) as a1^ ") .fails(missingFilters("EMPNO", "JOB")); - fixture.withSql("select * from (select * from `SALES`.`EMP`) as a1 where ename = '1'^ ") .ok(); + // JOINs fixture.withSql("^select *\n" + "from emp\n" + "join dept on emp.deptno = dept.deptno^") .fails(missingFilters("EMPNO", "JOB", "NAME")); + // Query is invalid because ENAME is a bypass field for EMP table, but not the DEPT table. fixture.withSql("^select *\n" + "from emp\n" + "join dept on emp.deptno = dept.deptno where ename = '1'^") .fails(missingFilters("NAME")); - // ENAME is a bypass field for EMP table. - fixture.withSql("^select *\n" + "from emp\n" + "join dept on emp.deptno = dept.deptno\n" @@ -12547,13 +12544,14 @@ private void checkCustomColumnResolving(String table) { + "join `SALES`.emp a2 on a1.empno = a2.empno^") .fails(missingFilters("EMPNO", "EMPNO0", "JOB", "JOB0")); + // Query is invalid because filtering on a bypass field in a1 disables must-filter for a1, + // but a2 must-filters are still required. fixture.withSql("^select *\n" + "from `SALES`.emp a1\n" + "join `SALES`.emp a2 on a1.empno = a2.empno where a1.ename = '1'^") .fails(missingFilters("EMPNO0", "JOB0")); - // Filtering on a bypass field in a1 disables must-filter for a1, but a2 must-filters - // are still required. + // Query is invalid because here are two JOB columns but only one is filtered. fixture.withSql("^select *\n" + "from emp a1\n" + "join emp a2 on a1.empno = a2.empno\n" @@ -12561,8 +12559,6 @@ private void checkCustomColumnResolving(String table) { + "and a1.empno = 1\n" + "and a2.job = 'doctor'^") .fails(missingFilters("JOB")); - // There are two JOB columns but only one is filtered - fixture.withSql("select *\n" + "from emp a1\n" + "join emp a2 on a1.empno = a2.empno\n" @@ -12571,7 +12567,6 @@ private void checkCustomColumnResolving(String table) { + "and a2.job = 'doctor'^\n" + "and a1.ename = '1'") .ok(); - fixture.withSql("select *\n" + "from emp a1\n" + "join emp a2 on a1.empno = a2.empno\n" @@ -12586,15 +12581,14 @@ private void checkCustomColumnResolving(String table) { + " on a1.`EMPNO` = a2.`EMPNO`^") .fails(missingFilters("EMPNO", "EMPNO0", "JOB", "JOB0")); + // Query is invalid because filtering on a bypass field in a1 disables must-filter for a1, + // but a2 must-filters are still required. fixture.withSql("^select *\n" + " from (select * from `SALES`.`EMP`) as a1\n" + "join (select * from `SALES`.`EMP`) as a2\n" + " on a1.`EMPNO` = a2.`EMPNO`\n" + "where a1.ename = '1'^") .fails(missingFilters("EMPNO0", "JOB0")); - // Filtering on a bypass field in a1 disables must-filter for a1, but a2 must-filters - // are still required. - fixture.withSql("^select *\n" + " from (select * from `SALES`.`EMP` where `ENAME` = '1') as a1\n" + "join (select * from `SALES`.`EMP`) as a2\n" @@ -12608,13 +12602,12 @@ private void checkCustomColumnResolving(String table) { + "where emp.empno = 1^") .fails(missingFilters("JOB", "NAME")); + // Query is invalid because ENAME is bypass field for EMP, but not for DEPT. fixture.withSql("^select *\n" + "from emp\n" + "join dept using(deptno)\n" + "where emp.ename = '1'^") .fails(missingFilters("NAME")); - // ENAME is bypass field for EMP. - fixture.withSql("select *\n" + "from emp\n" + "join dept using(deptno)\n" @@ -12634,13 +12627,12 @@ private void checkCustomColumnResolving(String table) { + "group by deptno, name^") .fails(missingFilters("NAME")); + // Query is valid because DEPTNO is bypass field. fixture.withSql("select *\n" + "from dept\n" + "group by deptno, name\n" + "having deptno > '1'") .ok(); - // DEPTNO is bypass field - fixture.withSql("select name\n" + "from dept\n" + "group by name\n" @@ -12650,7 +12642,6 @@ private void checkCustomColumnResolving(String table) { + "from dept\n" + "group by name^ ") .fails(missingFilters("NAME")); - fixture.withSql("select sum(sal)\n" + "from emp\n" + "where empno > 10\n" @@ -12664,28 +12655,24 @@ private void checkCustomColumnResolving(String table) { + "group by empno\n" + "having sum(sal) > 100^") .fails(missingFilters("JOB")); - fixture.withSql("^select sum(sal)\n" + "from emp\n" + "where empno > 10\n" + "group by empno\n" + "having sum(sal) > 100^") .fails(missingFilters("JOB")); - fixture.withSql("select sum(sal), job\n" + "from emp\n" + "where empno > 10\n" + "group by job\n" + "having job = 'undertaker'") .ok(); - fixture.withSql("select sum(sal), ename\n" + "from emp\n" + "where empno > 10\n" + "group by empno, ename\n" + "having ename = '1'") .ok(); - fixture.withSql("select sum(sal)\n" + "from emp\n" + "where ename = '1'\n" @@ -12699,23 +12686,21 @@ private void checkCustomColumnResolving(String table) { + "SELECT * from cte") .fails(missingFilters("EMPNO", "JOB")); + // Query is valid because ENAME is a bypass field. fixture.withSql("WITH cte AS (\n" + " select * from emp where ename = '1' order by empno)^\n" + "SELECT * from cte") .ok(); - // ENAME is a bypass field + // Query is valid because ENAME is a bypass field. fixture.withSql("WITH cte AS (\n" + " select * from emp order by empno)^\n" + "SELECT * from cte where ename = '1'") .ok(); - // ENAME is a bypass field - fixture.withSql("^WITH cte AS (\n" + " select * from emp where empno = 1)^\n" + "SELECT * from cte") .fails(missingFilters("JOB")); - fixture.withSql("WITH cte AS (\n" + " select *\n" + " from emp\n" @@ -12729,20 +12714,17 @@ private void checkCustomColumnResolving(String table) { + "from cte\n" + "where empno = 1") .fails(missingFilters("JOB")); - fixture.withSql("WITH cte AS (\n" + " select * from emp where ename = '1')^\n" + "SELECT *\n" + "from cte\n") .ok(); - fixture.withSql("WITH cte AS (\n" + " select * from emp)^\n" + "SELECT *\n" + "from cte\n" + "where ename = '1'") .ok(); - fixture.withSql("WITH cte AS (\n" + " select * from emp)\n" + "SELECT *\n" @@ -12764,9 +12746,9 @@ private void checkCustomColumnResolving(String table) { + "and job = 'doctor'") .ok(); - // Filters are missing on EMPNO and JOB, but the error message only - // complains about JOB because EMPNO is in the SELECT clause, and could - // theoretically be filtered by an enclosing query. + // Query is invalid because filters are missing on EMPNO and JOB. + // The error message only complains about JOB because EMPNO is in the SELECT clause, + // and could theoretically be filtered by an enclosing query. fixture.withSql("^select empno\n" + "from emp^") .fails(missingFilters("JOB")); diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java index 8811ce5cb95..7922890abab 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java @@ -103,7 +103,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -1029,7 +1028,6 @@ public static class MustFilterMockTable extends MockTable implements SemanticTable { private final Map fieldFilters; private final List bypassFieldList; - private final Set remnantFieldFilters; MustFilterMockTable(MockCatalogReader catalogReader, String catalogName, @@ -1041,7 +1039,7 @@ public static class MustFilterMockTable rowCount, resolver, initializerExpressionFactory); this.fieldFilters = ImmutableMap.copyOf(fieldFilters); this.bypassFieldList = ImmutableList.copyOf(bypassFieldList); - this.remnantFieldFilters = Collections.emptySet(); + this.remnantFieldFilters = ImmutableSet.of(); } /** Creates a MustFilterMockTable. */ From 1a90d100818e416e2d41ead6449a0f1e3d5b70b9 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 18 Oct 2024 11:27:09 -0700 Subject: [PATCH 5/9] Refactor 3 variables into MustFilterRequirements class --- .../sql/validate/AbstractNamespace.java | 36 ++++--------- .../sql/validate/IdentifierNamespace.java | 3 +- .../sql/validate/MustFilterRequirements.java | 51 +++++++++++++++++++ .../calcite/sql/validate/SemanticTable.java | 12 ----- .../sql/validate/SqlValidatorImpl.java | 40 +++++++-------- .../sql/validate/SqlValidatorNamespace.java | 17 ++----- .../calcite/sql/validate/TableNamespace.java | 27 +++++----- .../sql/validate/WithItemNamespace.java | 3 +- .../calcite/sql/validate/WithNamespace.java | 3 +- .../test/catalog/MockCatalogReader.java | 7 --- 10 files changed, 98 insertions(+), 101 deletions(-) create mode 100644 core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java diff --git a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java index ae1803f7c33..c78841a8b99 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java @@ -20,7 +20,6 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; @@ -28,9 +27,7 @@ import org.checkerframework.checker.nullness.qual.Nullable; -import java.util.Collections; import java.util.List; -import java.util.Set; import static com.google.common.base.Preconditions.checkArgument; @@ -60,18 +57,13 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { /** As {@link #rowType}, but not necessarily a struct. */ protected @Nullable RelDataType type; - /** Ordinals of fields that must be filtered. Initially the empty set, but - * should typically be re-assigned on validate. */ - protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of(); - - /** Ordinals of fields that when filtered on, remove the requirement from mustFilterFields. - * Initially the empty set, but should typically be re-assigned on validate. */ - protected ImmutableBitSet mustFilterBypassFields = ImmutableBitSet.of(); - - /** A set of SqlQualifieds that carries over mustFilterFields that were not selected or filtered, - * but could still be defused by mustFilterBypassFields up until the top level SqlNode. - * Initially the empty set, but should typically be re-assigned on validate. */ - protected Set remnantMustFilterFields = Collections.emptySet(); + /** + * Class that holds information about what fields need to be filtered, what bypass-fields + * can defuse the errors if they are filtered on as an alternative, and a set used during + * validation internally. Initialized as empty object, but should typically be re-assiged + * on validate. + */ + protected MustFilterRequirements mustFilterRequirements = new MustFilterRequirements(); protected final @Nullable SqlNode enclosingNode; @@ -175,21 +167,11 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { return ImmutableList.of(); } - @Override public ImmutableBitSet getMustFilterFields() { - return requireNonNull(mustFilterFields, + @Override public MustFilterRequirements getMustFilterRequirements() { + return requireNonNull(mustFilterRequirements, "mustFilterFields (maybe validation is not complete?)"); } - @Override public ImmutableBitSet getMustFilterBypassFields() { - return requireNonNull(mustFilterBypassFields, - "mustFilterBypassFields (maybe validation is not complete?)"); - } - - @Override public Set getRemnantMustFilterFields() { - return requireNonNull(remnantMustFilterFields, - "remnantMustFilterFields (maybe validation is not complete?"); - } - @Override public SqlMonotonicity getMonotonicity(String columnName) { return SqlMonotonicity.NOT_MONOTONIC; } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java index 64164708b31..beeeac37223 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java @@ -210,8 +210,7 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) { } } - this.mustFilterFields = resolvedNamespace.getMustFilterFields(); - this.mustFilterBypassFields = resolvedNamespace.getMustFilterBypassFields(); + mustFilterRequirements = resolvedNamespace.getMustFilterRequirements(); RelDataType rowType = resolvedNamespace.getRowType(); if (extendList != null) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java new file mode 100644 index 00000000000..90b425aa83e --- /dev/null +++ b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.sql.validate; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +/** + * Class that encapsulates filtering requirements when overloading SemanticTable. + * + *

mustFilterFields: the ordinals (in the row type) of the "must-filter" fields, + * fields that must be filtered in a query. + *

mustFilterBypassFields: the ordinals (in the row type) of the "bypass" fields, + * fields that can defuse validation errors on mustFilterFields if filtered on. + *

remnantMustFilterFields: Set of mustFilterField SqlQualifieds that have not been defused + * in the current query, but can still be defused by filtering on a bypass field in the + * enclosing query. + */ +public class MustFilterRequirements { + + ImmutableBitSet mustFilterFields; + ImmutableBitSet mustFilterBypassFields; + protected Set remnantMustFilterFields; + public MustFilterRequirements(ImmutableBitSet mustFilterFields, + ImmutableBitSet mustFilterBypassFields, Set remnantMustFilterFields) { + this.mustFilterFields = mustFilterFields; + this.mustFilterBypassFields = mustFilterBypassFields; + this.remnantMustFilterFields = remnantMustFilterFields; + } + + public MustFilterRequirements() { + this(ImmutableBitSet.of(), ImmutableBitSet.of(), ImmutableSet.of()); + } +} diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java index 4e9f65f251c..6b39af7065c 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SemanticTable.java @@ -21,9 +21,6 @@ import org.checkerframework.checker.nullness.qual.Nullable; import java.util.List; -import java.util.Set; - -import static java.util.Collections.emptySet; /** * Extension to {@link SqlValidatorTable} with extra, optional metadata. @@ -59,13 +56,4 @@ default boolean mustFilter(int column) { default List bypassFieldList() { return ImmutableList.of(); } - - /** - * Returns a list of must-filter qualifieds that can only be defused by a selected bypass-field - * from the same table. The qualified was neither filtered on nor selected, and hence can no - * longer be defused by itself. - */ - default Set remnantMustFilterFields() { - return emptySet(); - } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index d0dce617afe..43c4b9bdf0e 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -178,7 +178,6 @@ import static org.apache.calcite.util.Util.first; import static java.util.Collections.emptyList; -import static java.util.Collections.emptySet; import static java.util.Objects.requireNonNull; /** @@ -1214,22 +1213,22 @@ protected void validateNamespace(final SqlValidatorNamespace namespace, RelDataType type = namespace.getType(); if (node == top) { - // A top-level namespace must not return any must-filter fields. + MustFilterRequirements mustFilterRequirements = namespace.getMustFilterRequirements(); + // Either of the following two conditions result in an invalid query: + // 1) A top-level namespace must not return any must-filter fields. // A non-top-level namespace (e.g. a subquery) may return must-filter // fields; these are neutralized if the consuming query filters on them. - final ImmutableBitSet mustFilterFields = - namespace.getMustFilterFields(); + // 2) A top-level namespace must not have any remnant-must-filter fields. // Remnant must filter fields are fields that are not selected and cannot // be defused unless a bypass field defuses it. - // A top-level namespace must not have any remnant-must-filter fields. - final Set remnantMustFilterFields = namespace.getRemnantMustFilterFields(); - if (!mustFilterFields.isEmpty() || !remnantMustFilterFields.isEmpty()) { + if (!mustFilterRequirements.mustFilterFields.isEmpty() + || !mustFilterRequirements.remnantMustFilterFields.isEmpty()) { Stream mustFilterStream = - StreamSupport.stream(mustFilterFields.spliterator(), false) - .map(namespace.getRowType().getFieldNames()::get); + StreamSupport.stream(mustFilterRequirements.mustFilterFields.spliterator(), false) + .map(namespace.getRowType().getFieldNames()::get); Stream remnantStream = - StreamSupport.stream(remnantMustFilterFields.spliterator(), false) - .map(q -> q.suffix().get(0)); + mustFilterRequirements.remnantMustFilterFields.stream() + .map(q -> q.suffix().get(0)); // Set of field names, sorted alphabetically for determinism. Set fieldNameSet = @@ -4111,9 +4110,7 @@ protected void validateSelect( ns.setType(rowType); // Deduce which columns must be filtered. - ns.mustFilterFields = ImmutableBitSet.of(); - ns.mustFilterBypassFields = ImmutableBitSet.of(); - ns.remnantMustFilterFields = emptySet(); + ns.mustFilterRequirements = new MustFilterRequirements(); if (from != null) { final BitSet projectedNonFilteredBypassField = new BitSet(); final Set qualifieds = new LinkedHashSet<>(); @@ -4122,11 +4119,12 @@ protected void validateSelect( for (ScopeChild child : fromScope.children) { final List fieldNames = child.namespace.getRowType().getFieldNames(); - toQualifieds(child.namespace.getMustFilterFields(), qualifieds, fromScope, child, + MustFilterRequirements mustFilterReqs = child.namespace.getMustFilterRequirements(); + toQualifieds(mustFilterReqs.mustFilterFields, qualifieds, fromScope, child, fieldNames); - toQualifieds(child.namespace.getMustFilterBypassFields(), bypassQualifieds, fromScope, + toQualifieds(mustFilterReqs.mustFilterBypassFields, bypassQualifieds, fromScope, child, fieldNames); - remnantQualifieds.addAll(child.namespace.getRemnantMustFilterFields()); + remnantQualifieds.addAll(mustFilterReqs.remnantMustFilterFields); } if (!qualifieds.isEmpty() || !bypassQualifieds.isEmpty()) { if (select.getWhere() != null) { @@ -4181,12 +4179,14 @@ protected void validateSelect( .collect(Collectors.toCollection(TreeSet::new)) .toString())); } - ns.mustFilterFields = ImmutableBitSet.fromBitSet(mustFilterFields); - ns.mustFilterBypassFields = ImmutableBitSet.fromBitSet(mustFilterBypassFields); // Remaining must-filter fields can be defused by a bypass-field, // so we pass this to the consumer. - ns.remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) + Set remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) .flatMap(Set::stream).collect(Collectors.toSet()); + ns.mustFilterRequirements = new MustFilterRequirements( + ImmutableBitSet.fromBitSet(mustFilterFields), + ImmutableBitSet.fromBitSet(mustFilterBypassFields), + remnantMustFilterFields); } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java index 696d6857d7c..00073abd68b 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java @@ -19,15 +19,12 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.checkerframework.checker.nullness.qual.Nullable; import org.checkerframework.dataflow.qual.Pure; -import java.util.Collections; import java.util.List; -import java.util.Set; /** * A namespace describes the relation returned by a section of a SQL query. @@ -216,16 +213,8 @@ default boolean fieldExists(String name) { */ boolean supportsModality(SqlModality modality); - /** Returns the ordinals (in the row type) of the "must-filter" fields, - * fields that that must be filtered in a query. */ - default ImmutableBitSet getMustFilterFields() { - return ImmutableBitSet.of(); - } - - default ImmutableBitSet getMustFilterBypassFields() { - return ImmutableBitSet.of(); - } - default Set getRemnantMustFilterFields() { - return Collections.emptySet(); + /** Returns a MustFilterRequirements object used during validation. */ + default MustFilterRequirements getMustFilterRequirements() { + return new MustFilterRequirements(); } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java index 6a87aab22c3..f21bd1c0404 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java @@ -61,22 +61,19 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table, } @Override protected RelDataType validateImpl(RelDataType targetRowType) { - this.mustFilterFields = ImmutableBitSet.of(); table.maybeUnwrap(SemanticTable.class) - .ifPresent(semanticTable -> - this.mustFilterFields = - table.getRowType().getFieldList().stream() - .map(RelDataTypeField::getIndex) - .filter(semanticTable::mustFilter) - .collect(toImmutableBitSet())); - table.maybeUnwrap(SemanticTable.class) - .ifPresent(semanticTable -> - this.mustFilterBypassFields = semanticTable.bypassFieldList() - .stream().collect(toImmutableBitSet())); - table.maybeUnwrap(SemanticTable.class) - .ifPresent(semanticTable -> - this.remnantMustFilterFields = new HashSet<>(semanticTable.remnantMustFilterFields())); - + .ifPresent(semanticTable -> { + ImmutableBitSet mustFilterFields = table.getRowType().getFieldList().stream() + .map(RelDataTypeField::getIndex) + .filter(semanticTable::mustFilter) + .collect(toImmutableBitSet()); + ImmutableBitSet bypassFieldList = semanticTable.bypassFieldList().stream() + .collect(toImmutableBitSet()); + // We pass in an empty set for remnantMustFilterFields here because it isn't exposed to + // SemanticTable and only mustFilterFields and bypassFieldList should be supplied. + this.mustFilterRequirements = + new MustFilterRequirements(mustFilterFields, bypassFieldList, new HashSet<>()); + }); if (extendedFields.isEmpty()) { return table.getRowType(); } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java index 31ec34a55b0..d1f42e741f0 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java @@ -41,8 +41,7 @@ class WithItemNamespace extends AbstractNamespace { final SqlValidatorNamespace childNs = validator.getNamespaceOrThrow(getQuery()); final RelDataType rowType = childNs.getRowTypeSansSystemColumns(); - mustFilterFields = childNs.getMustFilterFields(); - mustFilterBypassFields = childNs.getMustFilterBypassFields(); + mustFilterRequirements = childNs.getMustFilterRequirements(); SqlNodeList columnList = withItem.columnList; if (columnList == null) { return rowType; diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java index 61b6c5a14c9..1cc0dcbc1d5 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java @@ -64,8 +64,7 @@ public class WithNamespace extends AbstractNamespace { validator.validateQuery(with.body, scope2, targetRowType); final RelDataType rowType = validator.getValidatedNodeType(with.body); validator.setValidatedNodeType(with, rowType); - mustFilterFields = bodyNamespace.getMustFilterFields(); - mustFilterBypassFields = bodyNamespace.getMustFilterBypassFields(); + mustFilterRequirements = bodyNamespace.getMustFilterRequirements(); return rowType; } diff --git a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java index 7922890abab..5ee0d113e97 100644 --- a/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java +++ b/testkit/src/main/java/org/apache/calcite/test/catalog/MockCatalogReader.java @@ -79,7 +79,6 @@ import org.apache.calcite.sql.validate.SqlMonotonicity; import org.apache.calcite.sql.validate.SqlNameMatcher; import org.apache.calcite.sql.validate.SqlNameMatchers; -import org.apache.calcite.sql.validate.SqlQualified; import org.apache.calcite.sql.validate.SqlValidatorCatalogReader; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.sql2rel.InitializerExpressionFactory; @@ -1028,7 +1027,6 @@ public static class MustFilterMockTable extends MockTable implements SemanticTable { private final Map fieldFilters; private final List bypassFieldList; - private final Set remnantFieldFilters; MustFilterMockTable(MockCatalogReader catalogReader, String catalogName, String schemaName, String name, boolean stream, boolean temporal, @@ -1039,7 +1037,6 @@ public static class MustFilterMockTable rowCount, resolver, initializerExpressionFactory); this.fieldFilters = ImmutableMap.copyOf(fieldFilters); this.bypassFieldList = ImmutableList.copyOf(bypassFieldList); - this.remnantFieldFilters = ImmutableSet.of(); } /** Creates a MustFilterMockTable. */ @@ -1069,10 +1066,6 @@ public static MustFilterMockTable create(MockCatalogReader catalogReader, @Override public List bypassFieldList() { return bypassFieldList; } - - @Override public Set remnantMustFilterFields() { - return remnantFieldFilters; - } } /** Wrapper around a {@link MockTable}, giving it a {@link Table} interface. From 3fe4a5c0bedf08f87713cc06629f4ed7fea0aec4 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 18 Oct 2024 11:37:53 -0700 Subject: [PATCH 6/9] Move validation logic into helper function --- .../sql/validate/SqlValidatorImpl.java | 164 +++++++++--------- 1 file changed, 84 insertions(+), 80 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index 43c4b9bdf0e..81067b14964 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -4109,86 +4109,7 @@ protected void validateSelect( validateSelectList(selectItems, select, targetRowType); ns.setType(rowType); - // Deduce which columns must be filtered. - ns.mustFilterRequirements = new MustFilterRequirements(); - if (from != null) { - final BitSet projectedNonFilteredBypassField = new BitSet(); - final Set qualifieds = new LinkedHashSet<>(); - final Set bypassQualifieds = new LinkedHashSet<>(); - final Set remnantQualifieds = new LinkedHashSet<>(); - for (ScopeChild child : fromScope.children) { - final List fieldNames = - child.namespace.getRowType().getFieldNames(); - MustFilterRequirements mustFilterReqs = child.namespace.getMustFilterRequirements(); - toQualifieds(mustFilterReqs.mustFilterFields, qualifieds, fromScope, child, - fieldNames); - toQualifieds(mustFilterReqs.mustFilterBypassFields, bypassQualifieds, fromScope, - child, fieldNames); - remnantQualifieds.addAll(mustFilterReqs.remnantMustFilterFields); - } - if (!qualifieds.isEmpty() || !bypassQualifieds.isEmpty()) { - if (select.getWhere() != null) { - forEachQualified(select.getWhere(), getWhereScope(select), - qualifieds::remove); - purgeForBypassFields(select.getWhere(), getWhereScope(select), - qualifieds, bypassQualifieds, remnantQualifieds); - } - if (select.getHaving() != null) { - forEachQualified(select.getHaving(), getHavingScope(select), - qualifieds::remove); - purgeForBypassFields(select.getHaving(), getHavingScope(select), - qualifieds, bypassQualifieds, remnantQualifieds); - } - - // Each of the must-filter fields identified must be returned as a - // SELECT item, which is then flagged as must-filter. - final BitSet mustFilterFields = new BitSet(); - // Each of the bypass fields identified must be returned as a - // SELECT item, which is then flagged as a bypass field for the consumer. - final BitSet mustFilterBypassFields = new BitSet(); - final List expandedSelectItems = - requireNonNull(fromScope.getExpandedSelectList(), - "expandedSelectList"); - forEach(expandedSelectItems, (selectItem, i) -> { - selectItem = stripAs(selectItem); - if (selectItem instanceof SqlIdentifier) { - SqlQualified qualified = - fromScope.fullyQualify((SqlIdentifier) selectItem); - if (qualifieds.remove(qualified)) { - // SELECT item #i referenced a must-filter column that was not - // filtered in the WHERE or HAVING. It becomes a must-filter - // column for our consumer. - mustFilterFields.set(i); - } - if (bypassQualifieds.remove(qualified)) { - // SELECT item #i referenced a bypass column that was not filtered in the WHERE - // or HAVING. It becomes a bypass column for our consumer. - mustFilterBypassFields.set(i); - projectedNonFilteredBypassField.set(0); - } - } - }); - - // If there are must-filter fields that are not in the SELECT clause and there were no - // bypass-fields on this table, this is an error. - if (!qualifieds.isEmpty() && !projectedNonFilteredBypassField.get(0)) { - throw newValidationError(select, - RESOURCE.mustFilterFieldsMissing( - qualifieds.stream() - .map(q -> q.suffix().get(0)) - .collect(Collectors.toCollection(TreeSet::new)) - .toString())); - } - // Remaining must-filter fields can be defused by a bypass-field, - // so we pass this to the consumer. - Set remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) - .flatMap(Set::stream).collect(Collectors.toSet()); - ns.mustFilterRequirements = new MustFilterRequirements( - ImmutableBitSet.fromBitSet(mustFilterFields), - ImmutableBitSet.fromBitSet(mustFilterBypassFields), - remnantMustFilterFields); - } - } + validateMustFilterRequirements(select, ns); // Validate ORDER BY after we have set ns.rowType because in some // dialects you can refer to columns of the select list, e.g. @@ -4716,6 +4637,89 @@ protected void validateQualifyClause(SqlSelect select) { } } + protected void validateMustFilterRequirements(SqlSelect select, SelectNamespace ns) { + ns.mustFilterRequirements = new MustFilterRequirements(); + if (select.getFrom() != null) { + final SelectScope fromScope = (SelectScope) getFromScope(select); + final BitSet projectedNonFilteredBypassField = new BitSet(); + final Set qualifieds = new LinkedHashSet<>(); + final Set bypassQualifieds = new LinkedHashSet<>(); + final Set remnantQualifieds = new LinkedHashSet<>(); + for (ScopeChild child : fromScope.children) { + final List fieldNames = + child.namespace.getRowType().getFieldNames(); + MustFilterRequirements mustFilterReqs = child.namespace.getMustFilterRequirements(); + toQualifieds(mustFilterReqs.mustFilterFields, qualifieds, fromScope, child, + fieldNames); + toQualifieds(mustFilterReqs.mustFilterBypassFields, bypassQualifieds, fromScope, + child, fieldNames); + remnantQualifieds.addAll(mustFilterReqs.remnantMustFilterFields); + } + if (!qualifieds.isEmpty() || !bypassQualifieds.isEmpty()) { + if (select.getWhere() != null) { + forEachQualified(select.getWhere(), getWhereScope(select), + qualifieds::remove); + purgeForBypassFields(select.getWhere(), getWhereScope(select), + qualifieds, bypassQualifieds, remnantQualifieds); + } + if (select.getHaving() != null) { + forEachQualified(select.getHaving(), getHavingScope(select), + qualifieds::remove); + purgeForBypassFields(select.getHaving(), getHavingScope(select), + qualifieds, bypassQualifieds, remnantQualifieds); + } + + // Each of the must-filter fields identified must be returned as a + // SELECT item, which is then flagged as must-filter. + final BitSet mustFilterFields = new BitSet(); + // Each of the bypass fields identified must be returned as a + // SELECT item, which is then flagged as a bypass field for the consumer. + final BitSet mustFilterBypassFields = new BitSet(); + final List expandedSelectItems = + requireNonNull(fromScope.getExpandedSelectList(), + "expandedSelectList"); + forEach(expandedSelectItems, (selectItem, i) -> { + selectItem = stripAs(selectItem); + if (selectItem instanceof SqlIdentifier) { + SqlQualified qualified = + fromScope.fullyQualify((SqlIdentifier) selectItem); + if (qualifieds.remove(qualified)) { + // SELECT item #i referenced a must-filter column that was not + // filtered in the WHERE or HAVING. It becomes a must-filter + // column for our consumer. + mustFilterFields.set(i); + } + if (bypassQualifieds.remove(qualified)) { + // SELECT item #i referenced a bypass column that was not filtered in the WHERE + // or HAVING. It becomes a bypass column for our consumer. + mustFilterBypassFields.set(i); + projectedNonFilteredBypassField.set(0); + } + } + }); + + // If there are must-filter fields that are not in the SELECT clause and there were no + // bypass-fields on this table, this is an error. + if (!qualifieds.isEmpty() && !projectedNonFilteredBypassField.get(0)) { + throw newValidationError(select, + RESOURCE.mustFilterFieldsMissing( + qualifieds.stream() + .map(q -> q.suffix().get(0)) + .collect(Collectors.toCollection(TreeSet::new)) + .toString())); + } + // Remaining must-filter fields can be defused by a bypass-field, + // so we pass this to the consumer. + Set remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) + .flatMap(Set::stream).collect(Collectors.toSet()); + ns.mustFilterRequirements = + new MustFilterRequirements(ImmutableBitSet.fromBitSet(mustFilterFields), + ImmutableBitSet.fromBitSet(mustFilterBypassFields), + remnantMustFilterFields); + } + } + } + @Override public void validateWith(SqlWith with, SqlValidatorScope scope) { final SqlValidatorNamespace namespace = getNamespaceOrThrow(with); validateNamespace(namespace, unknownType); From b0ec462ba144db00729e6fc024d819e3afede48a Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 18 Oct 2024 11:45:27 -0700 Subject: [PATCH 7/9] Change MustFilterRequirement type to ImmutableSet --- .../apache/calcite/sql/validate/MustFilterRequirements.java | 6 ++---- .../org/apache/calcite/sql/validate/SqlValidatorImpl.java | 5 +++-- .../org/apache/calcite/sql/validate/TableNamespace.java | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java index 90b425aa83e..bf915795d14 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java @@ -20,8 +20,6 @@ import com.google.common.collect.ImmutableSet; -import java.util.Set; - /** * Class that encapsulates filtering requirements when overloading SemanticTable. * @@ -37,9 +35,9 @@ public class MustFilterRequirements { ImmutableBitSet mustFilterFields; ImmutableBitSet mustFilterBypassFields; - protected Set remnantMustFilterFields; + protected ImmutableSet remnantMustFilterFields; public MustFilterRequirements(ImmutableBitSet mustFilterFields, - ImmutableBitSet mustFilterBypassFields, Set remnantMustFilterFields) { + ImmutableBitSet mustFilterBypassFields, ImmutableSet remnantMustFilterFields) { this.mustFilterFields = mustFilterFields; this.mustFilterBypassFields = mustFilterBypassFields; this.remnantMustFilterFields = remnantMustFilterFields; diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index 81067b14964..551650ff05f 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -4710,8 +4710,9 @@ protected void validateMustFilterRequirements(SqlSelect select, SelectNamespace } // Remaining must-filter fields can be defused by a bypass-field, // so we pass this to the consumer. - Set remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) - .flatMap(Set::stream).collect(Collectors.toSet()); + ImmutableSet remnantMustFilterFields = + Stream.of(remnantQualifieds, qualifieds) + .flatMap(Set::stream).collect(ImmutableSet.toImmutableSet()); ns.mustFilterRequirements = new MustFilterRequirements(ImmutableBitSet.fromBitSet(mustFilterFields), ImmutableBitSet.fromBitSet(mustFilterBypassFields), diff --git a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java index f21bd1c0404..e4fa86509e7 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java @@ -30,11 +30,11 @@ import org.apache.calcite.util.Util; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.checkerframework.checker.nullness.qual.Nullable; -import java.util.HashSet; import java.util.List; import java.util.Map; @@ -72,7 +72,7 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table, // We pass in an empty set for remnantMustFilterFields here because it isn't exposed to // SemanticTable and only mustFilterFields and bypassFieldList should be supplied. this.mustFilterRequirements = - new MustFilterRequirements(mustFilterFields, bypassFieldList, new HashSet<>()); + new MustFilterRequirements(mustFilterFields, bypassFieldList, ImmutableSet.of()); }); if (extendedFields.isEmpty()) { return table.getRowType(); From d1e6027ccd3949d671d966bccda03e9d98705d76 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 18 Oct 2024 11:58:18 -0700 Subject: [PATCH 8/9] Add Javadoc spacing for LintTest --- .../org/apache/calcite/sql/validate/MustFilterRequirements.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java index bf915795d14..4c751224422 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java @@ -25,8 +25,10 @@ * *

mustFilterFields: the ordinals (in the row type) of the "must-filter" fields, * fields that must be filtered in a query. + * *

mustFilterBypassFields: the ordinals (in the row type) of the "bypass" fields, * fields that can defuse validation errors on mustFilterFields if filtered on. + * *

remnantMustFilterFields: Set of mustFilterField SqlQualifieds that have not been defused * in the current query, but can still be defused by filtering on a bypass field in the * enclosing query. From 90b0f5b6baf87b8c82b0d027f39b5f8795a1f2b2 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 18 Oct 2024 14:12:14 -0700 Subject: [PATCH 9/9] Rename to FilterRequirement, make class Immutable, add javadoc with examples --- .../sql/validate/AbstractNamespace.java | 8 +- .../sql/validate/FilterRequirement.java | 118 ++++++++++++++++++ .../sql/validate/IdentifierNamespace.java | 2 +- .../sql/validate/MustFilterRequirements.java | 51 -------- .../sql/validate/SqlValidatorImpl.java | 30 ++--- .../sql/validate/SqlValidatorNamespace.java | 6 +- .../calcite/sql/validate/TableNamespace.java | 7 +- .../sql/validate/WithItemNamespace.java | 2 +- .../calcite/sql/validate/WithNamespace.java | 2 +- 9 files changed, 146 insertions(+), 80 deletions(-) create mode 100644 core/src/main/java/org/apache/calcite/sql/validate/FilterRequirement.java delete mode 100644 core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java diff --git a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java index c78841a8b99..b0eefb31d47 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/AbstractNamespace.java @@ -63,7 +63,7 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { * validation internally. Initialized as empty object, but should typically be re-assiged * on validate. */ - protected MustFilterRequirements mustFilterRequirements = new MustFilterRequirements(); + protected FilterRequirement filterRequirement = new FilterRequirement(); protected final @Nullable SqlNode enclosingNode; @@ -167,9 +167,9 @@ abstract class AbstractNamespace implements SqlValidatorNamespace { return ImmutableList.of(); } - @Override public MustFilterRequirements getMustFilterRequirements() { - return requireNonNull(mustFilterRequirements, - "mustFilterFields (maybe validation is not complete?)"); + @Override public FilterRequirement getFilterRequirement() { + return requireNonNull(filterRequirement, + "filterRequirement (maybe validation is not complete?)"); } @Override public SqlMonotonicity getMonotonicity(String columnName) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/FilterRequirement.java b/core/src/main/java/org/apache/calcite/sql/validate/FilterRequirement.java new file mode 100644 index 00000000000..22c1826e1a9 --- /dev/null +++ b/core/src/main/java/org/apache/calcite/sql/validate/FilterRequirement.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.sql.validate; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +/** + * Class that encapsulates filtering requirements when overloading SemanticTable.
+ * + *

A few examples of the behavior:
+ * + *

Table t has a must-filter field f and bypass-fields b0 + * and b1.
+ * SQL: select f from t;
-> Errors because there's no filter on f.
+ * SQL: select * from (select f from t);
-> Errors at the inner query because + * there's no filter on f.
+ * SQL: select f from t where f = 1;
-> Valid because there is a filter on f.
+ * SQL: select * from (select f from t) where f = 1;
-> Valid because there is a + * filter on f.
+ * SQL: select f from t where b0 = 1;
-> Valid because there is a filter on + * bypass-field b0.
+ * + *

Some notes on remnantFilterFields.
+ * remnantFilterFields is used to identify whether the query should error + * at the top level query. It is populated with the filter-field value when a filter-field is not + * selected or filtered on, but a bypass-field for the table is selected. + * The remnantFilterFields are no longer accessible by the enclosing query and hence can no + * longer be defused by filtering on it; however, it can be defused if the bypass-field is + * filtered on, hence we need to keep track of it. + + * Example:
+ * Table t has a must-filter field f and bypass-fields b0 + * and b1.
+ * SQL: select b0, b1 from t;
+ * + *

This results in:
+ * filterFields:[]
+ * bypassFields:[b0, b1]
+ * remnantFilterFields: [f]
+ * -> Errors because it is a top level query and remnantFilterFields is not empty.
+ * + *

SQL: select * from (select b0, b1 from t) where b0 = 1;
+ * When unwrapping the inner query we get the same FilterRequirement as the previous example:
+ * filterFields:[]
+ * bypassFields:[b0, b1]
+ * remnantFilterFields: [f]
+ * When unwrapping the top level query, the filter on b0 defuses the remnantFilterField requirement + * of [f] because it originated from the same table, resulting in the following:
+ * filterFields:[]
+ * bypassFields:[b0, b1]
+ * remnantFilterFields: []
+ * -> Valid because remnantFilterFields is empty now. + */ +final class FilterRequirement { + + /** The ordinals (in the row type) of the "must-filter" fields, + * fields that must be filtered in a query.*/ + private final ImmutableBitSet filterFields; + /** The ordinals (in the row type) of the "bypass" fields, + * fields that can defuse validation errors on filterFields if filtered on. */ + private final ImmutableBitSet bypassFields; + /** Set of filterField SqlQualifieds that have not been defused + * in the current query, but can still be defused by filtering on a bypass field in the + * enclosing query.*/ + private final ImmutableSet remnantFilterFields; + + /** + * Creates a FilterRequirement. + * + * @param filterFields Ordinals of the "must-filter" fields. + * @param bypassFields Ordinals of the "bypass" fields. + * @param remnantFilterFields Filter fields that can no longer be filtered on, + * but can only be defused if a bypass field is filtered on. + */ + FilterRequirement(ImmutableBitSet filterFields, + ImmutableBitSet bypassFields, Set remnantFilterFields) { + this.filterFields = ImmutableBitSet.of(filterFields); + this.bypassFields = ImmutableBitSet.of(bypassFields); + this.remnantFilterFields = ImmutableSet.copyOf(remnantFilterFields); + } + + /** Creates an empty FilterRequirement. */ + FilterRequirement() { + this(ImmutableBitSet.of(), ImmutableBitSet.of(), ImmutableSet.of()); + } + /** Returns filterFields. */ + public ImmutableBitSet getFilterFields() { + return filterFields; + } + + /** Returns bypassFields. */ + public ImmutableBitSet getBypassFields() { + return bypassFields; + } + + /** Returns remnantFilterFields. */ + public ImmutableSet getRemnantFilterFields() { + return remnantFilterFields; + } +} diff --git a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java index beeeac37223..9291752b90a 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/IdentifierNamespace.java @@ -210,7 +210,7 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) { } } - mustFilterRequirements = resolvedNamespace.getMustFilterRequirements(); + filterRequirement = resolvedNamespace.getFilterRequirement(); RelDataType rowType = resolvedNamespace.getRowType(); if (extendList != null) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java b/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java deleted file mode 100644 index 4c751224422..00000000000 --- a/core/src/main/java/org/apache/calcite/sql/validate/MustFilterRequirements.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.calcite.sql.validate; - -import org.apache.calcite.util.ImmutableBitSet; - -import com.google.common.collect.ImmutableSet; - -/** - * Class that encapsulates filtering requirements when overloading SemanticTable. - * - *

mustFilterFields: the ordinals (in the row type) of the "must-filter" fields, - * fields that must be filtered in a query. - * - *

mustFilterBypassFields: the ordinals (in the row type) of the "bypass" fields, - * fields that can defuse validation errors on mustFilterFields if filtered on. - * - *

remnantMustFilterFields: Set of mustFilterField SqlQualifieds that have not been defused - * in the current query, but can still be defused by filtering on a bypass field in the - * enclosing query. - */ -public class MustFilterRequirements { - - ImmutableBitSet mustFilterFields; - ImmutableBitSet mustFilterBypassFields; - protected ImmutableSet remnantMustFilterFields; - public MustFilterRequirements(ImmutableBitSet mustFilterFields, - ImmutableBitSet mustFilterBypassFields, ImmutableSet remnantMustFilterFields) { - this.mustFilterFields = mustFilterFields; - this.mustFilterBypassFields = mustFilterBypassFields; - this.remnantMustFilterFields = remnantMustFilterFields; - } - - public MustFilterRequirements() { - this(ImmutableBitSet.of(), ImmutableBitSet.of(), ImmutableSet.of()); - } -} diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java index 551650ff05f..cb08493b247 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java @@ -1213,7 +1213,7 @@ protected void validateNamespace(final SqlValidatorNamespace namespace, RelDataType type = namespace.getType(); if (node == top) { - MustFilterRequirements mustFilterRequirements = namespace.getMustFilterRequirements(); + FilterRequirement filterRequirement = namespace.getFilterRequirement(); // Either of the following two conditions result in an invalid query: // 1) A top-level namespace must not return any must-filter fields. // A non-top-level namespace (e.g. a subquery) may return must-filter @@ -1221,13 +1221,13 @@ protected void validateNamespace(final SqlValidatorNamespace namespace, // 2) A top-level namespace must not have any remnant-must-filter fields. // Remnant must filter fields are fields that are not selected and cannot // be defused unless a bypass field defuses it. - if (!mustFilterRequirements.mustFilterFields.isEmpty() - || !mustFilterRequirements.remnantMustFilterFields.isEmpty()) { + if (!filterRequirement.getFilterFields().isEmpty() + || !filterRequirement.getRemnantFilterFields().isEmpty()) { Stream mustFilterStream = - StreamSupport.stream(mustFilterRequirements.mustFilterFields.spliterator(), false) + StreamSupport.stream(filterRequirement.getFilterFields().spliterator(), false) .map(namespace.getRowType().getFieldNames()::get); Stream remnantStream = - mustFilterRequirements.remnantMustFilterFields.stream() + filterRequirement.getRemnantFilterFields().stream() .map(q -> q.suffix().get(0)); // Set of field names, sorted alphabetically for determinism. @@ -4638,7 +4638,7 @@ protected void validateQualifyClause(SqlSelect select) { } protected void validateMustFilterRequirements(SqlSelect select, SelectNamespace ns) { - ns.mustFilterRequirements = new MustFilterRequirements(); + ns.filterRequirement = new FilterRequirement(); if (select.getFrom() != null) { final SelectScope fromScope = (SelectScope) getFromScope(select); final BitSet projectedNonFilteredBypassField = new BitSet(); @@ -4648,12 +4648,12 @@ protected void validateMustFilterRequirements(SqlSelect select, SelectNamespace for (ScopeChild child : fromScope.children) { final List fieldNames = child.namespace.getRowType().getFieldNames(); - MustFilterRequirements mustFilterReqs = child.namespace.getMustFilterRequirements(); - toQualifieds(mustFilterReqs.mustFilterFields, qualifieds, fromScope, child, + FilterRequirement mustFilterReq = child.namespace.getFilterRequirement(); + toQualifieds(mustFilterReq.getFilterFields(), qualifieds, fromScope, child, fieldNames); - toQualifieds(mustFilterReqs.mustFilterBypassFields, bypassQualifieds, fromScope, + toQualifieds(mustFilterReq.getBypassFields(), bypassQualifieds, fromScope, child, fieldNames); - remnantQualifieds.addAll(mustFilterReqs.remnantMustFilterFields); + remnantQualifieds.addAll(mustFilterReq.getRemnantFilterFields()); } if (!qualifieds.isEmpty() || !bypassQualifieds.isEmpty()) { if (select.getWhere() != null) { @@ -4712,11 +4712,11 @@ protected void validateMustFilterRequirements(SqlSelect select, SelectNamespace // so we pass this to the consumer. ImmutableSet remnantMustFilterFields = Stream.of(remnantQualifieds, qualifieds) - .flatMap(Set::stream).collect(ImmutableSet.toImmutableSet()); - ns.mustFilterRequirements = - new MustFilterRequirements(ImmutableBitSet.fromBitSet(mustFilterFields), - ImmutableBitSet.fromBitSet(mustFilterBypassFields), - remnantMustFilterFields); + .flatMap(Set::stream).collect(ImmutableSet.toImmutableSet()); + ns.filterRequirement = + new FilterRequirement(ImmutableBitSet.fromBitSet(mustFilterFields), + ImmutableBitSet.fromBitSet(mustFilterBypassFields), + remnantMustFilterFields); } } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java index 00073abd68b..77679237fd6 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorNamespace.java @@ -213,8 +213,8 @@ default boolean fieldExists(String name) { */ boolean supportsModality(SqlModality modality); - /** Returns a MustFilterRequirements object used during validation. */ - default MustFilterRequirements getMustFilterRequirements() { - return new MustFilterRequirements(); + /** Returns a FilterRequirement object used during validation. */ + default FilterRequirement getFilterRequirement() { + return new FilterRequirement(); } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java index e4fa86509e7..7cbffb3d100 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/TableNamespace.java @@ -67,12 +67,11 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table, .map(RelDataTypeField::getIndex) .filter(semanticTable::mustFilter) .collect(toImmutableBitSet()); - ImmutableBitSet bypassFieldList = semanticTable.bypassFieldList().stream() - .collect(toImmutableBitSet()); + ImmutableBitSet bypassFieldList = ImmutableBitSet.of(semanticTable.bypassFieldList()); // We pass in an empty set for remnantMustFilterFields here because it isn't exposed to // SemanticTable and only mustFilterFields and bypassFieldList should be supplied. - this.mustFilterRequirements = - new MustFilterRequirements(mustFilterFields, bypassFieldList, ImmutableSet.of()); + this.filterRequirement = + new FilterRequirement(mustFilterFields, bypassFieldList, ImmutableSet.of()); }); if (extendedFields.isEmpty()) { return table.getRowType(); diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java index d1f42e741f0..dc41d92d3c8 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithItemNamespace.java @@ -41,7 +41,7 @@ class WithItemNamespace extends AbstractNamespace { final SqlValidatorNamespace childNs = validator.getNamespaceOrThrow(getQuery()); final RelDataType rowType = childNs.getRowTypeSansSystemColumns(); - mustFilterRequirements = childNs.getMustFilterRequirements(); + filterRequirement = childNs.getFilterRequirement(); SqlNodeList columnList = withItem.columnList; if (columnList == null) { return rowType; diff --git a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java index 1cc0dcbc1d5..286b91d3cce 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/WithNamespace.java @@ -64,7 +64,7 @@ public class WithNamespace extends AbstractNamespace { validator.validateQuery(with.body, scope2, targetRowType); final RelDataType rowType = validator.getValidatedNodeType(with.body); validator.setValidatedNodeType(with, rowType); - mustFilterRequirements = bodyNamespace.getMustFilterRequirements(); + filterRequirement = bodyNamespace.getFilterRequirement(); return rowType; }