diff --git a/README.md b/README.md index 26c8376a..d43fa886 100644 --- a/README.md +++ b/README.md @@ -606,6 +606,38 @@ models: where: "num_orders > 0" ``` +### functional_dependency ([source](macros/generic_tests/functional_dependency.sql)) + +Asserts that one or more columns (the “determinants”) functionally determine another column (the “dependent”). For each unique combination of the determinant columns, there should be exactly one distinct value in the dependent column. If any combination of determinant columns maps to multiple dependent values, the test fails. + +Provide [a `where` argument](https://docs.getdbt.com/reference/resource-configs/where) to filter to specific records only (useful for partial checks). + +**Usage:** + +```yaml +version: 2 + +models: + - name: model_name + columns: + - name: col_a + - name: col_b + - name: col_y + tests: + - dbt_utils.functional_dependency: + determinants: + - col_a + - col_b + dependent: col_y + # Optional filtering + config: + where: "active = true" +``` + +In this example, `(col_a, col_b)` together determine `col_y`. If any `(col_a, col_b)` pair is associated with more than one distinct `col_y`, the test fails. If you only need a single column as the determinant, simply pass one item in the `determinants` list. + +Because the `where` clause uses the standard [dbt `config`](https://docs.getdbt.com/reference/configs-and-properties) pattern, you can further customize the scope of rows evaluated by this test (e.g., checking the dependency only for recent records). 
+ ---- ### Grouping in tests diff --git a/docker-compose.yml b/docker-compose.yml index 6957ffe8..8fe2b040 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,9 @@ version: "3.7" services: postgres: - image: cimg/postgres:9.6 + image: cimg/postgres:13.19 environment: - POSTGRES_USER=root + - POSTGRES_DB=dbt_utils_test ports: - "5432:5432" diff --git a/integration_tests/data/schema_tests/data_test_functional_dependency_fail.csv b/integration_tests/data/schema_tests/data_test_functional_dependency_fail.csv new file mode 100644 index 00000000..5e85815f --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_functional_dependency_fail.csv @@ -0,0 +1,5 @@ +idx,col_a,col_b,col_y +1,1,1,X +2,1,1,Y +3,2,1,X +4,2,1,X \ No newline at end of file diff --git a/integration_tests/data/schema_tests/data_test_functional_dependency_pass.csv b/integration_tests/data/schema_tests/data_test_functional_dependency_pass.csv new file mode 100644 index 00000000..89739f1d --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_functional_dependency_pass.csv @@ -0,0 +1,5 @@ +col_a,col_b,col_y +1,1,X +1,2,X +2,1,X +2,2,Y diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index b12e3c7f..8ac5cfc1 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -197,6 +197,24 @@ seeds: error_if: "<1" #sneaky way to ensure that the test is returning failing rows warn_if: "<0" + - name: data_test_functional_dependency_pass + data_tests: + - dbt_utils.functional_dependency: + determinants: + - col_a + - col_b + dependent: col_y + + - name: data_test_functional_dependency_fail + data_tests: + - dbt_utils.functional_dependency: + determinants: + - col_a + - col_b + dependent: col_y + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + models: - name: recency_time_included data_tests: @@ -261,3 +279,4 @@ 
{#
    Generic test: functional_dependency

    Asserts that the `determinants` column(s) functionally determine the
    `dependent` column: every distinct combination of determinant values must
    map to exactly one dependent value.

    Arguments:
        model         Relation under test (supplied by dbt).
        determinants  List of column names/expressions forming the determinant
                      key. A single column may also be given as a bare string.
        dependent     Column name/expression that must be determined by the key.
        where_clause  Optional SQL predicate applied before grouping. This is in
                      addition to any filter set through the test's `where`
                      config, which dbt applies to `model` itself.

    Returns one row per violating determinant combination, containing the
    determinant columns and `distinct_dependent_count`. The test passes when no
    rows are returned.

    NULL handling: NULL in the dependent column is treated as a value of its
    own, so a key that maps to both 'X' and NULL is reported as a violation.
#}
{% test functional_dependency(model, determinants, dependent, where_clause=None) %}
  {{ return(adapter.dispatch('test_functional_dependency', 'dbt_utils')(model, determinants, dependent, where_clause)) }}
{% endtest %}

{% macro default__test_functional_dependency(model, determinants, dependent, where_clause=None) %}

{#- Accept a bare column name as well as a list; iterating a string would
    otherwise yield one "column" per character. -#}
{%- set determinant_cols = [determinants] if determinants is string else determinants -%}

{#- Fail at compile time with a readable message instead of emitting
    `select , count(...) ... group by having ...`. -#}
{%- if not determinant_cols -%}
    {{ exceptions.raise_compiler_error("`determinants` must contain at least one column for the `functional_dependency` test on " ~ model) }}
{%- endif -%}

{%- set determinant_csv = determinant_cols | join(', ') -%}

{#- count(distinct ...) skips NULLs, so add 1 when the group contains at least
    one NULL; otherwise a key mapping to both a value and NULL would pass. -#}
{%- set distinct_dependent_count -%}
count(distinct {{ dependent }}) + max(case when {{ dependent }} is null then 1 else 0 end)
{%- endset -%}

with filtered as (
    select *
    from {{ model }}
    {% if where_clause %}
    where {{ where_clause }}
    {% endif %}
),

violations as (
    select
        {{ determinant_csv }},
        {{ distinct_dependent_count }} as distinct_dependent_count
    from filtered
    group by
        {{ determinant_csv }}
    having {{ distinct_dependent_count }} > 1
)

select *
from violations

{% endmacro %}