
Commit 3cfacc1

MyroslavLevchyk authored and committed
feat: merge aws and azure databricks runtime modules
1 parent 2be3dfc commit 3cfacc1

11 files changed: +963 −3 lines changed
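With the AWS and Azure variants merged into one module, the target cloud is selected by a single cloud_name input that drives the conditional logic in the files below. A minimal, purely illustrative call of the merged module might look like the following sketch (the module name, source path, and values are assumptions, not part of this commit; other required inputs are omitted for brevity):

module "databricks_runtime" {
  source = "../modules/databricks-runtime"   # hypothetical path

  cloud_name = "azure"   # or "aws"; selects the provider-specific branches in the resources below
  clusters   = []        # see the example after cluster.tf
}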

README.md

+236 −3
Large diffs are not rendered by default.

cluster.tf

+192
@@ -0,0 +1,192 @@
locals {
  spark_conf_single_node = var.cloud_name == "azure" ? {
    "spark.master"                     = "local[*]",
    "spark.databricks.cluster.profile" = "singleNode"
  } : {}

  default_node_type_ids = {
    azure_node_type_id = "Standard_D4ds_v5"
    aws_node_type_id   = "m5d.large"
    # gcp_node_type_id = "gcp-default-node-type-id"
  }
}

resource "databricks_cluster" "this" {
  for_each = { for cluster in var.clusters : cluster.cluster_name => cluster }

  cluster_name            = each.value.cluster_name
  spark_version           = each.value.spark_version
  node_type_id            = coalesce(each.value.node_type_id, local.default_node_type_ids["${var.cloud_name}_node_type_id"])
  autotermination_minutes = each.value.autotermination_minutes
  data_security_mode      = each.value.data_security_mode
  custom_tags             = var.cloud_name == "azure" && each.value.single_node_enable ? merge({ "ResourceClass" = "SingleNode" }, each.value.custom_tags) : each.value.custom_tags

  # Azure conditional configuration for Spark conf
  spark_conf = var.cloud_name == "azure" ? merge(
    each.value.single_node_enable == true ? local.spark_conf_single_node : {},
    each.value.spark_conf
  ) : each.value.spark_conf

  # Autoscaling block (AWS clusters, or any cluster not running in single node mode)
  dynamic "autoscale" {
    for_each = var.cloud_name == "aws" || !each.value.single_node_enable ? [1] : []
    content {
      min_workers = each.value.min_workers
      max_workers = each.value.max_workers
    }
  }

  # AWS-specific attributes
  dynamic "aws_attributes" {
    for_each = var.cloud_name == "aws" ? [each.value] : []
    content {
      availability           = each.value.aws_attributes.availability
      zone_id                = each.value.aws_attributes.zone_id
      first_on_demand        = each.value.aws_attributes.first_on_demand
      spot_bid_price_percent = each.value.aws_attributes.spot_bid_price_percent
      ebs_volume_count       = each.value.aws_attributes.ebs_volume_count
      ebs_volume_size        = each.value.aws_attributes.ebs_volume_size
      ebs_volume_type        = each.value.aws_attributes.ebs_volume_type
    }
  }

  # Azure-specific attributes
  dynamic "azure_attributes" {
    for_each = var.cloud_name == "azure" ? [each.value] : []
    content {
      availability       = each.value.azure_attributes.availability
      first_on_demand    = each.value.azure_attributes.first_on_demand
      spot_bid_max_price = each.value.azure_attributes.spot_bid_max_price
    }
  }

  # Cluster log delivery (currently Azure DBFS only)
  dynamic "cluster_log_conf" {
    for_each = var.cloud_name == "azure" && each.value.cluster_log_conf_destination != null ? [each.value.cluster_log_conf_destination] : []
    content {
      dynamic "dbfs" {
        for_each = var.cloud_name == "azure" ? [1] : []
        content {
          destination = cluster_log_conf.value
        }
      }

      # TODO
      # dynamic "s3" {
      #   for_each = var.cloud_name == "aws" ? [1] : []
      #   content {
      #     destination = "s3://acmecorp-main/cluster-logs"
      #     region      = var.region
      #   }
      # }
    }
  }

  dynamic "init_scripts" {
    for_each = each.value.init_scripts_workspace != null ? each.value.init_scripts_workspace : []
    content {
      workspace {
        destination = init_scripts.value
      }
    }
  }

  dynamic "init_scripts" {
    for_each = each.value.init_scripts_volumes != null ? each.value.init_scripts_volumes : []
    content {
      volumes {
        destination = init_scripts.value
      }
    }
  }

  dynamic "init_scripts" {
    for_each = var.cloud_name == "azure" && each.value.init_scripts_dbfs != null ? each.value.init_scripts_dbfs : []
    content {
      dbfs {
        destination = init_scripts.value
      }
    }
  }

  dynamic "init_scripts" {
    for_each = var.cloud_name == "azure" && each.value.init_scripts_abfss != null ? each.value.init_scripts_abfss : []
    content {
      abfss {
        destination = init_scripts.value
      }
    }
  }

  # Library configurations
  dynamic "library" {
    for_each = each.value.pypi_library_repository != null ? each.value.pypi_library_repository : []
    content {
      pypi {
        package = library.value
      }
    }
  }

  dynamic "library" {
    for_each = each.value.maven_library_repository != null ? each.value.maven_library_repository : []
    content {
      maven {
        coordinates = library.value.coordinates
        exclusions  = library.value.exclusions
      }
    }
  }
}

resource "databricks_cluster_policy" "this" {
  for_each = {
    for param in var.custom_cluster_policies : (param.name) => param.definition
    if param.definition != null
  }

  name       = each.key
  definition = jsonencode(each.value)
}

resource "databricks_cluster_policy" "overrides" {
  for_each = {
    for param in var.default_cluster_policies_override : (param.name) => param
    if param.definition != null
  }

  policy_family_id                   = each.value.family_id
  policy_family_definition_overrides = jsonencode(each.value.definition)
  name                               = each.key
}

resource "databricks_permissions" "policy" {
  for_each = {
    for param in var.custom_cluster_policies : param.name => param.can_use
    if param.can_use != null
  }

  cluster_policy_id = databricks_cluster_policy.this[each.key].id

  dynamic "access_control" {
    for_each = each.value
    content {
      group_name       = access_control.value
      permission_level = "CAN_USE"
    }
  }
}

resource "databricks_permissions" "clusters" {
  for_each = {
    for v in var.clusters : (v.cluster_name) => v
    if length(v.permissions) != 0
  }

  cluster_id = databricks_cluster.this[each.key].id

  dynamic "access_control" {
    for_each = each.value.permissions
    content {
      group_name       = access_control.value.group_name
      permission_level = access_control.value.permission_level
    }
  }
}
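For reference, a single entry of the clusters variable consumed by the for_each above might look like the sketch below. All values are illustrative assumptions, and the remaining object attributes (aws_attributes, init scripts, libraries, and so on) are assumed to be optional or defaulted in the module's variables.tf, which is not shown in this view.

clusters = [
  {
    cluster_name            = "shared"             # for_each key and cluster name
    spark_version           = "14.3.x-scala2.12"   # assumed runtime version
    node_type_id            = null                  # falls back to the per-cloud default above
    autotermination_minutes = 30
    data_security_mode      = "USER_ISOLATION"
    single_node_enable      = false
    min_workers             = 1
    max_workers             = 4
    spark_conf              = {}
    custom_tags             = {}
    permissions = [
      { group_name = "dev", permission_level = "CAN_RESTART" }   # hypothetical group
    ]
  }
]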

data.tf

+11
@@ -0,0 +1,11 @@
data "databricks_group" "account_groups" {
  for_each = local.iam_account_map

  display_name = each.key
}

data "databricks_current_metastore" "this" {
}

data "databricks_sql_warehouses" "all" {
}

iam.tf

+26
@@ -0,0 +1,26 @@
locals {
  iam_account_map = tomap({
    for group in var.iam_account_groups : group.group_name => group.entitlements
    if group.group_name != null
  })
}

resource "databricks_group" "this" {
  count = var.cloud_name == "azure" && length(local.iam_account_map) == 0 ? length(toset(keys(var.iam_workspace_groups))) : 0

  display_name = keys(var.iam_workspace_groups)[count.index]

  lifecycle {
    ignore_changes = [external_id, allow_cluster_create, allow_instance_pool_create, databricks_sql_access, workspace_access]
  }
}

resource "databricks_entitlements" "this" {
  for_each = local.iam_account_map

  group_id                   = data.databricks_group.account_groups[each.key].id
  allow_cluster_create       = contains(coalesce(each.value, ["none"]), "allow_cluster_create")
  allow_instance_pool_create = contains(coalesce(each.value, ["none"]), "allow_instance_pool_create")
  databricks_sql_access      = contains(coalesce(each.value, ["none"]), "databricks_sql_access")
  workspace_access           = true
}
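An iam_account_groups entry feeding the entitlement checks above could look like this sketch; the group name is hypothetical and must already exist at the Databricks account level, and the entitlements list maps directly onto the contains(coalesce(...)) expressions:

iam_account_groups = [
  {
    group_name   = "data-engineers"   # hypothetical account-level group
    entitlements = ["databricks_sql_access", "allow_cluster_create"]
  }
]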

main.tf

+23
@@ -0,0 +1,23 @@
resource "databricks_workspace_conf" "this" {
  custom_config = var.custom_config
}

resource "databricks_ip_access_list" "allowed_list" {
  label        = "allow_in"
  list_type    = "ALLOW"
  ip_addresses = flatten([for v in values(var.ip_addresses) : v])

  depends_on = [databricks_workspace_conf.this]
}

resource "databricks_token" "pat" {
  count            = var.workspace_admin_token_enabled ? 1 : 0
  comment          = "Terraform Provisioning"
  lifetime_seconds = var.pat_token_lifetime_seconds
}

resource "databricks_system_schema" "this" {
  for_each = var.system_schemas_enabled ? var.system_schemas : toset([])

  schema = each.value
}
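A possible set of inputs for these resources is sketched below; the keys and CIDRs are illustrative assumptions, and the depends_on above suggests the workspace conf (for example enabling IP access lists) is applied before the ALLOW list:

custom_config = {
  "enableIpAccessLists" = "true"     # assumed workspace conf key
}

ip_addresses = {
  office = ["203.0.113.0/24"]        # documentation-range CIDR, purely illustrative
}

workspace_admin_token_enabled = true
pat_token_lifetime_seconds    = 86400

system_schemas_enabled = true
system_schemas         = ["access", "billing"]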

mount.tf

+15
@@ -0,0 +1,15 @@
resource "databricks_mount" "adls" {
  for_each = var.mount_enabled && var.cloud_name == "azure" ? var.mountpoints : {}

  name       = each.key
  cluster_id = var.mount_cluster_name != null ? databricks_cluster.this[var.mount_cluster_name].id : null
  uri        = "abfss://${each.value["container_name"]}@${each.value["storage_account_name"]}.dfs.core.windows.net"
  extra_configs = {
    "fs.azure.account.auth.type" : "OAuth",
    "fs.azure.account.oauth.provider.type" : "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id" : var.mount_service_principal_client_id,
    "fs.azure.account.oauth2.client.secret" : databricks_secret.main["mount-sp-secret"].config_reference,
    "fs.azure.account.oauth2.client.endpoint" : "https://login.microsoftonline.com/${var.mount_service_principal_tenant_id}/oauth2/token",
    "fs.azure.createRemoteFileSystemDuringInitialization" : "false",
  }
}
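The mount inputs might be supplied as in the sketch below; the storage account, container, and cluster name are illustrative assumptions, the mount cluster must match a key in var.clusters, and the three mount_service_principal_* values must also be provided (the precondition in secrets.tf enforces this):

mount_enabled      = true
mount_cluster_name = "shared"                  # must match a cluster_name from var.clusters
mountpoints = {
  raw = {                                      # mount point name, illustrative
    storage_account_name = "examplestorage"    # hypothetical storage account
    container_name       = "raw"
  }
}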

outputs.tf

+33
@@ -0,0 +1,33 @@
output "sql_endpoint_jdbc_url" {
  value       = [for n in databricks_sql_endpoint.this : n.jdbc_url]
  description = "JDBC connection string of SQL Endpoint"
}

output "sql_endpoint_data_source_id" {
  value       = [for n in databricks_sql_endpoint.this : n.data_source_id]
  description = "ID of the data source for this endpoint"
}

output "token" {
  value       = length(databricks_token.pat) > 0 ? databricks_token.pat[0].token_value : null
  description = "Databricks Personal Authorization Token"
  sensitive   = true
}

output "clusters" {
  value = [for param in var.clusters : {
    name = param.cluster_name
    id   = databricks_cluster.this[param.cluster_name].id
  } if length(var.clusters) != 0]
  description = "Provides name and unique identifier for the clusters"
}

output "sql_warehouses_list" {
  value       = data.databricks_sql_warehouses.all.ids
  description = "List of IDs of all SQL warehouses in the Databricks workspace."
}

output "metastore_id" {
  value       = data.databricks_current_metastore.this.id
  description = "The ID of the current metastore in the Databricks workspace."
}
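A caller could re-export these values as in the sketch below; "module.databricks_runtime" is an assumed module name, not something defined in this commit:

output "cluster_ids" {
  value = [for c in module.databricks_runtime.clusters : c.id]
}

output "metastore_id" {
  value = module.databricks_runtime.metastore_id
}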

secrets.tf

+83
@@ -0,0 +1,83 @@
locals {
  mount_sp_secrets = var.cloud_name == "azure" ? {
    mount-sp-client-id = { value = var.mount_service_principal_client_id }
    mount-sp-secret    = { value = var.mount_service_principal_secret }
  } : {}

  secrets_acl_objects_list = flatten([for param in var.secret_scope : [
    for permission in param.acl : {
      scope = param.scope_name, principal = permission.principal, permission = permission.permission
    }] if param.acl != null
  ])

  secret_scope_config = { for object in var.secret_scope : object.scope_name => object }

  secret_scope_config_secrets = { for object in flatten([for k, v in local.secret_scope_config : [for secret in v.secrets : {
    scope_name   = k,
    secret_key   = secret.key,
    secret_value = secret.string_value,
  }]]) : "${object.scope_name}:${object.secret_key}" => object }
}

# Secret scope with Service Principal secrets for mounting Azure Data Lake Storage
resource "databricks_secret_scope" "main" {
  count = var.cloud_name == "azure" && var.mount_enabled ? 1 : 0

  name                     = "main"
  initial_manage_principal = null
}

resource "databricks_secret" "main" {
  for_each = var.cloud_name == "azure" && var.mount_enabled ? local.mount_sp_secrets : {}

  key          = each.key
  string_value = each.value["value"]
  scope        = databricks_secret_scope.main[0].id

  lifecycle {
    precondition {
      condition     = var.cloud_name == "azure" && var.mount_enabled ? length(compact([var.mount_service_principal_client_id, var.mount_service_principal_secret, var.mount_service_principal_tenant_id])) == 3 : true
      error_message = "To mount ADLS Storage, please provide prerequisite Service Principal values - 'mount_service_principal_client_id', 'mount_service_principal_secret', 'mount_service_principal_tenant_id'."
    }
  }
}

# Custom additional Databricks secret scopes
resource "databricks_secret_scope" "this" {
  for_each = {
    for param in var.secret_scope : (param.scope_name) => param
    if param.scope_name != null
  }

  name = each.key

  # Key Vault metadata block only for Azure
  dynamic "keyvault_metadata" {
    for_each = var.cloud_name == "azure" ? [for kv in var.key_vault_secret_scope : kv] : []
    content {
      resource_id = keyvault_metadata.value.key_vault_id
      dns_name    = keyvault_metadata.value.dns_name
    }
  }

  # This property is only relevant for Azure
  initial_manage_principal = var.cloud_name == "azure" ? null : null
}

resource "databricks_secret" "this" {
  for_each = local.secret_scope_config_secrets

  key          = each.value.secret_key
  string_value = each.value.secret_value
  scope        = databricks_secret_scope.this[each.value.scope_name].id
}

resource "databricks_secret_acl" "this" {
  for_each = var.cloud_name == "azure" && length(local.secrets_acl_objects_list) > 0 ? {
    for entry in local.secrets_acl_objects_list : "${entry.scope}.${entry.principal}.${entry.permission}" => entry
  } : {}

  scope      = databricks_secret_scope.this[each.value.scope].name
  principal  = length(var.iam_account_groups) != 0 ? data.databricks_group.account_groups[each.value.principal].display_name : databricks_group.this[each.value.principal].display_name
  permission = each.value.permission
}
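A secret_scope entry that exercises the scope, secret, and ACL resources above might look like the sketch below; the scope name, principal, and secret key are illustrative assumptions, and the secret ends up keyed as "app-secrets:service-api-key" in local.secret_scope_config_secrets:

secret_scope = [
  {
    scope_name = "app-secrets"   # hypothetical scope
    acl = [
      { principal = "data-engineers", permission = "READ" }   # group must exist in the workspace
    ]
    secrets = [
      { key = "service-api-key", string_value = "dummy-value" }   # placeholder value
    ]
  }
]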
