Problem configuring the Databricks provider in Terraform (Azure Databricks)


I have two Terraform pipelines in Azure DevOps (ADO). The Terraform version is v1.5.3.

First pipeline - creates an Azure Databricks workspace, an Azure Data Lake (ADLS Gen2) storage account, and an Azure Access Connector for Databricks using Terraform. It also assigns the "Storage Blob Data Contributor" role on the Data Lake storage account to the Access Connector. (Terraform stores its state file in the storage container defined in main.tf.)
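
For reference, inside the access connector module the role assignment looks roughly like this (a simplified sketch of the module internals; the resource and variable names here are illustrative, not the exact module code):

# Hypothetical internals of Module-DataBrickAccessConnector-v1
resource "azurerm_databricks_access_connector" "this" {
  name                = var.access_connector_name
  resource_group_name = var.resource_group_name
  location            = var.location

  # system-assigned managed identity used to reach the Data Lake
  identity {
    type = "SystemAssigned"
  }
}

# grant the connector's managed identity access to the storage account
resource "azurerm_role_assignment" "uc_storage" {
  scope                = var.storage_account_id
  role_definition_name = "Storage Blob Data Contributor"
  principal_id         = azurerm_databricks_access_connector.this.identity[0].principal_id
}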

Second pipeline - this ADO pipeline is supposed to log in to Databricks and create a metastore using the resources created by the first pipeline. Since the first pipeline's state file is stored in that container, I can read and use all of those resources in the second pipeline via remote_state.tf. My problem is with the second pipeline: I am not able to configure provider "databricks" {} properly.

This is the Terraform code for the first pipeline:

# create Resource Group
module "resource_group" {
  source = "./modules/Module-ResourceGroup-v1"

  resource_group_name = var.resource_group_name
  location            = var.location
}

# create Azure Databricks service
module "adb" {
  
  source = "./modules/Module-DataBrickWorkSpace-v1"
  
  databrick_workspace_name    = var.databrick_workspace_name
  resource_group_name         = module.resource_group.resource_group_name
  resource_group_location     = module.resource_group.resource_group_location
  managed_resource_group_name = var.managed_resource_group_name
}

# create Azure Data Lake
module "adc-uc-storage" {
  source                      = "./modules/Module-StorageForUnityCatalog-v1"

  uc_storage_account_name     = var.uc_storage_account_name
  resource_group_name         = module.resource_group.resource_group_name
  resource_group_location     = module.resource_group.resource_group_location
  uc_container_name           = var.uc_container_name
}

# creating access connector
module "adb-access-connector" {
  source                      = "./modules/Module-DataBrickAccessConnector-v1"

  access_connector_name       = var.access_connector_name
  location                    = module.resource_group.resource_group_location 
  resource_group_name         = module.resource_group.resource_group_name
  storage_account_id          = module.adc-uc-storage.storage_account_id
}

output "databricks_workspace_resource_id" {
  description = " Resource ID of databrick workspace"
  value = module.adb.databricks_workspace_resource_id
}

output "databrick_workspace_url" {
  description = "URL of databrick workspace"
  value = module.adb.databricks_workspace_url
}

output "databrick_host" {
  description = "The host part of URL of the Databricks workspace"
  value = module.adb.databricks_host
}

output "databricks_workspace_id" {
  description = "The ID of the Databricks workspace"
  value = module.adb.databricks_workspace_id
}
######################################################
output "resource_group_name" {
  description = "name of the adb resource group"
  value = module.resource_group.resource_group_name
}

output "resource_group_id" {
  description = "id of resource group"
  value = module.resource_group.resource_group_id
}

output "resource_group_location" {
 description = "location of resource group"
 value = module.resource_group.resource_group_location
}
###################################################
output "storage_account_id" {
  description = "The ID of the storage account"
  value       = module.adc-uc-storage.storage_account_id
}

output "storage_account_name" {
  description = "The name of the storage account"
  value       = module.adc-uc-storage.storage_account_name
}

output "storage_container_name" {
  description = "The name of the storage container"
  value       = module.adc-uc-storage.storage_container_name
}
#
output "storage_account_principal_id" {
  description = "The principal ID for the storage account"
  value       = module.adc-uc-storage.storage_account_principal_id
}
#
#############################################################
#
output "access_connector_unity_catalog_principal_id" {
  description = "The identity of the Databricks Access Connector"
  value       = module.adb-access-connector.access_connector_unity_catalog_principal_id
}
#
output "access_connector_unity_catalog_resource_id" {
  description = "The ID of the Databricks Access Connector"
  value       = module.adb-access-connector.access_connector_unity_catalog_resource_id
}
#
output "access_connector_name" {
  description = "The name of the Databricks Access Connector"
  value       = module.adb-access-connector.access_connector_name
}
#
output "access_connector_resource_group_name" {
  description = "The name of the resource group where the Databricks Access Connector is created"
  value       = module.adb-access-connector.access_connector_resource_group_name
}
#
output "access_connector_location" {
  description = "The location of the Databricks Access Connector"
  value       = module.adb-access-connector.access_connector_location

This is the Terraform code for the second pipeline:

#remote_state.tf
data "terraform_remote_state" "dev" {
  backend = "azurerm"
  config = {
    resource_group_name  = "pipeline1-storageblob-resourcegroup"
    storage_account_name = "pipeline1storageblob"
    access_key           = "SECRET" # primary access key of the storage account
    container_name       = "pipeline1-storageblob-container"
    key                  = "pipeline1-state-file"
  }
}
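
(Side note: instead of hardcoding access_key, the azurerm backend can also read the key from the ARM_ACCESS_KEY environment variable, which keeps the secret out of the repository. A minimal sketch, assuming ARM_ACCESS_KEY is set as a secret pipeline variable:)

data "terraform_remote_state" "dev" {
  backend = "azurerm"
  config = {
    resource_group_name  = "pipeline1-storageblob-resourcegroup"
    storage_account_name = "pipeline1storageblob"
    container_name       = "pipeline1-storageblob-container"
    key                  = "pipeline1-state-file"
    # access_key omitted: the backend reads it from ARM_ACCESS_KEY
  }
}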

#main.tf 
terraform {
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.0"
    }
    databricks = {
      source  = "databricks/databricks"
      version = "1.21.0"
    }
  }
}

provider "azurerm" {
  features {}
  subscription_id   = "XXXXXXXXXXXXXXXXXXXXXXX"
  tenant_id         = "YYYYYYYYYYYYYYYYYYYYYYY"
  client_id         = "CCCCCCCCCCCCCCCCCCCCCCC"
  client_secret     = "SSSSSSSSSSSSSSSSSSSSSSS"
}

provider "databricks" {
  azure_workspace_resource_id = data.terraform_remote_state.dev.outputs.databricks_workspace_resource_id # readable from pipeline 1's outputs
  azure_client_id             = "CCCCCCCCCCCCCCCCCCCCCCC"
  azure_client_secret         = "SSSSSSSSSSSSSSSSSSSSSSS"
  azure_tenant_id             = "YYYYYYYYYYYYYYYYYYYYYYY"
}
#resources.tf
resource "databricks_metastore" "metastore" {
  name          = "metastore-allenv"
  storage_root  = format(
                          "abfss://%s@%s.dfs.core.windows.net/",
                          data.terraform_remote_state.dev.outputs.storage_container_name,
                          data.terraform_remote_state.dev.outputs.storage_account_name
                         )
  force_destroy = true
}
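
For example, with hypothetical pipeline 1 outputs storage_container_name = "pipeline1-uc-container" and storage_account_name = "pipeline1ucstorage", the format() call produces storage_root = "abfss://pipeline1-uc-container@pipeline1ucstorage.dfs.core.windows.net/".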

**This is the error I am getting:**

│ Error: cannot create metastore: Only account admin can create metastores.
│
│   with databricks_metastore.metastore,
│   on resources.tf line 1, in resource "databricks_metastore" "metastore":
│    1: resource "databricks_metastore" "metastore" {
╵

Validation:

My service principal is a member of the Global Administrator role in Azure Active Directory.
The service principal also has the following roles assigned at the subscription level:
1. Owner
2. Storage Blob Data Owner
3. User Access Administrator

Links:

https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/automate
https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/unity-catalog-azure

How can I create the metastore?

1 Answer

Venkat V:
Error: cannot create metastore: Only account admin can create metastores.
│   with databricks_metastore.metastore,
│   on resources.tf line 1, in resource "databricks_metastore" "metastore":
│    1: resource "databricks_metastore" "metastore" {
╵

You must have the account admin role to create a metastore.

Follow this MS doc: How to enable your first account admin.

To resolve the issue, assign the account admin role to the service principal with the following steps (a Terraform sketch of the same assignment follows the list):

  1. Log in to accounts.azuredatabricks.net with a Global Administrator account.
  2. Go to User management > Service principals > Add service principal (provide your SP name and application ID).
  3. Navigate to Roles and assign the Account admin role.
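
Once a first account admin exists, the same assignment can also be expressed in Terraform through the account-level Databricks provider. This is only a sketch under assumptions: the account ID and credentials are placeholders, and the databricks_service_principal_role resource with role = "account_admin" should be verified against the Databricks provider documentation for your provider version.

# account-level provider: talks to the account console, not a workspace
provider "databricks" {
  alias               = "account"
  host                = "https://accounts.azuredatabricks.net"
  account_id          = "00000000-0000-0000-0000-000000000000" # Databricks account ID (placeholder)
  azure_client_id     = "CCCCCCCCCCCCCCCCCCCCCCC"
  azure_client_secret = "SSSSSSSSSSSSSSSSSSSSSSS"
  azure_tenant_id     = "YYYYYYYYYYYYYYYYYYYYYYY"
}

# register the pipeline's service principal at the account level
resource "databricks_service_principal" "automation" {
  provider       = databricks.account
  application_id = "CCCCCCCCCCCCCCCCCCCCCCC" # the SP the pipelines run as
  display_name   = "terraform-automation-sp"
}

# grant it the account admin role
resource "databricks_service_principal_role" "account_admin" {
  provider             = databricks.account
  service_principal_id = databricks_service_principal.automation.id
  role                 = "account_admin"
}

Note the bootstrap problem: whatever identity runs this must already be an account admin, which is why the very first admin has to be enabled manually in the account console as described above.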


Terraform code to create the metastore:

terraform {
  required_providers {
    azurerm = {
      source = "hashicorp/azurerm"
    }
    databricks = {
      source = "databricks/databricks"
    }
  }
}

provider "azurerm" {
  features {}
  subscription_id   = "xxxx-7af2-43f1-bd66-12e77ac05818"
  tenant_id         = "xxxxx-b0e9-cxxxx-a944-627017451367"
  client_id         = "b2ba5bf1djdjdjdjdd-8aae-ed3a4b006e70"
  client_secret     = "gchdhdhdbjdjdjdjdnnj"
}

data "azurerm_resource_group" "this" {
  name = "Sri"
}

resource "azurerm_storage_account" "unity_catalog" {
  name                     = "storagedatabrickss"
  resource_group_name      = data.azurerm_resource_group.this.name
  location                 = data.azurerm_resource_group.this.location
  tags                     = data.azurerm_resource_group.this.tags
  account_tier             = "Standard"
  account_replication_type = "GRS"
  is_hns_enabled           = true
}

resource "azurerm_storage_container" "unity_catalog" {
  name                  = "sample-container"
  storage_account_name  = azurerm_storage_account.unity_catalog.name
  container_access_type = "private"
}

resource "databricks_metastore" "metastore" {
  name          = "samplestore"
  storage_root  = format("abfss://%s@%s.dfs.core.windows.net/",
    azurerm_storage_container.unity_catalog.name,
    azurerm_storage_account.unity_catalog.name)
  force_destroy = true
}
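
Note that this snippet omits the provider "databricks" {} block. It assumes a workspace-level configuration like the one in the question; once the service principal has the account admin role, that same configuration should be enough for the metastore creation to succeed. A minimal sketch, with a placeholder workspace resource ID:

provider "databricks" {
  # resource ID of an existing Databricks workspace (placeholder path)
  azure_workspace_resource_id = "/subscriptions/xxxx/resourceGroups/Sri/providers/Microsoft.Databricks/workspaces/<workspace-name>"
  azure_client_id             = "b2ba5bf1djdjdjdjdd-8aae-ed3a4b006e70"
  azure_client_secret         = "gchdhdhdbjdjdjdjdnnj"
  azure_tenant_id             = "xxxxx-b0e9-cxxxx-a944-627017451367"
}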

Refer to Creating a metastore for Azure Databricks Unity Catalog through Terraform fails on Stack Overflow, answered by Kombajn zbożowy.