Thank you for your answer.
Both pieces of code use the same SPN.
The version that uses pyapacheatlas works correctly. It gets all data assets that match a search criterion.
from azure.identity import AzureCliCredential
from pyapacheatlas.core import PurviewClient
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient
import json
# Credential object provided by azure.identity.
cred = AzureCliCredential()

# First client: connects to the Purview account with the credential above.
client = PurviewClient(account_name="<purview account name>", authentication=cred)

# Service principal credentials (tenant, application id and secret).
auth = ServicePrincipalAuthentication(
    tenant_id="<tenant id>",
    client_id="<client id>",
    client_secret="<client secret>",
)

# Rebind `client` to one authenticated with the service principal;
# this is the client actually used by the search below.
client = PurviewClient(account_name="<purview account name>", authentication=auth)

# Run the 'name:*' search and pretty-print every entity it yields.
for entity in client.discovery.search_entities('name:*'):
    print(json.dumps(entity, indent=1))
The version that uses the REST API does not work. This version gets Purview insights about the top files found.
import os
import requests
import json
import jmespath
import pandas as pd
from pprint import pprint
def azuread_auth(tenant_id: str, client_id: str, client_secret: str, resource_url: str) -> str:
    """
    Authenticate a Service Principal to the provided Resource URL and return
    the OAuth access token.

    Sends a ``client_credentials`` grant to the tenant's ``oauth2/token``
    endpoint on login.microsoftonline.com.

    Args:
        tenant_id: Azure AD tenant the service principal belongs to.
        client_id: Application (client) id of the service principal.
        client_secret: Secret of the service principal.
        resource_url: Resource the token is requested for.

    Returns:
        The ``access_token`` value of the token response.

    Raises:
        requests.HTTPError: If the token endpoint answers with an error status
            (for example, rejected credentials).
        requests.Timeout: If the endpoint does not answer within the timeout.
        KeyError: If a successful response carries no ``access_token``.
    """
    url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
    # Pass the form fields as a dict so that requests URL-encodes each value.
    # Building the body by string formatting corrupts secrets that contain
    # characters such as '&', '+', '=' or '%'.
    payload = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
        'resource': resource_url,
    }
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Fail here with the HTTP status instead of a bare KeyError further down.
    response.raise_for_status()
    return response.json()['access_token']
# ==========
# Service Principal with "Purview Data Source Administrator" permissions on Purview
tenant_id = "<tenant id>"
client_id = "<client id>"
client_secret = "<client secret>"
resource_url = "https://purview.azure.net"
data_catalog_name = "<purview account name"

# Retrieve authentication objects
azuread_access_token = azuread_auth(tenant_id, client_id, client_secret, resource_url)


# ==========
def get_file_extension_report(data_catalog_name: str, access_token: str) -> pd.DataFrame:
    """
    Query the Purview ``reports/fileExtensions`` endpoint and return the
    result as a DataFrame.

    Args:
        data_catalog_name: Purview account name, used as the host prefix of
            the ``guardian.purview.azure.com`` endpoint.
        access_token: OAuth bearer token sent in the Authorization header.

    Returns:
        A DataFrame with the columns ``fileExtension``, ``assets``,
        ``subscriptions`` and ``count``; the last three are cast to float.
        The frame is empty when the response has no ``fileExtensionDetails``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    url = f"https://{data_catalog_name}.guardian.purview.azure.com/reports/fileExtensions"
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }
    payload = {
        "Query": {
            "StartTime": "2020-01-01T00:00:00.000Z",
            "EndTime": "2022-12-31T23:59:00.000Z",
            "takeTopCount": 30,
            "assetTypes": [],
        }
    }
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    # Surface authentication/authorization failures (401/403) and other HTTP
    # errors directly instead of parsing an error body as if it were a report.
    response.raise_for_status()
    data = jmespath.search(
        "fileExtensionDetails[].[fileExtension, assets, subscriptions, count]",
        response.json(),
    )
    columns = ['fileExtension', 'assets', 'subscriptions', 'count']
    df = pd.DataFrame(data, columns=columns)
    # Cast only the numeric columns. fileExtension is presumably text (confirm
    # against a real response); forcing dtype=float on the whole frame raises
    # in recent pandas when a column cannot be converted.
    numeric_columns = ['assets', 'subscriptions', 'count']
    df[numeric_columns] = df[numeric_columns].astype(float)
    return df


# The original ended with a module-level `return df`, which is a SyntaxError;
# the request now lives in a function and its result is bound to `df` here.
df = get_file_extension_report(data_catalog_name, azuread_access_token)