Data Classification

Download ODBC driver

Overview

For managing sensitive data, SQL Server and Azure SQL Server introduced the ability to provide database columns with sensitivity metadata that allows the client application to handle different types of sensitive data (such as health, financial, etc.) in accordance with data protection policies.

For more information on how to assign classification to columns, see SQL Data Discovery and Classification.

Microsoft ODBC Driver 17.2 or later allows the retrieval of this metadata via SQLGetDescField using the SQL_CA_SS_DATA_CLASSIFICATION field identifier.

Format

SQLGetDescField has the following syntax:

SQLRETURN SQLGetDescField(  
     SQLHDESC        DescriptorHandle,  
     SQLSMALLINT     RecNumber,  
     SQLSMALLINT     FieldIdentifier,  
     SQLPOINTER      ValuePtr,  
     SQLINTEGER      BufferLength,  
     SQLINTEGER *    StringLengthPtr);  

DescriptorHandle
[Input] IRD (Implementation Row Descriptor) handle. Can be retrieved by a call to SQLGetStmtAttr with SQL_ATTR_IMP_ROW_DESC statement attribute

RecNumber
[Input] 0

FieldIdentifier
[Input] SQL_CA_SS_DATA_CLASSIFICATION

ValuePtr
[Output] Output buffer

BufferLength
[Input] Length of output buffer in bytes

StringLengthPtr [Output] Pointer to the buffer in which to return the total number of bytes available to return in ValuePtr.

Note

If the size of the buffer is unknown, it can be determined by calling SQLGetDescField with ValuePtr as NULL and examining the value of StringLengthPtr.

If Data Classification information isn't available, an Invalid Descriptor Field error will be returned.

Upon a successful call to SQLGetDescField, the buffer pointed to by ValuePtr will contain the following data:

nn nn [n sensitivitylabels] tt tt [t informationtypes] cc cc [c columnsensitivitys]

Note

nn nn, tt tt, and cc cc are multibyte integers, which are stored with the least significant byte at the lowest address.

sensitivitylabel and informationtype are both of the form

nn [n bytes name] ii [i bytes id]

columnsensitivity is of the form

nn nn [n sensitivityprops]

For each column (c), n 4-byte sensitivityprops are present:

ss ss tt tt

s - index into the sensitivitylabels array, FF FF if not labeled

t - index into the informationtypes array, FF FF if not labeled



The format of the data can be expressed as the following pseudo-structures:

struct IDnamePair {
 BYTE nameLen;
 USHORT name[nameLen];
 BYTE idLen;
 USHORT id[idLen];
};

struct SensitivityProp {
 USHORT labelIdx;
 USHORT infoTypeIdx;
};

USHORT nLabels;
struct IDnamePair labels[nLabels];
USHORT nInfoTypes;
struct IDnamePair infotypes[nInfoTypes];
USHORT nColumns;
struct {
 USHORT nProps;
 struct SensitivityProp[nProps];
} columnClassification[nColumns];

Code sample

Test application that demonstrates how to read Data Classification metadata. On Windows it can be compiled using cl /MD dataclassification.c /I (directory of msodbcsql.h) /link odbc32.lib and run with a connection string, and a SQL query (that returns classified columns) as parameters:

#ifdef _WIN32
#include <windows.h>
#endif
#include <sql.h>
#include <sqlext.h>
#include <msodbcsql.h>
#include <stdio.h>
SQLHANDLE env, dbc, stmt;
void checkRC_exit(SQLRETURN rc, SQLHANDLE hand, SQLSMALLINT htype, int retcode, char *action)
{
    if ((rc == SQL_ERROR || rc == SQL_SUCCESS_WITH_INFO) && hand)
    {
        char msg[1024], state[6];
        int i = 0;
        SQLRETURN rc2;
        SQLINTEGER err;
        SQLSMALLINT lenout;
        while ((rc2 = SQLGetDiagRec(htype, hand, ++i, state, &err, msg, sizeof(msg), &lenout)) == SQL_SUCCESS ||
            rc2 == SQL_SUCCESS_WITH_INFO)
            printf("%d (%d)[%s]%s\n", i, err, state, msg);
    }
    if (rc == SQL_ERROR && retcode)
    {
        printf("Error occurred%s%s\n", action ? " upon " : "", action ? action : "");
        exit(retcode);
    }
}
void printLabelInfo(char *type, char **pptr)
{
    char *ptr = *pptr;
    unsigned short nlabels;
    printf("----- %s(%u) -----\n", type, nlabels = *(unsigned short*)ptr);
    ptr += sizeof(unsigned short);
    while (nlabels--)
    {
        int namelen, idlen;
        char *nameptr, *idptr;
        namelen = *ptr++;
        nameptr = ptr;
        ptr += namelen * 2;
        idlen = *ptr++;
        idptr = ptr;
        ptr += idlen * 2;
        wprintf(L"Name: \"%.*s\" Id: \"%.*s\"\n", namelen, nameptr, idlen, idptr);
    }
    *pptr = ptr;
}
int main(int argc, char **argv)
{
    unsigned char *dcbuf;
    unsigned int dclen = 0;
    SQLRETURN rc;
    SQLHANDLE ird;
    if (argc < 3)
    {
        fprintf(stderr, "usage: dataclassification connstr query\n");
        return 1;
    }
    checkRC_exit(SQLAllocHandle(SQL_HANDLE_ENV, 0, &env), 0, 0,
        2, "allocate environment");
    checkRC_exit(SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, (SQLPOINTER)SQL_OV_ODBC3, 0), env, SQL_HANDLE_ENV,
        3, "set ODBC version");
    checkRC_exit(SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc), env, SQL_HANDLE_ENV,
        4, "allocate connection");
    checkRC_exit(SQLDriverConnect(dbc, 0, argv[1], SQL_NTS, 0, 0, 0, SQL_DRIVER_NOPROMPT), dbc, SQL_HANDLE_DBC,
        5, "connect to server");
    checkRC_exit(SQLAllocHandle(SQL_HANDLE_STMT, dbc, &stmt), dbc, SQL_HANDLE_DBC,
        6, "allocate statement");
    checkRC_exit(SQLExecDirect(stmt, argv[2], SQL_NTS), stmt, SQL_HANDLE_STMT,
        7, "execute query");
    checkRC_exit(SQLGetStmtAttr(stmt, SQL_ATTR_IMP_ROW_DESC, (SQLPOINTER)&ird, SQL_IS_POINTER, 0), stmt, SQL_HANDLE_STMT,
        8, "get IRD handle");
    rc = SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, dcbuf, 0, &dclen);

    checkRC_exit(rc, ird, SQL_HANDLE_DESC, 0, 0);
  

    SQLINTEGER dclenout;
    unsigned char *dcptr;
    unsigned short ncols;
    printf("Data Classification information (%u bytes):\n", dclen);
    if (!(dcbuf = malloc(dclen)))
    {
        printf("Memory Allocation Error");
        return 9;
    }
    checkRC_exit(SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, dcbuf, dclen, &dclenout),
            ird, SQL_HANDLE_DESC, 10, "reading SQL_CA_SS_DATA_CLASSIFICATION");
    dcptr = dcbuf;
    printLabelInfo("Labels", &dcptr);
    printLabelInfo("Information Types", &dcptr);
    printf("----- Column Sensitivities(%u) -----\n", ncols = *(unsigned short*)dcptr);
    dcptr += sizeof(unsigned short);
    while (ncols--)
    {
        unsigned short nprops = *(unsigned short*)dcptr;
        dcptr += sizeof(unsigned short);
        while (nprops--)
        {
            unsigned short labelidx, typeidx;
            labelidx = *(unsigned short*)dcptr; dcptr += sizeof(unsigned short);
            typeidx = *(unsigned short*)dcptr; dcptr += sizeof(unsigned short);
            printf(labelidx == 0xFFFF ? "(none) " : "%u ", labelidx);
            printf(typeidx == 0xFFFF ? "(none)\n" : "%u\n", typeidx);
        }
        printf("-----\n");
    }
    if (dcptr != dcbuf + dclen)
    {
        printf("Error: unexpected parse of DATACLASSIFICATION data\n");
        return 11;
    }
    free(dcbuf);
    
    return 0;
}

Supported Version

Microsoft ODBC Driver 17.2 allows the retrieval of Data Classification information via SQLGetDescField if FieldIdentifier is set to SQL_CA_SS_DATA_CLASSIFICATION (1237).

Starting from Microsoft ODBC Driver 17.4.1.1, it's possible to retrieve the version of Data Classification supported by a server via SQLGetDescField using the SQL_CA_SS_DATA_CLASSIFICATION_VERSION (1238) field identifier. In 17.4.1.1, the supported data classification version is set to "2".

Starting from 17.4.2.1, the default version of data classification is set to "1" and is the version the driver reports to SQL Server as supported. A new connection attribute SQL_COPT_SS_DATACLASSIFICATION_VERSION (1400) can allow application to change the supported version of Data Classification from "1" up to the maximum supported.

Example:

To set the version, this call should be made right before the SQLConnect or SQLDriverConnect call:

ret = SQLSetConnectAttr(dbc, SQL_COPT_SS_DATACLASSIFICATION_VERSION, (SQLPOINTER)2, SQL_IS_INTEGER);

The value of the currently supported version of Data Classification can be retrieved via SQLGetConnectAttr call:

ret = SQLGetConnectAttr(dbc, SQL_COPT_SS_DATACLASSIFICATION_VERSION, (SQLPOINTER)&dataClassVersion, SQL_IS_INTEGER, 0);