I am using query acceleration to access and filter data in Azure Data Lake Storage, following this guide: https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-query-acceleration-how-to?tabs=dotnet%2Cazure-cli
I followed the steps in the document to the letter,
but I get an exception when I run the query. I am using a .NET Core application.
Error:
{"XML specified is not syntactically valid.\nRequestId:e3204a59-901e-005f-7612-d2ee5f000000\nTime:2021-11-05T06:58:32.8652708Z\r\nStatus: 400 (XML specified is not syntactically valid.)\r\nErrorCode: InvalidXmlDocument\r\n\r\nContent:\r\n<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Error><Code>InvalidXmlDocument</Code><Message>XML specified is not syntactically valid.\nRequestId:e3204a59-901e-005f-7612-d2ee5f000000\nTime:2021-11-05T06:58:32.8652708Z</Message></Error>\r\n\r\nHeaders:\r\nServer: Windows-Azure-Blob/1.0,Microsoft-HTTPAPI/2.0\r\nx-ms-error-code: InvalidXmlDocument\r\nx-ms-request-id: e3204a59-901e-005f-7612-d2ee5f000000\r\nx-ms-version: 2020-10-02\r\nx-ms-client-request-id: 159c9d5a-2624-4be4-b0ae-f6fbfb3cda5b\r\nDate: Fri, 05 Nov 2021 06:58:32 GMT\r\nContent-Length: 229\r\nContent-Type: application/xml\r\n"}
Code:
/// <summary>
/// Runs the sample query that selects every row whose <c>_3</c> column equals
/// the Hemingway author string, and dumps the result via <c>DumpQueryCsv</c>.
/// </summary>
/// <param name="blob">Blob to run the query against.</param>
/// <returns>A task that completes once the result has been dumped.</returns>
static async Task QueryHemingway(BlockBlobClient blob)
{
    const string hemingwayQuery = @"SELECT * FROM BlobStorage WHERE _3 = 'Hemingway, Ernest, 1899-1961'";

    // The input is queried as header-less CSV (headers: false).
    await DumpQueryCsv(blob: blob, query: hemingwayQuery, headers: false);
}
/// <summary>
/// Runs <paramref name="query"/> against <paramref name="blob"/> as a CSV blob query and
/// writes every returned record to standard output with its fields joined by a single space.
/// Progress, per-record query errors and any exception are written to standard error.
/// </summary>
/// <param name="blob">Blob to query.</param>
/// <param name="query">Query text passed to <c>QueryAsync</c>.</param>
/// <param name="headers">Whether the input CSV's first row is a header row.</param>
/// <returns>A task that completes when the whole result has been printed or an exception has been logged.</returns>
private static async Task DumpQueryCsv(BlockBlobClient blob, string query, bool headers)
{
    try
    {
        var options = new BlobQueryOptions()
        {
            InputTextConfiguration = BuildCsvTextOptions(headers),
            // The output always carries a header row.
            OutputTextConfiguration = BuildCsvTextOptions(true),
            ProgressHandler = new Progress<long>((finishedBytes) => Console.Error.WriteLine($"Data read: {finishedBytes}"))
        };
        options.ErrorHandler += (BlobQueryError err) =>
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.Error.WriteLine($"Error: {err.Position}:{err.Name}:{err.Description}");
            Console.ResetColor();
        };

        // BlobDownloadInfo exposes a Stream that will make results available when received rather than blocking for the entire response.
        using (var reader = new StreamReader((await blob.QueryAsync(query, options)).Value.Content))
        // InvariantCulture keeps the CSV delimiter independent of the machine's regional settings;
        // the query output above is requested with "," as the column separator.
        using (var parser = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture) { HasHeaderRecord = true }))
        {
            while (await parser.ReadAsync())
            {
                Console.Out.WriteLine(String.Join(" ", parser.Parser.Record));
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort sample behaviour: report the failure instead of propagating it.
        Console.Error.WriteLine("Exception: " + ex.ToString());
    }
}

// Builds CSV text options with every dialect setting spelled out explicitly.
// NOTE(review): leaving the separators/escape/quote unset has been reported to trigger the
// "XML specified is not syntactically valid" (InvalidXmlDocument, 400) response with some
// Azure.Storage.Blobs versions - confirm against the SDK version in use.
private static BlobQueryCsvTextOptions BuildCsvTextOptions(bool hasHeaders)
{
    return new BlobQueryCsvTextOptions()
    {
        HasHeaders = hasHeaders,
        RecordSeparator = "\n",
        ColumnSeparator = ",",
        EscapeCharacter = '\\',
        QuotationCharacter = '"'
    };
}