DataOperationsCatalog.FilterRowsByMissingValues(IDataView, String[]) DataOperationsCatalog.FilterRowsByMissingValues(IDataView, String[]) DataOperationsCatalog.FilterRowsByMissingValues(IDataView, String[]) Method

Definition

Drop rows where any column in columns contains a missing value.

public Microsoft.ML.IDataView FilterRowsByMissingValues (Microsoft.ML.IDataView input, params string[] columns);
member this.FilterRowsByMissingValues : Microsoft.ML.IDataView * string[] -> Microsoft.ML.IDataView
Public Function FilterRowsByMissingValues (input As IDataView, ParamArray columns As String()) As IDataView

Parameters

input
IDataView IDataView IDataView

The input data.

columns
String[]

Name of the columns to filter on. If a row is has a missing value in any of these columns, it will be dropped from the dataset.

Returns

Examples

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.SamplesUtils;

namespace Samples.Dynamic
{
    public class FilterRowsByMissingValues
    {
        /// <summary>
        /// Sample class showing how to use FilterRowsByMissingValues.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Create a small dataset as an IEnumerable.
            var samples = new List<DataPoint>()
            {
                new DataPoint(){ Feature1 = 21, Feature2 = new [] { 1, 2, float.NaN} },
                new DataPoint(){ Feature1 = 40, Feature2 = new [] { 1f, 2f, 3f}  },
                new DataPoint(){ Feature1 = float.NaN, Feature2 = new [] { 1, 2, float.NaN}  }
            };

            // Convert training data to IDataView.
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Filter out any row with an NaN values in either column
            var filteredData = mlContext.Data.FilterRowsByMissingValues(data, new[] { "Feature1", "Feature2" });

            // Take a look at the resulting dataset and note that rows with NaNs are filtered out.
            // Only the second data point is left
            var enumerable = mlContext.Data.CreateEnumerable<DataPoint>(filteredData, reuseRowObject: true);
            Console.WriteLine($"Feature1   Feature2");

            foreach (var row in enumerable)
            {
                Console.WriteLine($"{row.Feature1}\t({string.Join(", ", row.Feature2)})");
            }

            // Feature1     Feature2
            // 
            //   40         (1, 2, 3)

        }

        private class DataPoint
        {
            public float Feature1 { get; set; }

            public float[] Feature2 { get; set; }
        }
    }
}

Applies to