FeatureSelectionCatalog.SelectFeaturesBasedOnCount Method

Definition

Overloads

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, Microsoft.ML.InputOutputColumnPair[] columns, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * Microsoft.ML.InputOutputColumnPair[] * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, columns As InputOutputColumnPair(), Optional count As Long = 1) As CountFeatureSelectingEstimator

Parameters

catalog
TransformsCatalog.FeatureSelectionTransforms

The transform's catalog.

columns
InputOutputColumnPair[]

Specifies the names of the columns on which to apply the transformation. This estimator operates over vector or scalar of numeric, text or keys data types. The output columns' data types will be the same as the input columns' data types.

count
Int64

If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.

Returns

Examples

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    public static class SelectFeaturesBasedOnCountMultiColumn
    {
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var rawData = GetData();

            // Printing the columns of the input data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in rawData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.
                    NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,NaN,NaN                 A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // We will use the SelectFeaturesBasedOnCount transform estimator, to
            // retain only those slots which have at least 'count' non-default
            // values per slot.

            // Multi column example. This pipeline transform two columns using the
            // provided parameters.
            var pipeline = mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(new InputOutputColumnPair[] { new
                InputOutputColumnPair("NumericVector"), new InputOutputColumnPair(
                "StringVector") }, count: 3);

            var transformedData = pipeline.Fit(data).Transform(data);

            var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
                transformedData, true);

            // Printing the columns of the transformed data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in convertedData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item
                    .NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        private class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Returns a few rows of data.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            var data = new List<InputData>
            {
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, 6 },
                    StringVector = new string[] { "A", "WA", "Male"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "", "Female"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "NY", null}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, float.NaN },
                    StringVector = new string[] { "A", null, "Male"}
                }
            };
            return data;
        }
    }
}

Applies to

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, string outputColumnName, string inputColumnName = default, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * string * string * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional count As Long = 1) As CountFeatureSelectingEstimator

Parameters

catalog
TransformsCatalog.FeatureSelectionTransforms

The transform's catalog.

outputColumnName
String

Name of the column resulting from the transformation of inputColumnName. This column's data type will be the same as the input column's data type.

inputColumnName
String

Name of column to transform. If set to null, the value of the outputColumnName will be used as source. This estimator operates over vector or scalar of numeric, text or keys data types.

count
Int64

If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.

Returns

Examples

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    public static class SelectFeaturesBasedOnCount
    {
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var rawData = GetData();

            // Printing the columns of the input data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in rawData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item
                    .NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,0,NaN                   A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // We will use the SelectFeaturesBasedOnCount to retain only those slots
            // which have at least 'count' non-default and non-missing values per
            // slot.
            var pipeline =
                mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
                    outputColumnName: "NumericVector", count: 3) // Usage on numeric 
                                                                 // column.
                .Append(mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(outputColumnName: "StringVector",
                count: 3)); // Usage on text column.

            var transformedData = pipeline.Fit(data).Transform(data);

            var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
                transformedData, true);

            // Printing the columns of the transformed data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in convertedData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.
                    NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        public class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Return a few rows of data.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            var data = new List<InputData>
            {
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, 6 },
                    StringVector = new string[] { "A", "WA", "Male"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", string.Empty, "Female"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "NY", null}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 0, float.NaN },
                    StringVector = new string[] { "A", null, "Male"}
                }
            };
            return data;
        }
    }
}

Applies to