question

JingwenPan-2199 avatar image
0 Votes"
JingwenPan-2199 asked XingyuZhao-MSFT edited

Training a Bayesian network with Infer.NET without knowing any prior or conditional probabilities

Hi, I am new to Infer.NET. I know this question may have been asked a few years ago, but the links in some of those posts are broken, so I am asking again here. Let's assume that I have the whole dataset for training and would like to construct a Bayesian network in Infer.NET without knowing any prior or conditional probabilities. I wonder how Infer.NET can infer those prior and conditional probabilities (my inferred probabilities were wrong). Also, if one child node has multiple parents (e.g., 7 parents), what kind of distribution should I define for the prior probabilities of the parents and for the conditional probability of the child? Is it always a Dirichlet distribution represented in terms of Vectors, or should it be something else?

Here is my naive code, which tries to construct a Bayesian network of the form iMay ------> iSex:
using System;
using Microsoft.ML.Probabilistic.Models; //For modelling Bernoulli distribution
using Microsoft.ML.Probabilistic.Math;
using Microsoft.ML.Probabilistic.Distributions;
using Range = Microsoft.ML.Probabilistic.Models.Range;
using System.Data;
using System.IO;
using System.Linq;

namespace learningdotnet
{
/// <summary>
/// Two-node discrete Bayesian network (iMay -> iSex) defined with Infer.NET.
/// Both nodes have two states. The parameter vector of iMay and each row of the
/// conditional probability table (CPT) of iSex are given Dirichlet priors that are
/// supplied at run time as observed values, so the same model can be used both for
/// learning the parameters from data and for querying with known parameters.
/// </summary>
public class BNmodel
{
    // Primary random variables
    public Variable<int> NumberOfExamples;
    public VariableArray<int> iMay;
    public VariableArray<int> iSex;

    // Parameters: state probabilities of iMay, and one row of iSex probabilities per iMay state
    public Variable<Vector> ProbiMay;
    public VariableArray<Vector> CPTiSex;

    // Priors over the parameters (set through ObservedValue before each inference)
    public Variable<Dirichlet> ProbiMayPrior;
    public VariableArray<Dirichlet> CPTiSexPrior;

    // Posteriors over the parameters, filled in by LearnParameters
    public Dirichlet ProbiMayPosterior;
    public Dirichlet[] CPTiSexPosterior;

    public InferenceEngine Engine = new InferenceEngine();

    /// <summary>
    /// Builds the model definition: ranges, priors, parameters and the two data arrays.
    /// </summary>
    public BNmodel()
    {
        // Set up the ranges
        NumberOfExamples = Variable.New<int>().Named("NofE");
        Range N = new Range(NumberOfExamples).Named("N");

        // Variables have just 2 states (go to this attribute/not go to this attribute)
        Range M = new Range(2).Named("M");
        Range S = new Range(2).Named("S");

        // Define the priors and the parameters
        ProbiMayPrior = Variable.New<Dirichlet>().Named("ProbiMayPrior");
        ProbiMay = Variable<Vector>.Random(ProbiMayPrior).Named("ProbiMay");
        ProbiMay.SetValueRange(M);

        // iSex probability table conditioned on iMay: one Dirichlet-distributed row per iMay state
        CPTiSexPrior = Variable.Array<Dirichlet>(M).Named("CPTiSexPrior");
        CPTiSex = Variable.Array<Vector>(M).Named("CPTiSex");
        CPTiSex[M] = Variable<Vector>.Random(CPTiSexPrior[M]);
        CPTiSex.SetValueRange(S);

        // Data: every example draws iMay from ProbiMay, then iSex from the CPT row selected by iMay
        iMay = Variable.Array<int>(N).Named("iMay");
        iMay[N] = Variable.Discrete(ProbiMay).ForEach(N);
        iSex = AddChildFromOneParent(iMay, CPTiSex).Named("iSex");
    }

    /// <summary>
    /// Creates a child array over the parent's range in which each element is drawn
    /// from the CPT row selected by the corresponding parent element.
    /// </summary>
    /// <param name="parent">Parent variable array; its range is reused for the child.</param>
    /// <param name="cpt">Conditional probability table, indexed by the parent's value.</param>
    /// <returns>The newly created child variable array.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static VariableArray<int> AddChildFromOneParent(
        VariableArray<int> parent,
        VariableArray<Vector> cpt)
    {
        if (parent == null)
        {
            throw new ArgumentNullException(nameof(parent));
        }
        if (cpt == null)
        {
            throw new ArgumentNullException(nameof(cpt));
        }

        var n = parent.Range;
        var child = Variable.Array<int>(n);

        // 'using' closes the ForEach/Switch blocks even if the statement inside throws,
        // so a failure here cannot leave a block open for later model statements.
        using (Variable.ForEach(n))
        using (Variable.Switch(parent[n]))
        {
            child[n] = Variable.Discrete(cpt[parent[n]]);
        }

        return child;
    }

    /// <summary>
    /// Observes the data and the given priors, then infers the posteriors over the
    /// parameters into <see cref="ProbiMayPosterior"/> and <see cref="CPTiSexPosterior"/>.
    /// </summary>
    /// <param name="imay">Observed iMay state (0 or 1) for each example.</param>
    /// <param name="isex">Observed iSex state (0 or 1) for each example; same length as <paramref name="imay"/>.</param>
    /// <param name="probiMayPrior">Prior over the iMay state probabilities.</param>
    /// <param name="cptiSexPrior">Prior over each CPT row, one entry per iMay state.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The two data arrays differ in length.</exception>
    public void LearnParameters(
        int[] imay,
        int[] isex,
        Dirichlet probiMayPrior,
        Dirichlet[] cptiSexPrior)
    {
        if (imay == null)
        {
            throw new ArgumentNullException(nameof(imay));
        }
        if (isex == null)
        {
            throw new ArgumentNullException(nameof(isex));
        }
        if (probiMayPrior == null)
        {
            throw new ArgumentNullException(nameof(probiMayPrior));
        }
        if (cptiSexPrior == null)
        {
            throw new ArgumentNullException(nameof(cptiSexPrior));
        }
        if (imay.Length != isex.Length)
        {
            // Both arrays are observed over the same range N, so they must line up one-to-one.
            throw new ArgumentException("imay and isex must contain the same number of examples.", nameof(isex));
        }

        NumberOfExamples.ObservedValue = imay.Length;
        iMay.ObservedValue = imay;
        iSex.ObservedValue = isex;
        ProbiMayPrior.ObservedValue = probiMayPrior;
        CPTiSexPrior.ObservedValue = cptiSexPrior;

        // Inference
        ProbiMayPosterior = Engine.Infer<Dirichlet>(ProbiMay);
        CPTiSexPosterior = Engine.Infer<Dirichlet[]>(CPTiSex);
    }

    /// <summary>
    /// Learns the parameters from data using uniform Dirichlet priors on the iMay
    /// probabilities and on both CPT rows.
    /// </summary>
    /// <param name="imay">Observed iMay state (0 or 1) for each example.</param>
    /// <param name="isex">Observed iSex state (0 or 1) for each example.</param>
    public void LearnParameters(
        int[] imay,
        int[] isex)
    {
        Dirichlet probiMayPrior = Dirichlet.Uniform(2);
        Dirichlet[] cptiSexPrior = Enumerable.Repeat(Dirichlet.Uniform(2), 2).ToArray();

        LearnParameters(imay, isex, probiMayPrior, cptiSexPrior);
    }

    /// <summary>
    /// Queries the model for a single example and returns the probability that iSex
    /// is in state 0, optionally conditioned on an observed iMay state.
    /// </summary>
    /// <param name="imay">Observed iMay state, or null to leave iMay unobserved.</param>
    /// <param name="probiMayPrior">Distribution over the iMay state probabilities.</param>
    /// <param name="cptiSexPrior">Distribution over each CPT row, one entry per iMay state.</param>
    /// <returns>Probability of iSex state 0 for the single example.</returns>
    /// <exception cref="ArgumentNullException">A distribution argument is null.</exception>
    public double ProbiSex(
        int? imay,
        Dirichlet probiMayPrior,
        Dirichlet[] cptiSexPrior)
    {
        if (probiMayPrior == null)
        {
            throw new ArgumentNullException(nameof(probiMayPrior));
        }
        if (cptiSexPrior == null)
        {
            throw new ArgumentNullException(nameof(cptiSexPrior));
        }

        // The query is about exactly one example.
        NumberOfExamples.ObservedValue = 1;
        if (imay.HasValue)
        {
            iMay.ObservedValue = new int[] { imay.Value };
        }
        else
        {
            iMay.ClearObservedValue();
        }

        // iSex is the quantity being queried, so it must not be observed.
        iSex.ClearObservedValue();

        ProbiMayPrior.ObservedValue = probiMayPrior;
        CPTiSexPrior.ObservedValue = cptiSexPrior;

        // Inference
        var iSexPosterior = Engine.Infer<Discrete[]>(iSex);
        // index 0 is true and index 1 is false
        return iSexPosterior[0].GetProbs()[0];
    }

    /// <summary>
    /// Queries the model with exactly known parameters by wrapping them in point-mass
    /// Dirichlet distributions.
    /// </summary>
    /// <param name="imay">Observed iMay state, or null to leave iMay unobserved.</param>
    /// <param name="probiMay">Known iMay state probabilities.</param>
    /// <param name="cptiSex">Known CPT rows, one per iMay state.</param>
    /// <returns>Probability of iSex state 0 for the single example.</returns>
    /// <exception cref="ArgumentNullException">A parameter argument is null.</exception>
    public double ProbiSex(
        int? imay,
        Vector probiMay,
        Vector[] cptiSex)
    {
        if (probiMay == null)
        {
            throw new ArgumentNullException(nameof(probiMay));
        }
        if (cptiSex == null)
        {
            throw new ArgumentNullException(nameof(cptiSex));
        }

        var probiMayPrior = Dirichlet.PointMass(probiMay);
        var cptiSexPrior = cptiSex.Select(v => Dirichlet.PointMass(v)).ToArray();
        return ProbiSex(imay, probiMayPrior, cptiSexPrior);
    }
}

 public class BN
 {
     public static void infer()
     {
         Rand.Restart(12347);
         BNmodel model = new BNmodel();

         double a = 1 - 0.9895496250434754;
         Vector probiMay = Vector.FromArray(a, 0.9895496250434754); //iMay, not iMay
         a = 1 - 0.9904474054066148;
         Vector[] cptiSex = new Vector[] { Vector.FromArray(0.9904474054066148,a) /* iMay */, Vector.FromArray(0.9999999961141224, (1 - 0.9999999961141224)) /* not iMay */ };

         double probNotiSexGiveniMay = model.ProbiSex(0, probiMay, cptiSex);
         double probiSexGivenNOTiMay = model.ProbiSex(1, probiMay, cptiSex);

         Console.WriteLine("P(Not iSex | iMay) = {0:0.0000}", probNotiSexGiveniMay);
         Console.WriteLine("P(iSex | not iMay) = {0:0.0000}", probiSexGivenNOTiMay);

         // -------------------------------------------------------------
         // Learn posterior distributions for the parameters
         // -------------------------------------------------------------
         Console.WriteLine("\n*********************************************");
         Console.WriteLine("Learning parameters from data ");
         Console.WriteLine("*********************************************");

         int[] imay_ = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
         int[] isex_ = new int[] { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };

         // Now see if we can recover the parameters from the data - assume uniform priors
         model.LearnParameters(imay_, isex_);

         // The posteriors are distributions over the probabilities and CPTs. Print out the means of these
         // distributions, and compare with the ground truth
         Console.WriteLine("Prob. iMay:                         Ground truth: {0:0.00}, Inferred: {1:0.00}", 1 - 0.9895496250434754, model.ProbiMayPosterior.GetMean()[0]);
         Console.WriteLine("Prob. iSex | iMay:                  Ground truth: {0:0.00}, Inferred: {1:0.00}", 0.9904, model.CPTiSexPosterior[0].GetMean()[0]);
         Console.WriteLine("Prob. iSex | Not iMay:              Ground truth: {0:0.00}, Inferred: {1:0.00}", 1.0000, model.CPTiSexPosterior[1].GetMean()[0]);
     }
 }

}

In the last few lines of the code, I try to learn the prior and CPT probabilities from the dataset, but the inferred probabilities are very different from the ground truth. The output is:
Prob. iMay: Ground truth: 0.01, Inferred: 0.99
Prob. iSex | iMay: Ground truth: 0.99, Inferred: 0.52
Prob. iSex | Not iMay: Ground truth: 1.00, Inferred: 0.33

I wonder what I am doing wrong here. Thank you so much for your help!

Jingwen

dotnet-csharpdotnet-ml-big-data
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

0 Answers