# 學習關聯規則

James McCaffrey

## 規則發現演算法

``````(0, 1, 2)
(0, 1, 3)
(0, 1, 4)
(0, 2, 3)
(0, 2, 4)
(0, 3, 4)
(1, 2, 3)
(1, 2, 4)
(1, 3, 4)
(2, 3, 4)
``````

## 程式的總體結構

``````using System;
using System.Collections.Generic;
namespace AssocRuleLearn
{
class AssocRuleProgram
{
static void Main(string[] args)
{
try
{
Console.WriteLine("\nBegin demo\n");
List<int[]> transactions = new List<int[]>();
transactions.Add(new int[] { 0, 3, 4, 11 });
transactions.Add(new int[] { 1, 4, 5 });
transactions.Add(new int[] { 3, 4, 6, 7 });
transactions.Add(new int[] { 3, 4, 6, 7 });
transactions.Add(new int[] { 0, 5 });
transactions.Add(new int[] { 3, 5, 9 });
transactions.Add(new int[] { 2, 3, 4, 7 });
transactions.Add(new int[] { 2, 5, 8 });
transactions.Add(new int[] { 0, 1, 2, 5, 10 });
transactions.Add(new int[] { 2, 3, 5, 6, 7, 9 });
ShowList(transactions);
List<int[]> freqItemSets = new List<int[]>();
freqItemSets.Add(new int[] { 2, 5 });
freqItemSets.Add(new int[] { 3, 4 });
freqItemSets.Add(new int[] { 3, 6 });
freqItemSets.Add(new int[] { 3, 7 });
freqItemSets.Add(new int[] { 4, 7 });
freqItemSets.Add(new int[] { 6, 7 });
freqItemSets.Add(new int[] { 3, 4, 7 });
freqItemSets.Add(new int[] { 3, 6, 7 });
ShowList(freqItemSets);
double minConPct = 0.70;
List<Rule> goodRules =
GetHighConfRules(freqItemSets, transactions, minConPct);
Console.WriteLine("\nDone.
Rules are:\n");
for (int i = 0; i < goodRules.Count; ++i)
Console.WriteLine(goodRules[i].ToString());
Console.WriteLine("\nEnd demo\n");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
} // Main
static void ShowList(List<int[]> trans)
{
for (int i = 0; i < trans.Count; ++i)
{
for (int j = 0; j < trans[i].Length; ++j)
Console.Write(trans[i][j] + " ");
Console.WriteLine(")");
}
}
static List<Rule> GetHighConfRules(List<int[]> freqItemSets,
List<int[]> transactions, double minConfidencePct) { .
.
}
static int[] NewCombination(int k) { .
.
}
static int[] NextCombination(int[] comb, int n) { .
.
}
static int[] MakeAntecedent(int[] itemSet, int[]comb) { .
.
}
static int[] MakeConsequent(int[] itemSet, int[]comb) { .
.
}
static int CountInTrans(int[] itemSet,
List<int[]> trans, Dictionary<int[], int> countDict) { .
.
}
static bool IsSubsetOf(int[] itemSet, int[] trans) { .
.
}
static int IndexOf(int[] array, int item, int startIdx) { .
.
}
}
public class Rule
{
public int[] antecedent; // If part
public int[] consequent; // Then part
public double confidence;
public Rule(int[] antecedent, int[] consequent, double confidence)
{
this.antecedent = new int[antecedent.Length];
Array.Copy(antecedent, this.antecedent, antecedent.Length);
this.consequent = new int[consequent.Length];
Array.Copy(consequent, this.consequent, consequent.Length);
this.confidence = confidence;
}
public override string ToString()
{
string s = "IF ( ";
for (int i = 0; i < antecedent.Length; ++i)
s += antecedent[i] + " ";
s += ")";
string t = " THEN ( ";
for (int i = 0; i < consequent.Length; ++i)
t += consequent[i] + " ";
t += ") ";
return s + t + "conf = " + confidence.ToString("F2");
}
}
}
``````

## 數學組合

``````static int[] NewCombination(int k)
{
int[] result = new int[k];
for (int i = 0; i < result.Length; ++i)
result[i] = i;
return result;
}
``````

``````static int[] NextCombination(int[] comb, int n)
{
int[] result = new int[comb.Length];
int k = comb.Length;
if (comb[0] == n - k) return null;
Array.Copy(comb, result, comb.Length);
int i = k - 1;
while (i > 0 && result[i] == n - k + i) --i;
++result[i];
for (int j = i; j < k - 1; ++j)
result[j + 1] = result[j] + 1;
return result;
}
``````

## 建置規則的先例和 Consequents

``````static int[] MakeAntecedent(int[] itemSet, int[] comb)
{
int[] result = new int[comb.Length];
for (int i = 0; i < comb.Length; ++i) {
int idx = comb[i];
result[i] = itemSet[idx];
}
return result;
}
``````

``````static int[] MakeConsequent(int[] itemSet, int[] comb)
{
int[] result = new int[itemSet.Length - comb.Length];
int j = 0; // ptr into combination
int p = 0; // ptr into result
for (int i = 0; i < itemSet.Length; ++i) {
if (j < comb.Length && i == comb[j])
++j;
else
result[p++] = itemSet[i];
}
return result;
}
``````

## 計數在交易中出現的次數

``````static int CountInTrans(int[] itemSet, List<int[]> trans,
Dictionary<int[], int> countDict)
{
if (countDict.ContainsKey(itemSet) == true)
return countDict[itemSet];
int ct = 0;
for (int i = 0; i < trans.Count; ++i)
if (IsSubsetOf(itemSet, trans[i]) == true)
++ct;
return ct;
}
``````

``````static bool IsSubsetOf(int[] itemSet, int[] trans)
{
int foundIdx = -1;
for (int j = 0; j < itemSet.Length; ++j) {
foundIdx = IndexOf(trans, itemSet[j], foundIdx + 1);
if (foundIdx == -1) return false;
}
return true;
}
``````

``````static int IndexOf(int[] array, int item, int startIdx)
{
for (int i = startIdx; i < array.Length; ++i) {
if (i > item) return -1;
if (array[i] == item) return i;
}
return -1;
}
``````

## 生成高信心規則

``````for each frequent item-set
ctItemSet = count times item-set is in transactions
for subset length 1 to item-set length - 1
create a new math combination
loop over each possible math combination
create candidate rule if-part
create candidate rule then-part
compute confidence of candidate rule
if candidate rule meets min confidence, save
end loop
end for
end for
return saved rules
``````

GetHighConfRules 方法的實現中列出圖 5。 雖然您可以使用 GetHighConfRules 方法中列出圖 5，有很多機會，以提高性能，通常影響記憶體或代碼清晰。 例如，如果你是指圖 2，您會注意到每個候選規則前面的任務隨後對已具有延續和隨後被交換的鏡子候選規則。 例如，如果專案集是 （2 5 7 9），然後一個候選規則與子集大小 k = 2 是如果 (2 7) 然後 (5 9) 和另一名候選人的規則是如果 (5 9) 然後 (2 7)。 因此，而不是計算的所有可能值的頻繁項集子集的大小的先例，只是可以計算的先例和一半的可能子集大小為 consequents。

``````static List<Rule> GetHighConfRules(List<int[]> freqItemSets,
List<int[]> trans, double minConfidencePct)
{
List<Rule> result = new List<Rule>();
Dictionary<int[], int> itemSetCountDict =
new Dictionary<int[], int>();
for (int i = 0; i < freqItemSets.Count; ++i)
{
int[] currItemSet = freqItemSets[i]; // for clarity
int ctItemSet = CountInTrans(currItemSet, trans, itemSetCountDict);
for (int len = 1; len <= currItemSet.Length - 1; ++len)
{
int[] c = NewCombination(len);
while (c != null) // each combination makes a candidate rule
{
int[] ante = MakeAntecedent(currItemSet, c);
int[] cons = MakeConsequent(currItemSet, c); // could defer
int ctAntecendent = CountInTrans(ante, transactions,
itemSetCountDict);
double confidence = (ctItemSet * 1.0) / ctAntecendent;
if (confidence >= minConfidencePct) {
Rule r = new Rule(ante, cons, confidence);
}
c = NextCombination(c, currItemSet.Length);
} // while each combination
} // len each possible antecedent for curr item-set
} // i each freq item-set
return result;
}
``````

## 總結

Dr. James McCaffrey  為微軟在華盛頓州雷德蒙德的研究工作 他曾在幾個 Microsoft 產品，包括互聯網資源管理器和 Bing。 博士。 麥卡弗裡也可以撥打 jammc@microsoft.com