2015 年 10 月

# C# - C# 中的一种分裂与合并表达式分析器

Edsger Dijkstra 算法，发表于 50 多年前的 1961 年，它常用于分析数学表达式（请参阅“参考”中的第 3 项）。不过有一种替代算法也挺好，尽管有着同样的时间复杂度，但我个人认为它更易于实现和扩展。

## 分裂与合并算法

``````class Cell
{
internal Cell(double value, char action)
{
Value = value;
Action = action;
}
internal double Value  { get; set; }
internal char   Action { get; set; }
}
``````

## 合并一列存储单元

``````static int GetPriority(char action)
{
switch (action) {
case '^': return 4;
case '*':
case '/': return 3;
case '+':
case '-': return 2;
}
return 0;
}
``````

``````static bool CanMergeCells(Cell leftCell, Cell rightCell)
{
return GetPriority(leftCell.Action) >= GetPriority(rightCell.Action);
}
``````

``````static void MergeCells(Cell leftCell, Cell rightCell)
{
switch (leftCell.Action)
{
case '^': leftCell.Value = Math.Pow(leftCell.Value, rightCell.Value);
break;
case '*': leftCell.Value *= rightCell.Value;
break;
case '/': leftCell.Value /= rightCell.Value;
break;
case '+': leftCell.Value += rightCell.Value;
break;
case '-': leftCell.Value -= rightCell.Value;
break;
}
leftCell.Action = rightCell.Action;
}
``````

``````static double Merge(Cell current, ref int index, List<Cell> listToMerge,
bool mergeOneOnly = false)
{
while (index < listToMerge.Count)
{
Cell next = listToMerge[index++];
while (!CanMergeCells(current, next))
{
Merge(next, ref index, listToMerge, true /* mergeOneOnly */);
}
MergeCells(current, next);
if (mergeOneOnly)
{
return current.Value;
}
}
return current.Value;
}
``````

## 将一个表达式分裂为一列存储单元

``````Split (“3-2*4”) = { Cell(3, ‘-’), Cell(2, ‘*‘), Cell(3, ‘)’) }
``````

``````const char END_ARG = ')';
``````

``````const char START_ARG = '(';
``````

``````Split(“(3-1)-1”) = Split( “[SplitAndMerge(“3-1”)] - 1”) = { Cell(2, ‘-’), Cell(1, ‘)’) }
``````

``````public static double LoadAndCalculate(string data, ref int from, char to = END_LINE)
{
if (from >= data.Length || data[from] == to)
{
throw new ArgumentException("Loaded invalid data: " + data);
}
List<Cell> listToMerge = new List<Cell>(16);
StringBuilder item = new StringBuilder();
do // Main processing cycle of the first part.
{
char ch = data[from++];
if (StillCollecting(item.ToString(), ch, to))
{ // The char still belongs to the previous operand.
item.Append(ch);
if (from < data.Length && data[from] != to)
{
continue;
}
}
// I am done getting the next token. The getValue() call below may
// recursively call loadAndCalculate(). This will happen if extracted
// item is a function or if the next item is starting with a START_ARG '(.'
ParserFunction func = new ParserFunction(data, ref from, item.ToString(), ch);
double value = func.GetValue(data, ref from);
char action = ValidAction(ch) ? ch
: UpdateAction(data, ref from, ch, to);
item.Clear();
} while (from < data.Length && data[from] != to);
if (from < data.Length && (data[from] == END_ARG || data[from] == to))
{ // This happens when called recursively: move one char forward.
from++;
}
Cell baseCell = listToMerge[0];
int index = 1;
return Merge(baseCell, ref index, listToMerge);
}
``````

LoadAndCalculate 方法会将所有存储单元添加到 listToMerge 列表，然后调用分析算法的第二部分，即合并函数。StringBuilder 项将保留当前标记，当从表达式的字符串中读取字符后，StringBuilder 项就会立刻将字符逐一地添加到标记中。

StillCollecting 方法检查是否仍在收集用于当前标记的字符。如果当前字符为 END_ARG 或者任何其他特殊的“to”字符（比如逗号，如果分析参数由逗号分隔；稍后我将使用幂函数来举一个这样的示例），则不会是这种情况。另外，如果当前字符是一个有效操作或 START_ARG，将不再收集标记：

``````static bool StillCollecting(string item, char ch, char to)
{
char stopCollecting = (to == END_ARG || to == END_LINE) ?
END_ARG : to;
return (item.Length == 0 && (ch == '-' || ch == END_ARG)) ||
!(ValidAction(ch) || ch == START_ARG || ch == stopCollecting);
}
static bool ValidAction(char ch)
{
return ch == '*' || ch == '/' || ch == '+' || ch == '-' || ch == '^';
}
``````

``````char action = ValidAction(ch) ? ch
: UpdateAction(data, ref from, ch);
``````

``````static char UpdateAction(string item, ref int from, char ch, char to)
{
if (from >= item.Length || item[from] == END_ARG || item[from] == to)
{
return END_ARG;
}
int index = from;
char res = ch;
while (!ValidAction(res) && index < item.Length)
{ // Look to the next character in string until finding a valid action.
res = item[index++];
}
from = ValidAction(res) ? index
: index > from ? index - 1
: from;
return res;
}
``````

``````ParserFunction func = new ParserFunction(data, ref from, item.ToString(), ch);
double value = func.GetValue(data, ref from);
``````

## 用户定义的函数和标准函数

``````private ParserFunction m_impl;
``````

``````internal ParserFunction(string data, ref int from, string item, char ch)
{
if (item.Length == 0 && ch == Parser.START_ARG)
{
// There is no function, just an expression in parentheses.
m_impl = s_idFunction;
return;
}
if (m_functions.TryGetValue(item, out m_impl))
{
// Function exists and is registered (e.g. pi, exp, etc.).
return;
}
// Function not found, will try to parse this as a number.
s_strtodFunction.Item = item;
m_impl = s_strtodFunction;
}
``````

``````private static Dictionary<string, ParserFunction> m_functions =
new Dictionary<string, ParserFunction>();
``````

``````public static void AddFunction(string name, ParserFunction function)
{
m_functions[name] = function;
}
``````

GetValue 方法在创建的 ParserFunction 上调用，但实际工作在实现函数中完成，这将会取代基类的求值方法：

``````public double GetValue(string data, ref int from)
{
return m_impl.Evaluate(data, ref from);
}
protected virtual double Evaluate(string data, ref int from)
{
// The real implementation will be in the derived classes.
return 0;
}
``````

``````public ParserFunction()
{
m_impl = this;
}
``````

``````private static IdentityFunction s_idFunction = new IdentityFunction();
private static StrtodFunction s_strtodFunction = new StrtodFunction();
``````

``````class IdentityFunction : ParserFunction
{
protected override double Evaluate(string data, ref int from)
{
}
}
``````

``````class StrtodFunction : ParserFunction
{
protected override double Evaluate(string data, ref int from)
{
double num;
if (!Double.TryParse(Item, out num)) {
throw new ArgumentException("Could not parse token [" + Item + "]");
}
return num;
}
public string Item { private get; set; }
}
``````

``````class PiFunction : ParserFunction
{
protected override double Evaluate(string data, ref int from)
{
return 3.141592653589793;
}
}
``````

``````class ExpFunction : ParserFunction
{
protected override double Evaluate(string data, ref int from)
{
double arg = Parser.LoadAndCalculate(data, ref from, Parser.END_ARG);
return Math.Exp(arg);
}
}
``````

``````class PowFunction : ParserFunction
{
protected override double Evaluate(string data, ref int from)
{
double arg1 = Parser.LoadAndCalculate(data, ref from, ',');
double arg2 = Parser.LoadAndCalculate(data, ref from, Parser.END_ARG);
return Math.Pow(arg1, arg2);
}
}
``````

``````ParserFunction.AddFunction("pi",  new PiFunction());
``````

## 参考

1. V.Kaplan，“用于分析数学表达式的分离与合并算法，”CVu，27-2，2015 年 5月，bit.ly/1Jb470l
2. V.Kaplan，“再论分裂与合并，”CVu，27-3，2015 年 7月，bit.ly/1UYHmE9
3. E.Dijkstra，Shunting-yard 算法，bit.ly/1fEvvLI
4. J.Coplien，“高级 C++ 编程样式和用语”（第 140 页），Addison-Wesley，1992 年
5. E.Gamma，R. Helm，R. Johnson 和 J. Vlissides，“设计模式： 可重复使用的面向对象的软件元素”，Addison-Wesley 专业计算系列，1995 年

Vassili Kaplan是一位前 Microsoft Lync 开发人员。他对 C# 和 C++ 编程充满热情。现居住于瑞士的苏黎世，作为一名自由职业者供职于各银行。你可以通过 iLanguage.ch 与他联系。