C# app to count #ifdef usage

Here's a little tool I wrote to count #ifdef usage in a project. It prints a summary of each #define name and how often it is used, and it writes a detailed XML file listing every location where each #define occurs.

For example, when applied to this sample file (in directory c:\temp\1):

  1: void Foo()
 2: {
 3:   #ifdef ABC
 4:   A1();
 5:   #ifndef ABC || defined(DEF)
 6:   A2();
 7:   #elif XYZ
 8:   A3();
 9:   #endif

Run it:
    Counter.exe c:\temp\1 c:\temp\t.xml

This scans all files in the directory and all of its subdirectories, then prints the summary below. The summary lists each #define name sorted by increasing frequency (highlighted in red). It also lists the preprocessor directives themselves (ifdef, elif, ...), because the parser I use is very simple, and the directive counts are actually interesting to have.

Usage: counter <directory> <xml output file>
frequence | name
1 DEF
1 elif
1 XYZ
1 defined
1 ifdef
1 ifndef
2 ABC

It also produces this more detailed xml file that shows where each usage occurs: 

<?xml version="1.0" encoding="Windows-1252"?>
  <item name="DEF" frequency="1">
  <item name="elif" frequency="1">
  <item name="XYZ" frequency="1">
  <item name="defined" frequency="1">
  <item name="ifdef" frequency="1">
  <item name="ifndef" frequency="1">
  <item name="ABC" frequency="2">

Why is this useful?


The sample file above is simple and contrived; the tool is more interesting when applied to larger projects that rely heavily on #ifdef to switch features on and off. The counts can serve as a useful metric for spotting heavy #ifdef usage so you can try to drive it down with better componentization. For example, rather than toggling a feature via #ifdef, consider toggling it via better class or file partitioning.

Here's the source:
It compiles with .NET 2.0: 

// Program to count #ifdef labels.
// http://blogs.msdn.com/jmstall 

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Xml;

namespace Counter
{
    // Stats for a given directory and all of its subdirectories.
    // Construction does all the work: it walks the tree and feeds every line
    // of every file to BaseStats.ParseLine.
    class DirStats : BaseStats
    {
        // Parse all #ifdefs in this directory and subdirs.
        // directory - root of the tree to scan. Must not be null.
        // Throws ArgumentNullException if directory is null; I/O exceptions from
        // Directory.GetFiles / GetDirectories / StreamReader propagate to the caller.
        public DirStats(string directory)
        {
            if (directory == null)
            {
                throw new ArgumentNullException("directory");
            }

            ParseDirectory(directory);
        }

        // Track all #defines within this directory and all subdirectories.
        void ParseDirectory(string directory)
        {
            // Do all files
            foreach (string filename in Directory.GetFiles(directory))
            {
                ParseFile(filename);
            }

            // Do all subdirectories (recursive).
            foreach (string subdirectory in Directory.GetDirectories(directory))
            {
                ParseDirectory(subdirectory);
            }
        }

        // Track all #defines within the filename.
        // Each line is reported with its 1-based line number so that the detailed
        // report can point at the exact location ("<filename>:<line>").
        void ParseFile(string filename)
        {
            // 'using' releases the file handle even if parsing throws; without it a
            // large tree would keep one open handle per file until finalization.
            using (TextReader reader = new StreamReader(filename))
            {
                int lineNumber = 1;

                // Parse all lines
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    ParseLine(line, filename, lineNumber);
                    lineNumber++;
                }
            }
        }

        // NOTE(review): not read or written by any code visible in this listing.
        // Presumably a running total of some kind - confirm its purpose or remove it.
        int m_total;
    }

    // Track stats in an arbitrary quanta (a line, a file, a directory tree, ...).
    // Derived classes decide what to scan and call ParseLine for each input line;
    // PrintReport then emits the accumulated results.
    class BaseStats
    {
        // Single copy of RegEx.
        // Regex expressions are NOT cached by default in .NET 2.0.
        // Use static to ensure caching.
        // See : http://blogs.msdn.com/bclteam/archive/2006/10/19/regex-class-caching-changes-between-net-framework-1-1-and-net-framework-2-0-josh-free.aspx
        //
        // The pattern captures either an identifier-like word or a single '/'.
        // It's a sufficient approximation of the C preprocessor for #ifdef usage.
        static readonly Regex m_regex = new Regex(@"(\w+|\/)");

        // Update the stats with #defines from a single line.
        // line - input line to search for #defines. A null line is ignored.
        // filename - filename that line is in. Used for context.
        // lineNumber - line number where the line occurs within the filename. Used for context.
        protected void ParseLine(string line, string filename, int lineNumber)
        {
            if (line == null)
            {
                return;
            }

            line = line.Trim();

            // Only care about lines that start with "#if" (#if, #ifdef, #ifndef) or "#elif".
            // Most lines should fail here. Ordinal comparison: these are fixed tokens,
            // so the current culture must not influence the match.
            if (!line.StartsWith("#if", StringComparison.Ordinal) &&
                !line.StartsWith("#elif", StringComparison.Ordinal))
            {
                return;
            }

            string context = filename + ":" + lineNumber;

            // Loop to allow multiple defines in a single line. Eg:
            //    #if defined(ABC) && !defined(DEF_FOO) // some comment
            // should collect both ABC and DEF_FOO, but not anything after '//' since that's a comment.
            // This also includes preprocessor directives like ifdef, ifndef, defined, etc.
            // But those can be easily filtered out by the end-user.
            for (Match m = m_regex.Match(line); m.Success; m = m.NextMatch())
            {
                string val = m.Groups[1].Value;
                if (val == "/")
                {
                    // hit a comment; ignore the rest of the line.
                    break;
                }

                Increment(val, context);
            }
        }

        // Increment the stat for the given #define.
        // define - the #define name to increment
        // context - a string describing the location of the #define (eg, "c:\myfile.txt:8")
        void Increment(string define, string context)
        {
            List<string> list;

            if (!m_frequency.TryGetValue(define, out list))
            {
                // First time this name is seen.
                list = new List<string>();
                m_frequency[define] = list;
            }

            list.Add(context);
        }

        // frequency of each #ifdef in the file.
        //   Key = the name of a #define.
        //   Value = List of locations that #define occurs.
        // The m_frequency[name].Count == # of times 'name' occurs.
        // Using <string, List<string>> lets us store where exactly each #define usage occurs.
        // If we just cared about frequency, we could use <string, int> (where the int = List<string>.Count)
        protected Dictionary<string, List<string>> m_frequency = new Dictionary<string, List<string>>();

        // Produces 2 reports.
        // twSummary - receives a summary, which is just (frequency, name) sorted by
        //     increasing frequency. May be null, in which case no summary is written.
        // xmlDetails - receives a detailed report listing the location of every usage:
        //     <report><item name=".." frequency=".."><location>..</location>...</item>...</report>
        //     May be null, in which case no detailed report is written.
        //     The writer is not closed here; that is the caller's job.
        public void PrintReport(TextWriter twSummary, XmlWriter xmlDetails)
        {
            // Lets us avoid null checks everywhere.
            if (twSummary == null)
            {
                twSummary = TextWriter.Null;
            }

            twSummary.WriteLine("frequence | name");

            // Fill the parallel (count, name) arrays from a single enumeration so that
            // entry i of both arrays is guaranteed to describe the same #define.
            int len = m_frequency.Count;
            int[] vals = new int[len];
            string[] keys = new string[len];
            int idx = 0;
            foreach (KeyValuePair<string, List<string>> entry in m_frequency)
            {
                keys[idx] = entry.Key;
                vals[idx] = entry.Value.Count;
                idx++;
            }

            // sort by increasing order of frequency (keys are reordered along with vals).
            Array.Sort(vals, keys);

            if (xmlDetails != null)
            {
                // Emit the XML declaration only on a fresh writer; calling
                // WriteStartDocument on a writer that has already started would throw.
                if (xmlDetails.WriteState == WriteState.Start)
                {
                    xmlDetails.WriteStartDocument();
                }

                xmlDetails.WriteStartElement("report");
            }

            for (idx = 0; idx < len; idx++)
            {
                string name = keys[idx];
                int frequency = vals[idx];
                twSummary.WriteLine("{0} {1}", frequency, name);

                if (xmlDetails == null)
                {
                    continue;
                }

                // Write to detailed report, including location of each occurrence.
                xmlDetails.WriteStartElement("item");
                xmlDetails.WriteAttributeString("name", name);
                xmlDetails.WriteAttributeString("frequency", XmlConvert.ToString(frequency));

                // Print each occurrence
                foreach (string context in m_frequency[name])
                {
                    xmlDetails.WriteElementString("location", context);
                }

                xmlDetails.WriteEndElement(); // item
            }

            if (xmlDetails != null)
            {
                xmlDetails.WriteEndElement(); // report.
            }
        }
    }

    // Entry point
    class Program
    {
        // args[0] - input directory to search.
        // args[1] - XML file to write out details.
        // The usage banner is always printed (it is part of the sample output shown in
        // the article); if fewer than two arguments are given the program stops there.
        static void Main(string[] args)
        {
            Console.WriteLine("  Usage:  counter <directory> <xml output file>");
            if (args == null || args.Length < 2)
            {
                Environment.ExitCode = 1;
                return;
            }

            string directory = args[0]; // input directory to search
            string outputPath = args[1]; // XML file to write out details

            // Scan BEFORE creating the output file: if the output path lies inside the
            // scanned tree, an already-open output file could not be read by the scanner.
            BaseStats f = new DirStats(directory);

            XmlTextWriter x = new XmlTextWriter(outputPath, Encoding.Default);
            try
            {
                // Indented output, matching the sample XML shown in the article.
                x.Formatting = Formatting.Indented;
                f.PrintReport(Console.Out, x);
            }
            finally
            {
                // Flushes buffered XML and releases the file even if the report fails.
                x.Close();
            }
        }
    }
} // end Counter namespace