如何:使用 Parallel 類別逐一查看檔案目錄

在許多情況下,逐一查看檔案是一項可以輕鬆平行處理的作業。 「如何:使用 PLINQ 逐一查看檔案目錄」這個主題示範了在許多案例中執行此工作的最簡單方式。 不過,當您的程式碼必須處理存取檔案系統時可能出現的多種例外狀況時,這項工作就會變得複雜。 下列範例示範處理該問題的其中一種方式。 它會使用以堆疊為基礎的反覆運算來周遊特定目錄底下的所有檔案和資料夾,讓您的程式碼能夠攔截並處理各種不同的例外狀況。 當然,要如何處理例外狀況仍然取決於您。

範例

下列範例會循序逐一查看目錄,但是以平行方式處理檔案。 當檔案數量遠多於目錄數量 (也就是檔案對目錄的比率很高) 時,這應該是最好的方法。 您也可以平行處理目錄的逐一查看,並以循序方式存取每個檔案。 除非您是特別鎖定具有大量處理器的電腦,否則同時對兩個迴圈進行平行處理應該不會很有效率。 不過無論如何,您都應該徹底地測試應用程式以判斷最佳方法。

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Sample that walks a directory tree sequentially (stack-based, no recursion)
/// while processing the files of each directory in parallel when there are
/// enough of them to make that worthwhile.
/// </summary>
class Program
{
    static void Main()
    {
        try
        {
            TraverseTreeParallelForEach(@"C:\Program Files", (f) =>
            {
                // Read failures are deliberately ignored: the sample only reads the
                // file to generate some work and does not use the contents.
                try
                {
                    File.ReadAllBytes(f);
                }
                catch (FileNotFoundException) { }       // File was deleted after it was listed.
                catch (IOException) { }                 // General I/O failure, e.g. file in use.
                catch (UnauthorizedAccessException) { } // Lack of adequate permissions.
                catch (SecurityException) { }           // Lack of adequate permissions.

                // Display the filename.
                Console.WriteLine(f);
            });
        }
        catch (ArgumentException)
        {
            Console.WriteLine(@"The directory 'C:\Program Files' does not exist.");
        }

        // Keep the console window open.
        Console.ReadKey();
    }

    /// <summary>
    /// Visits every directory at or below <paramref name="root"/> and invokes
    /// <paramref name="action"/> once for each file path found. Directories are
    /// visited one at a time; the files of a single directory are processed in
    /// parallel when the directory holds at least as many files as the machine
    /// has processors, and sequentially otherwise.
    /// </summary>
    /// <param name="root">Path of the directory at which the traversal starts.</param>
    /// <param name="action">
    /// Callback invoked with the full path of each file. It may run concurrently
    /// on several threads, so it must be thread-safe.
    /// </param>
    /// <exception cref="ArgumentException">
    /// <paramref name="root"/> does not refer to an existing directory.
    /// </exception>
    /// <exception cref="AggregateException">
    /// <paramref name="action"/> threw something other than
    /// <see cref="UnauthorizedAccessException"/> inside the parallel loop.
    /// </exception>
    /// <remarks>
    /// Directories that cannot be listed are reported on the console and skipped.
    /// An <see cref="UnauthorizedAccessException"/> thrown by
    /// <paramref name="action"/> is reported on the console and ends processing of
    /// the current directory's files only; the traversal then continues. A summary
    /// line with the file count and elapsed time is written at the end.
    /// </remarks>
    public static void TraverseTreeParallelForEach(string root, Action<string> action)
    {
        // Count of files traversed and timer for diagnostic output.
        int fileCount = 0;
        var sw = Stopwatch.StartNew();

        // Threshold used to decide whether a directory is worth parallelizing.
        int procCount = Environment.ProcessorCount;

        // Names of the directories that still have to be examined.
        Stack<string> dirs = new Stack<string>();

        if (!Directory.Exists(root))
        {
            throw new ArgumentException(
                "The given root directory doesn't exist.", nameof(root));
        }
        dirs.Push(root);

        while (dirs.Count > 0)
        {
            string currentDir = dirs.Pop();

            string[] subDirs;
            if (!TryList(Directory.GetDirectories, currentDir, out subDirs))
            {
                continue;
            }

            // Queue the subdirectories right away so they are still visited when
            // listing or processing this directory's files fails below.
            foreach (string subDir in subDirs)
            {
                dirs.Push(subDir);
            }

            string[] files;
            if (!TryList(Directory.GetFiles, currentDir, out files))
            {
                continue;
            }

            // Execute in parallel if there are enough files in the directory.
            // Otherwise, execute sequentially. Files are opened and processed
            // synchronously but this could be modified to perform async I/O.
            try
            {
                if (files.Length < procCount)
                {
                    foreach (var file in files)
                    {
                        action(file);
                        fileCount++;
                    }
                }
                else
                {
                    // Each worker counts locally and publishes its total once, so
                    // the shared counter is touched only once per worker.
                    Parallel.ForEach(files, () => 0,
                        (file, loopState, localCount) =>
                        {
                            action(file);
                            return localCount + 1;
                        },
                        (c) => Interlocked.Add(ref fileCount, c));
                }
            }
            catch (AggregateException ae)
            {
                // Parallel branch: failures arrive wrapped in an AggregateException.
                ae.Handle((ex) =>
                {
                    if (ex is UnauthorizedAccessException)
                    {
                        // Here we just output a message and go on.
                        Console.WriteLine(ex.Message);
                        return true;
                    }
                    // Handle other exceptions here if necessary...

                    return false;
                });
            }
            catch (UnauthorizedAccessException ex)
            {
                // Sequential branch: the same failure arrives unwrapped. Treat it
                // exactly like the parallel branch does instead of aborting the
                // whole traversal.
                Console.WriteLine(ex.Message);
            }
        }

        // For diagnostic purposes.
        Console.WriteLine("Processed {0} files in {1} milliseconds", fileCount, sw.ElapsedMilliseconds);
    }

    /// <summary>
    /// Runs a directory-listing call and reports whether it succeeded. Access and
    /// I/O failures (including the directory having been deleted in the meantime)
    /// are written to the console instead of being propagated.
    /// </summary>
    /// <param name="list">Listing function, e.g. <c>Directory.GetFiles</c>.</param>
    /// <param name="directory">Directory to list.</param>
    /// <param name="entries">The listed paths, or an empty array on failure.</param>
    /// <returns><c>true</c> when the listing succeeded; otherwise <c>false</c>.</returns>
    private static bool TryList(Func<string, string[]> list, string directory, out string[] entries)
    {
        try
        {
            entries = list(directory);
            return true;
        }
        // Thrown if we do not have discovery permission on the directory.
        catch (UnauthorizedAccessException e)
        {
            Console.WriteLine(e.Message);
        }
        // Covers DirectoryNotFoundException (directory deleted after its name was
        // retrieved) as well as other I/O errors.
        catch (IOException e)
        {
            Console.WriteLine(e.Message);
        }

        entries = new string[0];
        return false;
    }
}
Imports System.Collections.Generic
Imports System.Diagnostics
Imports System.IO
Imports System.Security
Imports System.Threading
Imports System.Threading.Tasks

''' <summary>
''' Sample that walks a directory tree sequentially (stack-based, no recursion)
''' while processing the files of each directory in parallel when there are
''' enough of them to make that worthwhile.
''' </summary>
Module Example
    Sub Main()
        Try
            TraverseTreeParallelForEach("C:\Program Files",
                                        Sub(f)
                                            ' Read failures are deliberately ignored: the sample only reads
                                            ' the file to generate some work and does not use the contents.
                                            Try
                                                File.ReadAllBytes(f)
                                            Catch e As FileNotFoundException
                                                ' The file was deleted after it was listed.
                                            Catch e As IOException
                                                ' General I/O exception, especially if the file is in use.
                                            Catch e As UnauthorizedAccessException
                                                ' Lack of adequate permissions.
                                            Catch e As SecurityException
                                                ' Lack of adequate permissions.
                                            End Try
                                            ' Display the filename.
                                            Console.WriteLine(f)
                                        End Sub)
        Catch e As ArgumentException
            Console.WriteLine("The directory 'C:\Program Files' does not exist.")
        End Try
        ' Keep the console window open.
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Visits every directory at or below <paramref name="root"/> and invokes
    ''' <paramref name="action"/> once for each file path found. Directories are
    ''' visited one at a time; the files of a single directory are processed in
    ''' parallel when the directory holds at least as many files as the machine
    ''' has processors, and sequentially otherwise.
    ''' </summary>
    ''' <param name="root">Path of the directory at which the traversal starts.</param>
    ''' <param name="action">
    ''' Callback invoked with the full path of each file. It may run concurrently
    ''' on several threads, so it must be thread-safe.
    ''' </param>
    ''' <exception cref="ArgumentException">
    ''' <paramref name="root"/> does not refer to an existing directory.
    ''' </exception>
    ''' <exception cref="AggregateException">
    ''' <paramref name="action"/> threw something other than
    ''' <see cref="UnauthorizedAccessException"/> inside the parallel loop.
    ''' </exception>
    ''' <remarks>
    ''' Directories that cannot be listed are reported on the console and skipped.
    ''' An <see cref="UnauthorizedAccessException"/> thrown by
    ''' <paramref name="action"/> is reported on the console and ends processing of
    ''' the current directory's files only; the traversal then continues. A single
    ''' summary line with the file count and elapsed time is written at the end.
    ''' </remarks>
    Public Sub TraverseTreeParallelForEach(ByVal root As String, ByVal action As Action(Of String))
        ' Count of files traversed and timer for diagnostic output.
        Dim fileCount As Integer = 0
        Dim sw As Stopwatch = Stopwatch.StartNew()

        ' Threshold used to decide whether a directory is worth parallelizing.
        Dim procCount As Integer = System.Environment.ProcessorCount

        ' Names of the directories that still have to be examined.
        Dim dirs As New Stack(Of String)

        If Not Directory.Exists(root) Then
            Throw New ArgumentException(
                "The given root directory doesn't exist.", NameOf(root))
        End If

        dirs.Push(root)

        While (dirs.Count > 0)
            Dim currentDir As String = dirs.Pop()
            Dim subDirs() As String = Nothing
            Dim files() As String = Nothing

            Try
                subDirs = Directory.GetDirectories(currentDir)
            Catch e As UnauthorizedAccessException
                ' Thrown if we do not have discovery permission on the directory.
                Console.WriteLine(e.Message)
                Continue While
            Catch e As IOException
                ' Covers DirectoryNotFoundException (another process deleted the directory
                ' after we retrieved its name) as well as other I/O errors.
                Console.WriteLine(e.Message)
                Continue While
            End Try

            ' Queue the subdirectories right away so they are still visited when
            ' listing or processing this directory's files fails below.
            For Each subDir As String In subDirs
                dirs.Push(subDir)
            Next

            Try
                files = Directory.GetFiles(currentDir)
            Catch e As UnauthorizedAccessException
                Console.WriteLine(e.Message)
                Continue While
            Catch e As IOException
                ' Also covers DirectoryNotFoundException.
                Console.WriteLine(e.Message)
                Continue While
            End Try

            ' Execute in parallel if there are enough files in the directory.
            ' Otherwise, execute sequentially. Files are opened and processed
            ' synchronously but this could be modified to perform async I/O.
            Try
                If files.Length < procCount Then
                    For Each filePath In files
                        action(filePath)
                        fileCount += 1
                    Next
                Else
                    ' Each worker counts locally and publishes its total once, so
                    ' the shared counter is touched only once per worker.
                    Parallel.ForEach(files, Function() 0, Function(filePath, loopState, localCount)
                                                              action(filePath)
                                                              Return localCount + 1
                                                          End Function,
                                     Sub(c)
                                         Interlocked.Add(fileCount, c)
                                     End Sub)
                End If
            Catch ae As AggregateException
                ' Parallel branch: failures arrive wrapped in an AggregateException.
                ae.Handle(Function(ex)
                              If TypeOf ex Is UnauthorizedAccessException Then
                                  ' Here we just output a message and go on.
                                  Console.WriteLine(ex.Message)
                                  Return True
                              End If
                              ' Handle other exceptions here if necessary...

                              Return False
                          End Function)
            Catch uae As UnauthorizedAccessException
                ' Sequential branch: the same failure arrives unwrapped. Treat it exactly
                ' like the parallel branch does instead of aborting the whole traversal.
                Console.WriteLine(uae.Message)
            End Try
        End While

        ' For diagnostic purposes. Written once, after the whole tree has been walked.
        Console.WriteLine("Processed {0} files in {1} milliseconds", fileCount, sw.ElapsedMilliseconds)
    End Sub
End Module

在此範例中,檔案 I/O 是以同步方式執行。 處理大型檔案或低速的網路連線時,您應該考慮以非同步方式存取檔案。 您可以結合非同步 I/O 技術與平行反覆運算。 如需詳細資訊,請參閱 TPL 和傳統 .NET 非同步程式設計 (部分機器翻譯)。

這個範例會使用區域變數 fileCount 維護已處理檔案的總數。 由於可能會有多個工作同時存取這個變數,因此會透過呼叫 Interlocked.Add 方法來同步處理對它的存取。

請注意,若有例外狀況在主執行緒上擲出,由 ForEach 方法啟動的執行緒可能會繼續執行。 若要停止這些執行緒,您可以在例外處理常式中設定布林值變數,並在平行迴圈的每個反覆運算上檢查其值。 若該值指出已擲出例外狀況,請使用 ParallelLoopState 變數來停止或中斷迴圈。 如需詳細資訊,請參閱「如何:停止或中斷 Parallel.For 迴圈」。

另請參閱