使用多线程或 Parallel.ForEach 加速搜索文件

Speed up search file using Multithreading or Parallel.ForEach

我编写了一个代码来搜索文件和文件夹,并且(检查插入单词的所有可能组合)我有一个子程序,它给出了插入字符串的所有排列。

我的问题是我正在为每个排列的字符串重复代码(4 个单词意味着 24 次)并且我正在尝试使用 MultiThreading 来加速代码。

我已经阅读了很多示例,但由于多种原因我无法真正理解逻辑(一些示例是用 C 编写的;任何示例都是用不同的逻辑编写的)

我试过

Parallel.For  
Parallel.ForEach 
ThreadPool

但是在将列表(包含所有结果)设置为列表框的数据源之前我无法等待所有线程。

我的代码逻辑是:
通过拆分搜索字符串获取单词
如果搜索类型是 "all words in any order" 那么我得到所有排列
我开始搜索每个排列的字符串

我不喜欢在问题中添加太多代码,但我认为在这种情况下有必要了解我的工作方式:

Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click
    Select Case True
        Case RBtn_Exact.Checked
            StartSearch(Me.TB_Pattern.Text.Trim)
        Case RBtn_AllInOrder.Checked
            Dim Pattern As String = ""
            For Each Word As String In Me.TB_Pattern.Text.Split(New Char() {" "c})
                If Word.Trim <> "" Then Pattern &= "*" & Word.Trim
            Next
            Pattern &= "*"
            StartSearch(Pattern)
            endsearch()
        Case RBtn_AllWithoutOrder.Checked
            Dim WordHash As New HashSet(Of String)
            For Each Word As String In Split(Me.TB_Pattern.Text, " ")
                If Word.Trim <> "" Then WordHash.Add(Word.Trim)
            Next
            If WordHash.Count > 5 Then
                MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
                Exit Sub
            End If
            'Get permutations into an array
            StringPermutations()
            'I need to add "*" at the end of each permutated string
            For S As Integer = 0 To PermutationsArr.Length - 1
                PermutationsArr(S) &= "*"
            Next
            'This is for searching without MultiThreading
            For Each Pattern As String In PermutationsArr
                StartSearch(Pattern)
            Next
            'This is my last test
            'Parallel.ForEach(PermutationsArr,
            '                    Sub(Pattern)
            '                        StartSearch(Pattern)
            '                    End Sub
            '                 )
            'Task.WaitAll()
            endsearch()
        Case RBtn_AnyWord.Checked
            Dim WordHash As New HashSet(Of String)
            For Each Word As String In Split(Me.TB_Pattern.Text, " ")
                If Word.Trim <> "" Then WordHash.Add(Word.Trim)
            Next
            If WordHash.Count > 5 Then
                MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
                Exit Sub
            End If
            For Each Word As String In WordHash
                StartSearch(pattern:="*" & Word & "*")
            Next
            endsearch()
    End Select
End Sub

Private Sub StartSearch(ByVal pattern As String)
    'Search for files
    If Me.CBox_Files.Checked Then
        FileSearch(Me.TB_StartFolder.Text, pattern)
    End If
    'Search for folders
    If Me.CBox_Folders.Checked Then
        ProcessDir(Me.TB_StartFolder.Text, pattern)

        DirSearch(Me.TB_StartFolder.Text, pattern)
    End If
End Sub

Sub endsearch()
    Me.Btn_Search.Text = "Start"
    Me.Btn_Search.BackColor = Me.BackColor
    If Me.LB_Files.Items.Count > 0 Then
        Me.Lbl_FilesFound.Text = Me.LB_Files.Items.Count.ToString
        Me.Lbl_FilesFound.Visible = True
    End If
    If Me.LB_Folders.Items.Count > 0 Then
        Me.Lbl_DirFound.Text = Me.LB_Folders.Items.Count.ToString
        Me.Lbl_DirFound.Visible = True
    End If
End Sub

Sub DirSearch(ByVal sDir As String, ByVal Pattern As String)
    Try
        For Each Dir As String In Directory.GetDirectories(sDir)
            Try
                For Each D As String In Directory.GetDirectories(Dir, Pattern)
                    Try
                        If LimitReached(LB_Folders) Then
                            Me.Lbl_LimitReached.Visible = True
                            Exit Sub
                        Else
                            If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(D) Then LB_Folders.Items.Add(D)
                        End If
                    Catch ex As Exception
                        Continue For
                    End Try
                Next
                DirSearch(Dir, Pattern)
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As Exception
    End Try
End Sub
Sub FileSearch(ByVal sDir As String, ByVal Pattern As String)
    Dim d As String = ""
    Try
        For Each f As String In Directory.GetFiles(sDir, Pattern)
            Try
                If LimitReached(LB_Files) Then
                    Me.Lbl_LimitReached.Visible = True
                    Exit Sub
                Else
                    If Me.CBox_LastModRange.Checked Then
                        If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    Else
                        If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    End If
                End If
            Catch ex As Exception
                Continue For
            End Try
        Next
        'Search for subfolders
        For Each d In Directory.GetDirectories(sDir)
            Try
                ProcessDir(d, Pattern)
            Catch ex As Exception
            End Try
            Try
                FileSearch(d, Pattern)
            Catch ex As Exception
            End Try
        Next
    Catch excpt As System.Exception
    End Try
End Sub

Private Sub ProcessDir(d As String, ByVal Pattern As String)
    Try
        For Each f As String In Directory.GetFiles(d, Pattern)
            Try
                If LimitReached(LB_Files) Then
                    Me.Lbl_LimitReached.Visible = True
                    Exit Sub
                Else
                    If Me.CBox_LastModRange.Checked Then
                        If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    Else
                        If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    End If
                End If
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As System.Exception
    End Try
    Try
        For Each d In Directory.GetDirectories(d, Pattern)
            Try
                If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(d) Then LB_Folders.Items.Add(d)
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As Exception
    End Try
End Sub

编辑
在我获取排列的代码下方(我知道它有一个特定的逻辑但它有效并且看起来足够快):

Private Sub StringPermutations()
    Try
        Dim WordHash As New HashSet(Of String)
        For Each Word As String In Split(Me.TB_Pattern.Text, " ")
            If Word.Trim <> "" Then WordHash.Add(Word.Trim)
        Next
        Dim WordList As List(Of String) = WordHash.ToList
        ReDim PermutationsArr(Factorial(WordList.Count) - 1)
        AddString(WordList, 0)
    Catch ex As Exception
        MsgBox(ex.ToString)
    End Try
End Sub

Private Function Factorial(ByVal Num As Integer) As Integer
    Try
        If Num > 0 AndAlso Num < 12 Then
            Dim Result As Int32 = 1
            Do
                Result *= Num
                Num -= 1
            Loop Until Num <= 1
            Return Result
        Else
            Return 0
        End If
    Catch ex As Exception
        Return Nothing
    End Try
End Function

Private Sub AddString(ByVal WordList As List(Of String), ByVal StartId As Integer)
    Try
        Dim InsLoop As Integer = Factorial(WordList.Count - 1)
        If InsLoop = 0 Then InsLoop = 1
        For Each Word As String In WordList
            For InsWord As Integer = 1 To InsLoop
                PermutationsArr(StartId + InsWord - 1) &= "*" & Word
            Next
            If WordList.Count > 1 Then
                Dim Remaining As New List(Of String)
                For Each RemWord As String In WordList
                    If RemWord <> Word Then Remaining.Add(RemWord)
                Next
                AddString(Remaining, StartId)
            End If
            StartId += InsLoop
        Next
    Catch ex As Exception
        MsgBox(ex.ToString)
    End Try
End Sub

这是我的表格 class,它基于您的表格,但已大大简化。我使用 Tasks 进行多线程处理,使用 ConcurrentDictionarys 来捕获具有容量限制、并发级别且没有重复的结果,并在最后一次调用中填充列表框,以最大限度地减少 UI 更新和相关的缓慢。并发级别是将生成以提供给 ConcurrentDictionary 的任务数。

Imports System.Text.RegularExpressions

Public Class SearchForm
    Private FoldersList As Concurrent.ConcurrentDictionary(Of String, Object)
    Private FilesList As Concurrent.ConcurrentDictionary(Of String, Object)

    Private Tasks As New List(Of Task)
    Private Words As New List(Of String)

    Private StopWatch As New Stopwatch

    ' Capacity of the ConcurrentDictionary objects
    ' Set this from user input on form to limit # of results returned
    Private Capacity As Int32 = 0

    Private PermutationsArr() As String = Nothing

    Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click
        Btn_Search.Text = "Wait"

        ' Capacity of the ConcurrentDictionary objects
        ' Set this from user input on form to limit # of results returned
        Capacity = 10000

        Tasks.Clear()
        Words.Clear()

        LB_Folders.DataSource = Nothing
        LB_Files.DataSource = Nothing

        Me.Refresh()

        StopWatch.Restart()

        Words.AddRange(Regex.Split(Regex.Replace(Me.TB_Pattern.Text.Trim, "\*", String.Empty), "\s+"))

        Select Case True
            Case String.IsNullOrWhiteSpace(Me.TB_Pattern.Text.Trim)
                MsgBox("Too few words", vbOKOnly, "Oops")
            Case Words.Count < 1
                MsgBox("Too few words", vbOKOnly, "Oops")
            Case Words.Count > 5
                MsgBox("Too many words", vbOKOnly, "Oops")

            Case Me.CBox_LastModRange.Checked AndAlso Me.DT_ModRangeEnd.Value < Me.DT_ModRangeStart.Value
                MsgBox("Range Start must precede Range End", vbOKOnly, "Oops")

            Case Me.RBtn_Exact.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)

                With Join(Words.ToArray)
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then
                            SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                        End If
                    End If
                End With

            Case Me.RBtn_AllInOrder.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)

                With String.Format("*{0}*", Join(Words.ToArray, "*"))
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                    End If
                End With

            Case Me.RBtn_AllWithoutOrder.Checked
                StringPermutations()

                ' Math.Min caps the concurrency level at 40
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity)

                For Each Pattern As String In PermutationsArr
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, Pattern, True)
                        'Tasks.Add(Task.Run(Sub() SearchFolders(Me.TB_StartFolder.Text, Pattern)))
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, Pattern, True, True)
                    End If
                Next

            Case Me.RBtn_AnyWord.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity)

                For Each Word In Words
                    With String.Format("*{0}*", Word)
                        If Me.CBox_Folders.Checked Then
                            ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                            SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                        Else
                            ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                            If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                        End If
                    End With
                Next
        End Select

        Task.WaitAll(Tasks.ToArray)

        Debug.Print("Tasks Completed in {0}", StopWatch.Elapsed.ToString)

        Debug.Print("Adding {0} Folders", FoldersList.Keys.Count.ToString)
        Me.LB_Folders.DataSource = FoldersList.Keys

        Debug.Print("Adding {0} Files", FilesList.Keys.Count.ToString)
        Me.LB_Files.DataSource = FilesList.Keys

        Btn_Search.Text = "Search"
    End Sub

    Private Sub SearchFolders(FolderPath As String, Pattern As String, Optional FirstCall As Boolean = False)
        Try
            Dim Folders() As String = IO.Directory.GetDirectories(FolderPath)

            For Each Folder As String In Folders
                Dim SubFolders() As String = IO.Directory.GetDirectories(Folder, Pattern)

                For Each SubFolder As String In SubFolders
                    Select Case True
                        Case Not FilesList.Count < Capacity
                            Exit For
                        Case Not Me.CBox_LastModRange.Checked
                            FoldersList.TryAdd(SubFolder, Nothing)
                        Case FolderInModRange(Folder)
                            FoldersList.TryAdd(SubFolder, Nothing)
                    End Select
                Next

                If Me.CBox_Files.Checked Then
                    ' Do NOT call this with Recursive = True from here!
                    SearchFiles(Folder, Pattern)
                End If

                If FirstCall Then
                    ' Perform multithreaded Recursion
                    Tasks.Add(Task.Run(Sub() SearchFolders(Folder, Pattern)))
                Else
                    ' Perform deep recursion within task thread...don't branch further
                    SearchFolders(Folder, Pattern)
                End If
            Next
        Catch ex As UnauthorizedAccessException
            ' Access Denied
        Catch ex As Exception
            Debug.Print("SearchFiles: {0}", ex.ToString)
        End Try
    End Sub

    Private Sub SearchFiles(FolderPath As String, Pattern As String, Optional Recursive As Boolean = False, Optional FirstCall As Boolean = False)
        ' Recursive and FirstCall should only be True if NOT doing SearchFolders
        ' Recursive should only be True if called from the main thread or this method to continue the deep dive
        ' FirstCall should only be True if called from the main thread

        Try
            For Each Filename As String In IO.Directory.GetFiles(FolderPath, Pattern)
                Select Case True
                    Case Not FilesList.Count < Capacity
                        Exit For
                    Case Not Me.CBox_LastModRange.Checked
                        FilesList.TryAdd(Filename, Nothing)
                    Case FileInModRange(Filename)
                        FilesList.TryAdd(Filename, Nothing)
                End Select
            Next

            If Recursive Then
                Try
                    Dim Folders() As String = IO.Directory.GetDirectories(FolderPath)
                    For Each Folder As String In Folders
                        If FirstCall Then
                            ' Perform multithreaded Recursion
                            Tasks.Add(Task.Run(Sub() SearchFiles(Folder, Pattern, Recursive)))
                        Else
                            ' Perform deep recursion within task thread...don't branch further
                            SearchFiles(Folder, Pattern, Recursive)
                        End If
                    Next
                Catch ex As Exception
                    ' Access Denied - Does this happen?
                    Debug.Print("Recursive FolderPath: {0}", ex.Message)
                End Try
            End If
        Catch ex As UnauthorizedAccessException
            ' Access Denied
        Catch ex As Exception
            Debug.Print("SearchFiles: {0}", ex.ToString)
        End Try
    End Sub

    Private Function FolderInModRange(Folder As String) As Boolean
        Try
            With New IO.DirectoryInfo(Folder)
                Select Case True
                    Case .LastWriteTime < Me.DT_ModRangeStart.Value
                        Return False
                    Case .LastWriteTime > Me.DT_ModRangeEnd.Value
                        Return False
                    Case Else
                        Return True
                End Select
            End With
        Catch ex As Exception
            Debug.Print("FolderInModRange: {0}{1}{2}", Folder, Environment.NewLine, ex.ToString)
        End Try

        ' Only if exception is thrown
        Return False
    End Function

    Private Function FileInModRange(Filename As String) As Boolean
        Try
            With New IO.FileInfo(Filename)
                Select Case True
                    Case .LastWriteTime < Me.DT_ModRangeStart.Value
                        Return False
                    Case .LastWriteTime > Me.DT_ModRangeEnd.Value
                        Return False
                    Case Else
                        Return True
                End Select
            End With
        Catch ex As IO.PathTooLongException
            ' Path Too Long
        Catch ex As Exception
            Debug.Print("FileInModRange: {0}{1}{2}", Filename, Environment.NewLine, ex.ToString)
        End Try

        ' Only if exception is thrown
        Return False
    End Function
End Class

递归避免了 .Net 的 GetDirectoriesGetFiles 方法在 运行 进入用户无权访问的文件夹时产生的 UnauthorizedAccessException 错误。

参考文献: