从字符串中提取数字 - VBA
Number Extraction from String - VBA
我有一个包含大约 7000 条记录的列表,这些记录是包含数字的字符串。我需要提取所有号码,尤其是那些以“(”、“#”或 "S " 开头的号码。想法是我们需要提取票号,以便分析存在多少问题每种类型的票。下面是我写的代码。我认为数组可能更好,但我一直无法弄清楚如何让它工作,所以现在,它循环遍历每个中的每个字符行,如果字符是数字,它开始复制并粘贴下一列中的数字,直到字符不再是数字。它将每组数字放在一个新列中,直到完成该行中的所有字符。
问题是它花费的时间非常长。上次一路放手,花了一个多小时。最近我改的时候,放10分钟左右就停了,大概完成了1200条左右的记录。每行最多可包含 100 个字符,但大多数接近 30 个。
有什么办法可以加快计算速度吗?
Sub findNumbers1()
Dim v As Integer, Length As Long, str As String, i As Long, r As Range,
lastRow As Long, nextCol, nextRow As Long, result, ArrayResult As String, ws
As Worksheet
nextRow = 0
nextCol = 0
Set ws = Worksheets("Sheet2")
lastRow = ws.Cells(Rows.Count, "A").End(xlUp).Row
Set r = ws.Range("A2:A6885")
nextRow = 1
For Each cell In r
str = cell.Value
Length = Len(str)
i = 1
nextCol = 2
nextRow = nextRow + 1
Do Until i > Length
If Mid(cell.Value, i, 1) = "(" Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
If Mid(cell.Value, i, 1) = "#" Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
If Mid(cell.Value, i, 1) = "S " Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Next cell
result = ""
ArrayResult = ""
Call pasteNoITMS
ws.ShowAllData
End Sub
Sub findNumbers2()
'pull all numbers from remaining applications after findnumbers1 runs.
Dim v As Integer, Length As Long, str As String, i As Long, r As Range, lastRow As Long, nextCol, nextRow As Long, result, ArrayResult As String, ws As Worksheet
result = ""
ArrayResult = ""
nextRow = 0
nextCol = 0
Set ws = Worksheets("2ndPull")
lastRow = ws.Cells(Rows.Count, "A").End(xlUp).Row
Set r = ws.Range("A2:A2000")
nextRow = 1
For Each cell In r
str = cell.Value
Length = Len(str)
i = 1
nextCol = 2
nextRow = nextRow + 1
ArrayResult = ""
Do Until i > Length
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Next cell
result = ""
ArrayResult = ""
Call sortPulled
End Sub
代码开始位置:
Application.ScreenUpdating = False
然后在最后(在任何关闭之前)位置:
Application.ScreenUpdating = True
这肯定会加快这个过程,因为它不需要向您展示它在做什么。以更有效的方式重写它是理想的,但这可能有助于根据您的喜好加快速度。
您应该避免访问每个字符的 cell.Value
。只需读取每个单元格值一次 - 您已经将其分配给变量 str
。将 mid(cell.value, i, 1)
语句替换为 mid(str, i, 1)
(和类似语句)应该会显着加快例程。
您可以做出的最重要的改进是尽量减少与范围的交互
您也可以将搜索范围缩小到您需要的情况:(, #, S
像这样:
Option Explicit
Public Sub findNumbers2()
Const DELIMS = "(, #, S, , Test"
Dim ws As Worksheet, lc As Long, lr As Long, allFound As Long, nxt As Long
Dim ur As Variant, ubR As Long, ubC As Long, r As Long, c As Long, i As Long
Dim delim As Variant, dMax As Long, found As Long, result As Variant, t As Double
t = Timer '------------------------------------------------------------------------
delim = Split(DELIMS, ","): dMax = UBound(delim)
For i = 0 To dMax
If Len(delim(i)) > 0 Then
If Len(delim(i)) > 1 Then delim(i) = Trim(delim(i))
End If
Next
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws.UsedRange
ur = .Value2
lc = .Columns.Count
lr = .Rows.Count
End With
ubR = UBound(ur, 1): ubC = UBound(ur, 2)
result = ws.UsedRange.Offset(0, lc + 1)
For r = 1 To ubR
For c = 1 To ubC
For i = 0 To dMax
found = InStr(ur(r, c), delim(i))
If found > 0 Then
nxt = found
Do
found = Val(Mid(ur(r, c), nxt + 1))
If found > 0 Then
allFound = allFound + 1
result(r, c) = result(r, c) & found & ", "
End If
nxt = InStr(nxt + 1, ur(r, c), delim(i))
Loop While nxt > 0
End If
Next
Next
Next
ws.UsedRange.Offset(0, lc + 1).Value2 = result: 'ws.UsedRange.EntireColumn.AutoFit
Debug.Print "Rows: " & lr & "; duration: " & Format(Timer - t, "#,###.00") & " secs"
End Sub
包含 7 列数据的测试结果,每个单元格包含一个或多个数字:
Rows: 100,001; duration: 5.77 secs
Rows: 500,005; duration: 28.25 secs
我有一个包含大约 7000 条记录的列表,这些记录是包含数字的字符串。我需要提取所有号码,尤其是那些以“(”、“#”或 "S " 开头的号码。想法是我们需要提取票号,以便分析存在多少问题每种类型的票。下面是我写的代码。我认为数组可能更好,但我一直无法弄清楚如何让它工作,所以现在,它循环遍历每个中的每个字符行,如果字符是数字,它开始复制并粘贴下一列中的数字,直到字符不再是数字。它将每组数字放在一个新列中,直到完成该行中的所有字符。
问题是它花费的时间非常长。上次一路放手,花了一个多小时。最近我改的时候,放10分钟左右就停了,大概完成了1200条左右的记录。每行最多可包含 100 个字符,但大多数接近 30 个。
有什么办法可以加快计算速度吗?
Sub findNumbers1()
Dim v As Integer, Length As Long, str As String, i As Long, r As Range,
lastRow As Long, nextCol, nextRow As Long, result, ArrayResult As String, ws
As Worksheet
nextRow = 0
nextCol = 0
Set ws = Worksheets("Sheet2")
lastRow = ws.Cells(Rows.Count, "A").End(xlUp).Row
Set r = ws.Range("A2:A6885")
nextRow = 1
For Each cell In r
str = cell.Value
Length = Len(str)
i = 1
nextCol = 2
nextRow = nextRow + 1
Do Until i > Length
If Mid(cell.Value, i, 1) = "(" Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
If Mid(cell.Value, i, 1) = "#" Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
If Mid(cell.Value, i, 1) = "S " Then
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
End If
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Next cell
result = ""
ArrayResult = ""
Call pasteNoITMS
ws.ShowAllData
End Sub
Sub findNumbers2()
'pull all numbers from remaining applications after findnumbers1 runs.
Dim v As Integer, Length As Long, str As String, i As Long, r As Range, lastRow As Long, nextCol, nextRow As Long, result, ArrayResult As String, ws As Worksheet
result = ""
ArrayResult = ""
nextRow = 0
nextCol = 0
Set ws = Worksheets("2ndPull")
lastRow = ws.Cells(Rows.Count, "A").End(xlUp).Row
Set r = ws.Range("A2:A2000")
nextRow = 1
For Each cell In r
str = cell.Value
Length = Len(str)
i = 1
nextCol = 2
nextRow = nextRow + 1
ArrayResult = ""
Do Until i > Length
If IsNumeric(Mid(cell.Value, i + 1, 1)) Then
Do While IsNumeric(Mid(cell.Value, i + 1, 1))
result = Mid(cell.Value, i + 1, 1)
ArrayResult = ArrayResult + result
ws.Cells(nextRow, nextCol).Value = ArrayResult
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Else
nextCol = ws.Cells(nextRow, Columns.Count).End(xlToLeft).Column + 1
End If
i = i + 1
Loop
ArrayResult = ""
nextCol = nextCol + 1
Next cell
result = ""
ArrayResult = ""
Call sortPulled
End Sub
代码开始位置:
Application.ScreenUpdating = False
然后在最后(在任何关闭之前)位置:
Application.ScreenUpdating = True
这肯定会加快这个过程,因为它不需要向您展示它在做什么。以更有效的方式重写它是理想的,但这可能有助于根据您的喜好加快速度。
您应该避免访问每个字符的 cell.Value
。只需读取每个单元格值一次 - 您已经将其分配给变量 str
。将 mid(cell.value, i, 1)
语句替换为 mid(str, i, 1)
(和类似语句)应该会显着加快例程。
您可以做出的最重要的改进是尽量减少与范围的交互
您也可以将搜索范围缩小到您需要的情况:(, #, S
像这样:
Option Explicit
Public Sub findNumbers2()
Const DELIMS = "(, #, S, , Test"
Dim ws As Worksheet, lc As Long, lr As Long, allFound As Long, nxt As Long
Dim ur As Variant, ubR As Long, ubC As Long, r As Long, c As Long, i As Long
Dim delim As Variant, dMax As Long, found As Long, result As Variant, t As Double
t = Timer '------------------------------------------------------------------------
delim = Split(DELIMS, ","): dMax = UBound(delim)
For i = 0 To dMax
If Len(delim(i)) > 0 Then
If Len(delim(i)) > 1 Then delim(i) = Trim(delim(i))
End If
Next
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws.UsedRange
ur = .Value2
lc = .Columns.Count
lr = .Rows.Count
End With
ubR = UBound(ur, 1): ubC = UBound(ur, 2)
result = ws.UsedRange.Offset(0, lc + 1)
For r = 1 To ubR
For c = 1 To ubC
For i = 0 To dMax
found = InStr(ur(r, c), delim(i))
If found > 0 Then
nxt = found
Do
found = Val(Mid(ur(r, c), nxt + 1))
If found > 0 Then
allFound = allFound + 1
result(r, c) = result(r, c) & found & ", "
End If
nxt = InStr(nxt + 1, ur(r, c), delim(i))
Loop While nxt > 0
End If
Next
Next
Next
ws.UsedRange.Offset(0, lc + 1).Value2 = result: 'ws.UsedRange.EntireColumn.AutoFit
Debug.Print "Rows: " & lr & "; duration: " & Format(Timer - t, "#,###.00") & " secs"
End Sub
包含 7 列数据的测试结果,每个单元格包含一个或多个数字:
Rows: 100,001; duration: 5.77 secs Rows: 500,005; duration: 28.25 secs