单引号之间的 RegEx 数字
RegExp numbers between single quotes
我有一个像这样的字符串 "f_details('277095');">
。我只需要得到 277095
部分。我一直在尝试 strPattern = "'[0-9]'+"
的变体,但这要么一无所获,要么找到的是错误的内容。
尽管速查表(cheat sheet)就摆在我面前,但我还是搞不懂正则表达式。我已经花了一个小时尝试各种写法。这个正则表达式应该是什么样子?
这是我用来抓取该网站和抓取数据的代码:
' Drives Internet Explorer through the OREC public licence search, one search
' per county code, and appends every quoted number found in the result page to
' License.txt in the folder that contains this script.
Set objWshShell = Wscript.CreateObject("Wscript.Shell")
Set IE = CreateObject("internetexplorer.application")
Set fso = CreateObject("Scripting.FileSystemObject")

' Best-effort scraping: an error on one county must not abort the whole run.
On Error Resume Next

' Loop-invariant values, computed once instead of on every iteration.
filename = fso.GetParentFolderName(WScript.ScriptFullName) & "\License.txt"
strPattern = "'(\d+)'"
IE.Visible = True

For i = 1 To 77 '77 Counties
    ' NOTE(review): this exits before county code 77 is searched, so only
    ' codes 1-76 are processed - confirm that this is intended.
    If i = 77 Then Exit For

    IE.Navigate "https://lic.ok.gov/PublicPortal/OREC/FindAssociateEntity.jsp"
    Do Until IE.ReadyState = 4: WScript.Sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    IE.Document.getElementsByTagName("select")("AddrCountyCode").Value = i
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    For Each btn In IE.Document.getElementsByTagName("input")
        If btn.name = "btnSearch" Then btn.Click()
    Next

    ' The click starts a new page load. Give the browser a moment to begin
    ' navigating, then wait for the result page; otherwise the HTML read
    ' below may still be the search form.
    WScript.Sleep 250
    Do While IE.Busy: WScript.Sleep 15: Loop
    Do Until IE.ReadyState = 4: WScript.Sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    strTestString = IE.Document.body.innerHTML
    arrAllMatches = fGetMatches(strPattern, strTestString)

    ' fGetMatches signals "no match" with a one-element array holding "".
    ' A single real match also has UBound = 0, so the first element must be
    ' inspected as well to tell the two cases apart.
    If UBound(arrAllMatches) > 0 Or arrAllMatches(0) <> "" Then
        Set ts = fso.OpenTextFile(filename, 8, True) ' 8 = ForAppending
        ' Each match is the full text '<digits>'; drop the quotes so only the
        ' numbers are stored. WriteLine ends the batch with a line break so
        ' the next county's first number does not fuse with this one's last.
        ts.WriteLine Replace(Join(arrAllMatches, vbCrLf), "'", "")
        ts.Close
    Else
        WScript.Echo "-- None Found --"
    End If
Next

' Close the browser instance that was started for the scrape.
IE.Quit
Wscript.echo "DONE!"
'=====================================================================
' Returns every match of a regular expression in a string.
'
' Parameters:
'   sPattern - regular-expression pattern (VBScript RegExp syntax).
'   sStr     - text to search.
'
' Returns a zero-based array of strings with the full text of each match
' (not the capture groups), in order of appearance. The search is global and
' case-insensitive. When nothing matches, the result is a one-element array
' containing an empty string.
Function fGetMatches(sPattern, sStr)
    Dim regEx, colMatches, temp, idx

    Set regEx = New RegExp
    regEx.Pattern = sPattern
    regEx.IgnoreCase = True
    regEx.Global = True

    Set colMatches = regEx.Execute(sStr)

    If colMatches.Count = 0 Then
        temp = Array("")
    Else
        ' Copy the collection straight into an array. Joining the matches
        ' with a marker character and splitting again would break any match
        ' that itself contains the marker.
        ReDim temp(colMatches.Count - 1)
        For idx = 0 To colMatches.Count - 1
            temp(idx) = colMatches(idx).Value
        Next
    End If

    fGetMatches = temp
End Function
'\d+'
只需将量词添加到 \d
而不是 '
,因为您希望 \d
重复。
如果您只想获得 277095
,请尝试 (?<=')\d+(?=')
查看演示。
https://regex101.com/r/iS6jF6/6
' Walk over every quoted run of digits found in the sample text.
Dim quotedDigits As New Regex("'\d+'", RegexOptions.Multiline)
Dim sampleText As String = "f_details('277095');"
Dim hits As MatchCollection = quotedDigits.Matches(sampleText)
For Each hit As Match In hits
    If hit.Success Then
        ' Add your code here
    End If
Next
除了 vks 的回答之外,您还可以使用捕获组来捕获您需要的内容。
您可以像这样使用正则表达式:
'(\d+)'
您可以看到匹配内容以蓝色突出显示,捕获的内容以绿色突出显示
匹配信息
MATCH 1
1. [11-17] `277095`
VBScript 的 RegExp 实现功能有限,但如果你遵循“保持简单”(Keep it simple)这一通用原则,即使在这里也可以轻松地提取出数字序列:
>> Set r = New RegExp
>> r.Pattern = "\d+"
>> s = "f_details('277095');"
>> WScript.Echo r.Execute(s)(0).Value
>>
277095
我有一个像这样的字符串 "f_details('277095');">
。我只需要得到 277095
部分。我一直在尝试 strPattern = "'[0-9]'+"
的变体,但这要么一无所获,要么找到的是错误的内容。
尽管速查表(cheat sheet)就摆在我面前,但我还是搞不懂正则表达式。我已经花了一个小时尝试各种写法。这个正则表达式应该是什么样子?
这是我用来抓取该网站和抓取数据的代码:
' Drives Internet Explorer through the OREC public licence search, one search
' per county code, and appends every quoted number found in the result page to
' License.txt in the folder that contains this script.
Set objWshShell = Wscript.CreateObject("Wscript.Shell")
Set IE = CreateObject("internetexplorer.application")
Set fso = CreateObject("Scripting.FileSystemObject")

' Best-effort scraping: an error on one county must not abort the whole run.
On Error Resume Next

' Loop-invariant values, computed once instead of on every iteration.
filename = fso.GetParentFolderName(WScript.ScriptFullName) & "\License.txt"
strPattern = "'(\d+)'"
IE.Visible = True

For i = 1 To 77 '77 Counties
    ' NOTE(review): this exits before county code 77 is searched, so only
    ' codes 1-76 are processed - confirm that this is intended.
    If i = 77 Then Exit For

    IE.Navigate "https://lic.ok.gov/PublicPortal/OREC/FindAssociateEntity.jsp"
    Do Until IE.ReadyState = 4: WScript.Sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    IE.Document.getElementsByTagName("select")("AddrCountyCode").Value = i
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    For Each btn In IE.Document.getElementsByTagName("input")
        If btn.name = "btnSearch" Then btn.Click()
    Next

    ' The click starts a new page load. Give the browser a moment to begin
    ' navigating, then wait for the result page; otherwise the HTML read
    ' below may still be the search form.
    WScript.Sleep 250
    Do While IE.Busy: WScript.Sleep 15: Loop
    Do Until IE.ReadyState = 4: WScript.Sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.Sleep 10: Loop

    strTestString = IE.Document.body.innerHTML
    arrAllMatches = fGetMatches(strPattern, strTestString)

    ' fGetMatches signals "no match" with a one-element array holding "".
    ' A single real match also has UBound = 0, so the first element must be
    ' inspected as well to tell the two cases apart.
    If UBound(arrAllMatches) > 0 Or arrAllMatches(0) <> "" Then
        Set ts = fso.OpenTextFile(filename, 8, True) ' 8 = ForAppending
        ' Each match is the full text '<digits>'; drop the quotes so only the
        ' numbers are stored. WriteLine ends the batch with a line break so
        ' the next county's first number does not fuse with this one's last.
        ts.WriteLine Replace(Join(arrAllMatches, vbCrLf), "'", "")
        ts.Close
    Else
        WScript.Echo "-- None Found --"
    End If
Next

' Close the browser instance that was started for the scrape.
IE.Quit
Wscript.echo "DONE!"
'=====================================================================
' Returns every match of a regular expression in a string.
'
' Parameters:
'   sPattern - regular-expression pattern (VBScript RegExp syntax).
'   sStr     - text to search.
'
' Returns a zero-based array of strings with the full text of each match
' (not the capture groups), in order of appearance. The search is global and
' case-insensitive. When nothing matches, the result is a one-element array
' containing an empty string.
Function fGetMatches(sPattern, sStr)
    Dim regEx, colMatches, temp, idx

    Set regEx = New RegExp
    regEx.Pattern = sPattern
    regEx.IgnoreCase = True
    regEx.Global = True

    Set colMatches = regEx.Execute(sStr)

    If colMatches.Count = 0 Then
        temp = Array("")
    Else
        ' Copy the collection straight into an array. Joining the matches
        ' with a marker character and splitting again would break any match
        ' that itself contains the marker.
        ReDim temp(colMatches.Count - 1)
        For idx = 0 To colMatches.Count - 1
            temp(idx) = colMatches(idx).Value
        Next
    End If

    fGetMatches = temp
End Function
'\d+'
只需将量词添加到 \d
而不是 '
,因为您希望 \d
重复。
如果您只想获得 277095
,请尝试 (?<=')\d+(?=')
查看演示。
https://regex101.com/r/iS6jF6/6
' Walk over every quoted run of digits found in the sample text.
Dim quotedDigits As New Regex("'\d+'", RegexOptions.Multiline)
Dim sampleText As String = "f_details('277095');"
Dim hits As MatchCollection = quotedDigits.Matches(sampleText)
For Each hit As Match In hits
    If hit.Success Then
        ' Add your code here
    End If
Next
除了 vks 的回答之外,您还可以使用捕获组来捕获您需要的内容。
您可以像这样使用正则表达式:
'(\d+)'
您可以看到匹配内容以蓝色突出显示,捕获的内容以绿色突出显示
匹配信息
MATCH 1
1. [11-17] `277095`
VBScript 的 RegExp 实现功能有限,但如果你遵循“保持简单”(Keep it simple)这一通用原则,即使在这里也可以轻松地提取出数字序列:
>> Set r = New RegExp
>> r.Pattern = "\d+"
>> s = "f_details('277095');"
>> WScript.Echo r.Execute(s)(0).Value
>>
277095