将带有各种分隔符的地址拆分为街道地址、城市、州、邮编和国家/地区
Splitting address with various delimiters into street address, city, state, zip and country
我的数据在 Excel。我有几个 sheet 的数据,其中地址总是在每个 sheet 的同一列中。地址格式的示例包括:
1155 15th Street NW Suite 600 Washington, DC 20005 US
4600 Emperor Blvd #200 Durham, NC 27703-8577 US
200 Stevens Drive 费城, PA 19113 US
505 City Parkway West Orange, CA 92868 US
550 S Caldwell St, Charlotte, NC 28202-2633 US
1643 NW 136th Ave Ste H200 Sunrise, FL 33323-2857 US
我尝试了下面的代码,但此时在代码中出现错误 "sCity = Trim(Mid$(rCell.Value, Len(sAddress) + 1, lStatePos - Len (sAddress) - 1))"
谁能帮我解决这个问题?
Sub SplitAddresses()
Dim vaStates As Variant
Dim vaStreets As Variant
Dim i As Long
Dim rCell As Range
Dim sAddress As String
Dim sCity As String, sState As String
Dim sZip As String
Dim lStreetPos As Long, lStatePos As Long
vaStates = Array(“ AL “, “ AK “, “ AZ “, “ AR “, “ CA “, “ CO “, “ CT “, “ DE “, “ DC “, “ FL “, “ GA “, “ HI “, “ ID “, “ IL “, “ IN “, “ IA “, “ KS “, “ KY “, “ LA “, “ ME “, “ MD “, “ MA “, “ MI “, “ MN “, “ MS “, “ MO “, “ MT “, “ NE “, “ NV “, “ NH “, “ NJ “, “ NM “, “ NY “, “ NC “, “ ND “, “ OH “, “ OK “, “ OR “, “ PA “, “ RI “, “ SC “, “ SD “, “ TN “, “ TX “, “ UT “, “ VT “, “ VA “, “ WA “, “ WV “, “ WI “, “ WY “, “ GU “, “ PR “)
vaStreets = Array(" CR ", " BLVD ", " RD ", " ST ", " AVE ", " CT ")
For Each rCell In Sheet1.Range("A1:A5").Cells
sAddress = "": sCity = "": sZip = "": sState = ""
For i = LBound(vaStreets) To UBound(vaStreets)
lStreetPos = InStr(1, rCell.Value, vaStreets(i))
If lStreetPos > 0 Then
sAddress = Trim(Left$(rCell.Value, lStreetPos + Len(vaStreets(i)) - 1))
Exit For
End If
Next i
For i = LBound(vaStates) To UBound(vaStates)
lStatePos = InStr(1, rCell.Value, vaStates(i))
If lStatePos > 0 Then
sCity = Trim(Mid$(rCell.Value, Len(sAddress) + 1, lStatePos - Len(sAddress) - 1))
sState = Trim(Mid$(rCell.Value, lStatePos + 1, Len(vaStates(i)) - 1))
sZip = Trim(Mid$(rCell.Value, lStatePos + Len(vaStates(i)), Len(rCell.Value)))
Exit For
End If
Next i
rCell.Offset(0, 1).Value = "'" & sAddress
rCell.Offset(0, 2).Value = "'" & sCity
rCell.Offset(0, 3).Value = "'" & sState
rCell.Offset(0, 4).Value = "'" & sZip
Next rCell
End Sub
这是我得到的错误:
error_image
您的拆分逻辑有些不稳定,这还不算您必须将大写街道数组与 Ucase()
字符串值进行比较。
然而,好消息 - 由于您似乎应用了后续地址逻辑,即围绕 last 冒号分隔符将城市、州 + zip 分组,您可以尝试以下代码:
Option Explicit ' declaration head of code module
Enum c ' define column constants
[_Start] = 0
add1
City
State
Zip
End Enum
Sub SplitAddresses()
With Sheet1
'define dataset
Dim lastRow As Long: lastRow = .Range("A" & .Rows.Count).End(xlUp).Row
Dim rng As Range: Set rng = .Range("A2:A" & lastRow)
'assign to variant datafield array (provide for 4 columns: Add+City+State+ZIP)
Dim v: v = rng.Resize(columnsize:=4).Value2
'split data
doSplit v
'write split results to any target, e.g. B:B
.Range("B2").Resize(UBound(v), 4) = v
End With
End Sub
帮助程序doSplit
Sub doSplit(data)
Dim i As Long
For i = LBound(data) To UBound(data)
Dim curAddress As String: curAddress = data(i, c.add1)
Dim tokens, tmp
tokens = Split(curAddress, ",")
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'a) analyze string part after last ","
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tmp = Split(Trim(tokens(UBound(tokens))) & " ", " ", 2)
'aa) add State + Zip (to columns 3..4)
data(i, c.State) = tmp(0): data(i, c.Zip) = tmp(1)
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'b) analyze first string part
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tmp = Split(tokens(UBound(tokens) - 1), " ")
'data(i, c.City) = tmp(UBound(tmp)) '<< only for 1-word city names
data(i, c.City) = getCity(tmp) '<< see edit below
'bb) add City + Address
data(i, c.add1) = Split(curAddress, data(i, c.City), 2)(0)
data(i, c.add1) = Replace(data(i, c.add1), ",", "")
Next i
End Sub
帮助功能 // 根据@RonRosenfeld 的评论进行编辑
由于会出现由复合词组成的城市名称,上面sub中的城市字符串赋值必须从data(r, c.City) = tmp(UBound(tmp))
改为
data(r, c.City) = getCity(tmp) ' << function call
函数getCity()
包括检查 common
第一部分为“北”、“西”或“新”,以避免至少检查包含复合城市名称的详尽列表。所有其他需要的多于一个词的城市名称必须在附加列表中定义 cities
:
Function getCity(tmp) As String
'Purp.: return valid city names of either one or two parts
'[1]Definitions
'a) List common first parts of city names like "West" in "West Orange"
Dim common$: common = "North,West,South,East,Grand,New"
'b) List all other needed cities consisting of compound words
Dim cities$: cities = "Sterling Heights,Ann Arbor"
'[2]Get potential city name
'a) Define tmp indices of potential city tokens
Dim first&: first = UBound(tmp) - 1
Dim secnd&: secnd = UBound(tmp)
'b) Build city name as compound string of tmp tokens
Dim City As String
City = Trim(IIf(first < 0, "", tmp(first) & " ") & tmp(secnd))
'[3]Check common first parts plus additional cities list
'a) Check for common name parts like e.g. "West" in "West Orange"
If InStr(common & ",", tmp(first) & ",") Then getCity = City: Exit Function
'b) Check rest in listed cities and return function result
getCity = IIf(InStr(cities, City) > 0, City, tmp(secnd))
End Function
根据您的评论,有一个 return 字符从城市中划出街道地址,地址的常规格式:street|City, State Zip Country
算法变得更加简单,因为一系列 Split
函数可以将地址部分分开。
我还使用了 Type
语句——不是必需的,但可以使代码更清晰,IMO。
根据格式的不同,某些 Trim
语句可能不是必需的,但它们不会造成伤害。
请注意,您可以更改 ranges/sheets 数据源和结果位置以满足您的特定要求。
编辑: 我刚读了你的评论,在 return 离开城市之前可能有多个 returns
街道地址。
.street
的代码相应更改
Option Explicit
Type Address
street As String
city As String
state As String
zip As String
country As String
End Type
Sub splitAddresses()
Dim wsSrc As Worksheet, wsRes As Worksheet, rRes As Range
Dim vSrc As Variant, vRes As Variant
Dim myAdr As Address
Dim v, w, x, y
Dim I As Long
Set wsSrc = Worksheets("sheet1")
'read into vba array for faster processing
With wsSrc
vSrc = .Range(.Cells(2, 1), .Cells(.Rows.Count, 1).End(xlUp))
End With
Set wsRes = Worksheets("Sheet1")
Set rRes = wsRes.Cells(1, 3)
ReDim vRes(0 To UBound(vSrc), 1 To 5)
'Headers
vRes(0, 1) = "Street"
vRes(0, 2) = "City"
vRes(0, 3) = "State"
vRes(0, 4) = "Zip"
vRes(0, 5) = "Country"
For I = 1 To UBound(vSrc)
v = Split(vSrc(I, 1), vbLf)
With myAdr
y = v
ReDim Preserve y(UBound(y) - 1)
.street = WorksheetFunction.Trim(Join(y, " "))
w = Split(Trim(v(UBound(v))), ",")
.city = w(0)
x = Split(Trim(w(1)))
.state = Trim(x(0))
.zip = Trim(x(1))
.country = Trim(x(2))
vRes(I, 1) = .street
vRes(I, 2) = .city
vRes(I, 3) = .state
vRes(I, 4) = .zip
vRes(I, 5) = .country
End With
Set rRes = rRes.Resize(rowsize:=UBound(vRes, 1) + 1, columnsize:=UBound(vRes, 2))
With rRes
.EntireColumn.Clear
.Value = vRes
.Rows(1).Font.Bold = True
.Columns(4).NumberFormat = "@"
.EntireColumn.AutoFit
End With
Next I
End Sub
我的数据在 Excel。我有几个 sheet 的数据,其中地址总是在每个 sheet 的同一列中。地址格式的示例包括:
1155 15th Street NW Suite 600 Washington, DC 20005 US
4600 Emperor Blvd #200 Durham, NC 27703-8577 US
200 Stevens Drive 费城, PA 19113 US
505 City Parkway West Orange, CA 92868 US
550 S Caldwell St, Charlotte, NC 28202-2633 US
1643 NW 136th Ave Ste H200 Sunrise, FL 33323-2857 US
我尝试了下面的代码,但此时在代码中出现错误 "sCity = Trim(Mid$(rCell.Value, Len(sAddress) + 1, lStatePos - Len (sAddress) - 1))"
谁能帮我解决这个问题?
Sub SplitAddresses()
Dim vaStates As Variant
Dim vaStreets As Variant
Dim i As Long
Dim rCell As Range
Dim sAddress As String
Dim sCity As String, sState As String
Dim sZip As String
Dim lStreetPos As Long, lStatePos As Long
vaStates = Array(“ AL “, “ AK “, “ AZ “, “ AR “, “ CA “, “ CO “, “ CT “, “ DE “, “ DC “, “ FL “, “ GA “, “ HI “, “ ID “, “ IL “, “ IN “, “ IA “, “ KS “, “ KY “, “ LA “, “ ME “, “ MD “, “ MA “, “ MI “, “ MN “, “ MS “, “ MO “, “ MT “, “ NE “, “ NV “, “ NH “, “ NJ “, “ NM “, “ NY “, “ NC “, “ ND “, “ OH “, “ OK “, “ OR “, “ PA “, “ RI “, “ SC “, “ SD “, “ TN “, “ TX “, “ UT “, “ VT “, “ VA “, “ WA “, “ WV “, “ WI “, “ WY “, “ GU “, “ PR “)
vaStreets = Array(" CR ", " BLVD ", " RD ", " ST ", " AVE ", " CT ")
For Each rCell In Sheet1.Range("A1:A5").Cells
sAddress = "": sCity = "": sZip = "": sState = ""
For i = LBound(vaStreets) To UBound(vaStreets)
lStreetPos = InStr(1, rCell.Value, vaStreets(i))
If lStreetPos > 0 Then
sAddress = Trim(Left$(rCell.Value, lStreetPos + Len(vaStreets(i)) - 1))
Exit For
End If
Next i
For i = LBound(vaStates) To UBound(vaStates)
lStatePos = InStr(1, rCell.Value, vaStates(i))
If lStatePos > 0 Then
sCity = Trim(Mid$(rCell.Value, Len(sAddress) + 1, lStatePos - Len(sAddress) - 1))
sState = Trim(Mid$(rCell.Value, lStatePos + 1, Len(vaStates(i)) - 1))
sZip = Trim(Mid$(rCell.Value, lStatePos + Len(vaStates(i)), Len(rCell.Value)))
Exit For
End If
Next i
rCell.Offset(0, 1).Value = "'" & sAddress
rCell.Offset(0, 2).Value = "'" & sCity
rCell.Offset(0, 3).Value = "'" & sState
rCell.Offset(0, 4).Value = "'" & sZip
Next rCell
End Sub
这是我得到的错误: error_image
您的拆分逻辑有些不稳定,这还不算您必须将大写街道数组与 Ucase()
字符串值进行比较。
好消息 - 由于您似乎应用了后续地址逻辑,即围绕 last 冒号分隔符将城市、州 + zip 分组,您可以尝试以下代码:
Option Explicit ' declaration head of code module
Enum c ' define column constants
[_Start] = 0
add1
City
State
Zip
End Enum
Sub SplitAddresses()
With Sheet1
'define dataset
Dim lastRow As Long: lastRow = .Range("A" & .Rows.Count).End(xlUp).Row
Dim rng As Range: Set rng = .Range("A2:A" & lastRow)
'assign to variant datafield array (provide for 4 columns: Add+City+State+ZIP)
Dim v: v = rng.Resize(columnsize:=4).Value2
'split data
doSplit v
'write split results to any target, e.g. B:B
.Range("B2").Resize(UBound(v), 4) = v
End With
End Sub
帮助程序doSplit
Sub doSplit(data)
Dim i As Long
For i = LBound(data) To UBound(data)
Dim curAddress As String: curAddress = data(i, c.add1)
Dim tokens, tmp
tokens = Split(curAddress, ",")
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'a) analyze string part after last ","
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tmp = Split(Trim(tokens(UBound(tokens))) & " ", " ", 2)
'aa) add State + Zip (to columns 3..4)
data(i, c.State) = tmp(0): data(i, c.Zip) = tmp(1)
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'b) analyze first string part
'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tmp = Split(tokens(UBound(tokens) - 1), " ")
'data(i, c.City) = tmp(UBound(tmp)) '<< only for 1-word city names
data(i, c.City) = getCity(tmp) '<< see edit below
'bb) add City + Address
data(i, c.add1) = Split(curAddress, data(i, c.City), 2)(0)
data(i, c.add1) = Replace(data(i, c.add1), ",", "")
Next i
End Sub
帮助功能 // 根据@RonRosenfeld 的评论进行编辑
由于会出现由复合词组成的城市名称,上面sub中的城市字符串赋值必须从data(r, c.City) = tmp(UBound(tmp))
改为
data(r, c.City) = getCity(tmp) ' << function call
函数getCity()
包括检查 common
第一部分为“北”、“西”或“新”,以避免至少检查包含复合城市名称的详尽列表。所有其他需要的多于一个词的城市名称必须在附加列表中定义 cities
:
Function getCity(tmp) As String
'Purp.: return valid city names of either one or two parts
'[1]Definitions
'a) List common first parts of city names like "West" in "West Orange"
Dim common$: common = "North,West,South,East,Grand,New"
'b) List all other needed cities consisting of compound words
Dim cities$: cities = "Sterling Heights,Ann Arbor"
'[2]Get potential city name
'a) Define tmp indices of potential city tokens
Dim first&: first = UBound(tmp) - 1
Dim secnd&: secnd = UBound(tmp)
'b) Build city name as compound string of tmp tokens
Dim City As String
City = Trim(IIf(first < 0, "", tmp(first) & " ") & tmp(secnd))
'[3]Check common first parts plus additional cities list
'a) Check for common name parts like e.g. "West" in "West Orange"
If InStr(common & ",", tmp(first) & ",") Then getCity = City: Exit Function
'b) Check rest in listed cities and return function result
getCity = IIf(InStr(cities, City) > 0, City, tmp(secnd))
End Function
根据您的评论,有一个 return 字符从城市中划出街道地址,地址的常规格式:street|City, State Zip Country
算法变得更加简单,因为一系列 Split
函数可以将地址部分分开。
我还使用了 Type
语句——不是必需的,但可以使代码更清晰,IMO。
根据格式的不同,某些 Trim
语句可能不是必需的,但它们不会造成伤害。
请注意,您可以更改 ranges/sheets 数据源和结果位置以满足您的特定要求。
编辑: 我刚读了你的评论,在 return 离开城市之前可能有多个 returns
街道地址。
.street
的代码相应更改
Option Explicit
Type Address
street As String
city As String
state As String
zip As String
country As String
End Type
Sub splitAddresses()
Dim wsSrc As Worksheet, wsRes As Worksheet, rRes As Range
Dim vSrc As Variant, vRes As Variant
Dim myAdr As Address
Dim v, w, x, y
Dim I As Long
Set wsSrc = Worksheets("sheet1")
'read into vba array for faster processing
With wsSrc
vSrc = .Range(.Cells(2, 1), .Cells(.Rows.Count, 1).End(xlUp))
End With
Set wsRes = Worksheets("Sheet1")
Set rRes = wsRes.Cells(1, 3)
ReDim vRes(0 To UBound(vSrc), 1 To 5)
'Headers
vRes(0, 1) = "Street"
vRes(0, 2) = "City"
vRes(0, 3) = "State"
vRes(0, 4) = "Zip"
vRes(0, 5) = "Country"
For I = 1 To UBound(vSrc)
v = Split(vSrc(I, 1), vbLf)
With myAdr
y = v
ReDim Preserve y(UBound(y) - 1)
.street = WorksheetFunction.Trim(Join(y, " "))
w = Split(Trim(v(UBound(v))), ",")
.city = w(0)
x = Split(Trim(w(1)))
.state = Trim(x(0))
.zip = Trim(x(1))
.country = Trim(x(2))
vRes(I, 1) = .street
vRes(I, 2) = .city
vRes(I, 3) = .state
vRes(I, 4) = .zip
vRes(I, 5) = .country
End With
Set rRes = rRes.Resize(rowsize:=UBound(vRes, 1) + 1, columnsize:=UBound(vRes, 2))
With rRes
.EntireColumn.Clear
.Value = vRes
.Rows(1).Font.Bold = True
.Columns(4).NumberFormat = "@"
.EntireColumn.AutoFit
End With
Next I
End Sub