一维数据集的空间分区算法?
spatial partitioning algorithms for 1D dataset?
这是以 10,000 个方格表示地理区域的网格,其中每个方格为 55225 平方米。
该数据集的每平方交通量在 100 到 1000 之间。
例如:
平方 1 - 100,
平方 2 - 500
.
.
平方 10,000 - 800
现在,我想以这样的方式划分这个区域,每个分区可能有不同的区域但会承载相似的流量,分区之间流量的标准偏差应该是 minimum.Any 对空间分区的建议算法?
为了告知您的程序,您必须做出一些决定。想到的first question
是分区数是否定义了? second question
是对一个组是否有任何几何限制,即它们必须是连续的,或者任何特定的形状是否理想? third question
是关于多好才够好?提供理想答案的算法(可能是贪婪算法)和提供最佳答案的算法(可能是穷举或 "brute force" 方法)的 运行 时间通常存在巨大差异。通过将具有相同体积的任何 2 个扇区分组,您将获得最小标准差,因为您的组将各自具有 0 标准差。无论如何,这听起来很像扩展版 bin packing problem
,您可能应该从那里开始您的文献综述。
您需要按顺序收拾垃圾箱...
在这里,我为我的圈子选择了偏向于最高流量的中心点,并从那里填充它们。
class trafficNode:
def __init__(self,v,i):
self.cluster = None
self.value = v
self.index = i
self.occupied = False
def occupy(self):
self.occupied=True
def tryAdd(xList,mList,irow,icol):
try:
if not(mList[irow][icol] in xList and !mList[irow][icol].occupied):
xlist.append(mList[irow][icol])
except IndexError:
chill = None
return(xlist)
class cluster:
def __init__(self):
self.nodes = []
def getTotal(self):
total = 0
for k in self.nodes:
total += k.value
return(total)
def addNode(self,n):
self.nodes.append(n)
def getNeighbors(self,m,r = 0):
neighbors = []
for k in self.nodes:
i = k.index()
for k2 in range(0,4):
if k2==0:
neighbors = tryAdd(neighbors,m,i[0]+0,i[1]+1)
elif k2==1:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]+0)
elif k2==2:
neighbors = tryAdd(neighbors,m,i[0]+0,i[1]-1)
elif k2==3:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]+0)
if r != 0:
if k2==0:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]+1)
elif k2==1:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]-1)
elif k2==2:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]+1)
elif k2==3:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]-1)
return(neighbors)
def seed(self,m,irow,icol):
self.nodes.append(m[irow][icol])
m[irow][icol].occupy()
def propogate(self,m,target):
total = 0
for k in self.nodes:
total += k.value
s = 1
while total<target:
s = 1 if !s else 0
lastTotal=Total
n = self.getNeighbors(m,s)
if len(n==0):
break;
else:
if(abs(target-(total+sum([k.value for k in n])))<abs(target-total)):
for k in n:
self.nodes.append(k)
m[k.index[0]][k.index[1]].occupy()
else:
break;
def contains(self,i):
ret = False
for k in self.nodes
if k.index == i
ret = False
break;
return(ret)
def parseData(d,s): # Where d is the source datafile and s is the number of units per row.
ret = []
f = open(d,"r")
text = f.read()
lines = f.split("\n")
n = 0
r = 0
temp = []
for k in lines:
v = k.split(" - ")[1]
n+=1
temp.append(trafficNode(v,(r,n)))
if n == s:
n = 0
r += 1
ret.append(temp)
temp = []
return(ret)
def mapTotal(m):
return sum([sum([k2.value for k2 in k]) for k in m])
def pTotal(m,n):
return(mapTotal/n)
import sys
infile = sys.argv[1]
ncols = sys.argv[2]
ntowers = sys.argv[3]
m = parseData(infile,ncols)
s = pTotal(m,ntowers)
spots = [k.index for k in m if !k.occupied]
clusters = []
while len(spots > 0):
spotVals = [m[k[0]][k[1]].value for k in spots]
nextSpotIndex = spots[spotVals.index(max(spotVals))]
clusters.append(cluster)
clusters[n].seed(self,m,nextSpotIndex[0],nextSpotIndex[1])
clusters[n].propogate(m,s)
spots = [k.index for k in m if !k.occupied]
那是说我还没有测试过...您的数据是以该图像还是其他文件的形式出现的?
这是以 10,000 个方格表示地理区域的网格,其中每个方格为 55225 平方米。
该数据集的每平方交通量在 100 到 1000 之间。
例如:
平方 1 - 100,
平方 2 - 500
.
.
平方 10,000 - 800
现在,我想以这样的方式划分这个区域,每个分区可能有不同的区域但会承载相似的流量,分区之间流量的标准偏差应该是 minimum.Any 对空间分区的建议算法?
为了告知您的程序,您必须做出一些决定。想到的first question
是分区数是否定义了? second question
是对一个组是否有任何几何限制,即它们必须是连续的,或者任何特定的形状是否理想? third question
是关于多好才够好?提供理想答案的算法(可能是贪婪算法)和提供最佳答案的算法(可能是穷举或 "brute force" 方法)的 运行 时间通常存在巨大差异。通过将具有相同体积的任何 2 个扇区分组,您将获得最小标准差,因为您的组将各自具有 0 标准差。无论如何,这听起来很像扩展版 bin packing problem
,您可能应该从那里开始您的文献综述。
您需要按顺序收拾垃圾箱...
在这里,我为我的圈子选择了偏向于最高流量的中心点,并从那里填充它们。
class trafficNode:
def __init__(self,v,i):
self.cluster = None
self.value = v
self.index = i
self.occupied = False
def occupy(self):
self.occupied=True
def tryAdd(xList,mList,irow,icol):
try:
if not(mList[irow][icol] in xList and !mList[irow][icol].occupied):
xlist.append(mList[irow][icol])
except IndexError:
chill = None
return(xlist)
class cluster:
def __init__(self):
self.nodes = []
def getTotal(self):
total = 0
for k in self.nodes:
total += k.value
return(total)
def addNode(self,n):
self.nodes.append(n)
def getNeighbors(self,m,r = 0):
neighbors = []
for k in self.nodes:
i = k.index()
for k2 in range(0,4):
if k2==0:
neighbors = tryAdd(neighbors,m,i[0]+0,i[1]+1)
elif k2==1:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]+0)
elif k2==2:
neighbors = tryAdd(neighbors,m,i[0]+0,i[1]-1)
elif k2==3:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]+0)
if r != 0:
if k2==0:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]+1)
elif k2==1:
neighbors = tryAdd(neighbors,m,i[0]+1,i[1]-1)
elif k2==2:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]+1)
elif k2==3:
neighbors = tryAdd(neighbors,m,i[0]-1,i[1]-1)
return(neighbors)
def seed(self,m,irow,icol):
self.nodes.append(m[irow][icol])
m[irow][icol].occupy()
def propogate(self,m,target):
total = 0
for k in self.nodes:
total += k.value
s = 1
while total<target:
s = 1 if !s else 0
lastTotal=Total
n = self.getNeighbors(m,s)
if len(n==0):
break;
else:
if(abs(target-(total+sum([k.value for k in n])))<abs(target-total)):
for k in n:
self.nodes.append(k)
m[k.index[0]][k.index[1]].occupy()
else:
break;
def contains(self,i):
ret = False
for k in self.nodes
if k.index == i
ret = False
break;
return(ret)
def parseData(d,s): # Where d is the source datafile and s is the number of units per row.
ret = []
f = open(d,"r")
text = f.read()
lines = f.split("\n")
n = 0
r = 0
temp = []
for k in lines:
v = k.split(" - ")[1]
n+=1
temp.append(trafficNode(v,(r,n)))
if n == s:
n = 0
r += 1
ret.append(temp)
temp = []
return(ret)
def mapTotal(m):
return sum([sum([k2.value for k2 in k]) for k in m])
def pTotal(m,n):
return(mapTotal/n)
import sys
infile = sys.argv[1]
ncols = sys.argv[2]
ntowers = sys.argv[3]
m = parseData(infile,ncols)
s = pTotal(m,ntowers)
spots = [k.index for k in m if !k.occupied]
clusters = []
while len(spots > 0):
spotVals = [m[k[0]][k[1]].value for k in spots]
nextSpotIndex = spots[spotVals.index(max(spotVals))]
clusters.append(cluster)
clusters[n].seed(self,m,nextSpotIndex[0],nextSpotIndex[1])
clusters[n].propogate(m,s)
spots = [k.index for k in m if !k.occupied]
那是说我还没有测试过...您的数据是以该图像还是其他文件的形式出现的?