如何使用 GDAL 加速从光栅文件中提取点数据 python
How to speed up extracting data from raster file for points using GDAL python
我有 366 个包含雪数据的 tif 格式光栅图像文件(MODIS 卫星每日数据)和另一个包含 19,000 个位置(纬度和经度)的 csv 文件。我需要从光栅文件中收集雪数据。我尝试使用 GDAL python 库收集数据。但是,该程序从每个文件中收集数据大约需要 30 分钟。这意味着我必须 运行 该代码大约 180 个小时。以下是我正在使用的代码。请建议我是否可以提高程序执行的速度,或者是否有更好的方法可以实现相同的速度。
import gdal
import pandas
import numpy as np
import os,subprocess
def runCmdAndGetOutput(cmd) :
outList = []
proc = subprocess.Popen(cmd,stdout=subprocess.PIPE)
while True:
line = proc.stdout.readline()
if not line:
break
#the real code does filtering here
outList.append(line.rstrip())
print(outList)
# value = float(outList[2].decode("utf-8").replace("<Value>","").replace("</Value>",""))
value = float(outList[0].decode("utf-8"))
return value
# ndsiFile = "2016001.tif"
locs = "hkkhlocations.csv"
ndsFileLoc = r"D:\SrinivasaRao_Docs\MODIS_NDSI_V6_201600000499560\out"
# with open(locs) as f:
# locData = f.readlines()
latLnginfo = pandas.read_csv(locs)
print(latLnginfo.columns)
print(latLnginfo.shape)
# outDf = pandas.DataFrame()
outDf = pandas.DataFrame(np.zeros([len(latLnginfo),370])*np.nan)
day =1
print(os.listdir(ndsFileLoc))
print(type(os.listdir(ndsFileLoc)))
datasetsList = os.listdir(ndsFileLoc)
for eFile in datasetsList:
rCount = 0
# print(eFile)
cCount = int(eFile[4:7])
# print(cCount)
with open("output.csv") as f :
for line in f :
locData = line.split(",")
cmdToRun = ["gdallocationinfo" ,"-valonly", "-wgs84", os.path.join(ndsFileLoc,eFile) ,str(latLnginfo.iloc[rCount,4]), str(latLnginfo.iloc[rCount,3])]# str(locData[0]), str(locData[1])]
v = runCmdAndGetOutput(cmdToRun)
outDf.iloc[rCount,cCount]= float(v)
rCount = rCount + 1
print("rowno: ", rCount, "Dayno :", cCount, "SCF value: ", v)
day = day+1
outDf.to_csv('test.csv')
'''
def run_cmd_processor(efile):
r_count = 0
c_count = int(efile[4:7])
with open("output.csv") as f :
for line in f :
loc_data = line.split(",")
# ~
pool = multiprocessing.Pool(processes=2) # You can add more processes
pool.map(run_cmd_processor, datasetsList)
pool.close()
pool.join()
好像只有"for eFile in datasetsList:"可以多处理分支。可以改成upper.
我有 366 个包含雪数据的 tif 格式光栅图像文件(MODIS 卫星每日数据)和另一个包含 19,000 个位置(纬度和经度)的 csv 文件。我需要从光栅文件中收集雪数据。我尝试使用 GDAL python 库收集数据。但是,该程序从每个文件中收集数据大约需要 30 分钟。这意味着我必须 运行 该代码大约 180 个小时。以下是我正在使用的代码。请建议我是否可以提高程序执行的速度,或者是否有更好的方法可以实现相同的速度。
import gdal
import pandas
import numpy as np
import os,subprocess
def runCmdAndGetOutput(cmd) :
outList = []
proc = subprocess.Popen(cmd,stdout=subprocess.PIPE)
while True:
line = proc.stdout.readline()
if not line:
break
#the real code does filtering here
outList.append(line.rstrip())
print(outList)
# value = float(outList[2].decode("utf-8").replace("<Value>","").replace("</Value>",""))
value = float(outList[0].decode("utf-8"))
return value
# ndsiFile = "2016001.tif"
locs = "hkkhlocations.csv"
ndsFileLoc = r"D:\SrinivasaRao_Docs\MODIS_NDSI_V6_201600000499560\out"
# with open(locs) as f:
# locData = f.readlines()
latLnginfo = pandas.read_csv(locs)
print(latLnginfo.columns)
print(latLnginfo.shape)
# outDf = pandas.DataFrame()
outDf = pandas.DataFrame(np.zeros([len(latLnginfo),370])*np.nan)
day =1
print(os.listdir(ndsFileLoc))
print(type(os.listdir(ndsFileLoc)))
datasetsList = os.listdir(ndsFileLoc)
for eFile in datasetsList:
rCount = 0
# print(eFile)
cCount = int(eFile[4:7])
# print(cCount)
with open("output.csv") as f :
for line in f :
locData = line.split(",")
cmdToRun = ["gdallocationinfo" ,"-valonly", "-wgs84", os.path.join(ndsFileLoc,eFile) ,str(latLnginfo.iloc[rCount,4]), str(latLnginfo.iloc[rCount,3])]# str(locData[0]), str(locData[1])]
v = runCmdAndGetOutput(cmdToRun)
outDf.iloc[rCount,cCount]= float(v)
rCount = rCount + 1
print("rowno: ", rCount, "Dayno :", cCount, "SCF value: ", v)
day = day+1
outDf.to_csv('test.csv')
'''
def run_cmd_processor(efile):
r_count = 0
c_count = int(efile[4:7])
with open("output.csv") as f :
for line in f :
loc_data = line.split(",")
# ~
pool = multiprocessing.Pool(processes=2) # You can add more processes
pool.map(run_cmd_processor, datasetsList)
pool.close()
pool.join()
好像只有"for eFile in datasetsList:"可以多处理分支。可以改成upper.