使用 Plotly 绘制大量地理数据的散点图
Plotly scatter large volume geographic data
我试图编写一段代码,将 2021 年发生的所有森林火灾可视化。包含数据的 CSV 文件大约为 1.5 GB。这个程序在我看来是正确的,但当我尝试运行它时,它会卡住,既不显示任何可视化结果,也没有错误消息。上次我让它运行了将近半天,直到 Python 崩溃。
我不知道是出现了无限循环,是因为文件太大,还是我遗漏了其他东西。
有人可以提供反馈吗?
这是我的代码:
import csv
from datetime import datetime
from plotly.graph_objs import Scattergeo , Layout
from plotly import offline
# Read every fire detection from the CSV export and draw all of them as
# scattergeo markers in a standalone HTML file.
# NOTE: plotly builds one marker per row, so a very large input can still
# exhaust memory; the numeric conversion below fixes the marker values, not
# the data-volume problem.
filename = 'fire_nrt_J1V-C2_252284.csv'

lats, lons, brights, dates = [], [], [], []
# newline='' is the mode the csv module documents for reading files.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header line so it is not parsed as data
    for row in reader:
        # csv.reader yields strings. Without float(), `5 * bright` below would
        # repeat the text five times instead of scaling a number.
        # Columns 0, 1, 2 and 5 are used as latitude, longitude, brightness
        # and date respectively.
        lats.append(float(row[0]))
        lons.append(float(row[1]))
        brights.append(float(row[2]))
        dates.append(datetime.strptime(row[5], '%Y-%m-%d'))

data = [{
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'text': dates,
    'marker': {
        # NOTE(review): if brightness is a temperature in Kelvin this factor
        # yields very large markers - confirm the intended scale.
        'size': [5 * bright for bright in brights],
        'color': brights,
        'colorscale': 'Reds',
        'colorbar': {'title': 'Fire brightness'},
    },
}]
my_layout = Layout(title="Forestfires during the year 2021")
fig = {'data': data, 'layout': my_layout}
offline.plot(fig, filename='global_fires_2021.html')
- 已在此处找到您所描述的数据:https://wifire-data.sdsc.edu/dataset/viirs-i-band-375-m-active-fire-data/resource/3ce73b20-f584-44f7-996b-2f319c480294
- plotly 会为散点图上绘制的每个点消耗资源,因此在资源耗尽之前,可绘制的点数是有上限的
- 还有其他方法可以绘制更多点
- https://plotly.com/python/mapbox-density-heatmaps/ 限制较少,但对非常大的数据集仍然有限制
- https://plotly.com/python/datashader/ 由于生成的是图像,可以处理非常大的数据集;但使用起来(安装和熟悉 API)更具挑战性
数据来源
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Download the CSV at the URL below into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_7d.csv",
)
# Bare expression: displays the frame when run as the last line of a notebook cell.
df
scatter_geo
- 仅限于 1000 行的随机样本
# Scatter plot on a geographic map, limited to a random sample of rows so the
# figure stays small enough to render.
# min() keeps DataFrame.sample from raising ValueError when the frame holds
# fewer than 1000 rows.
px.scatter_geo(
    df.sample(min(1000, len(df))),
    lat="latitude",
    lon="longitude",
    color="bright_ti4",
    # size="size",
    hover_data=["acq_date"],
    color_continuous_scale="reds",
)
density_mapbox(密度热力图)
# Density heatmap on a mapbox base map, drawn from a random sample of rows.
# min() keeps DataFrame.sample from raising ValueError when the frame holds
# fewer than 5000 rows.
px.density_mapbox(
    df.sample(min(5000, len(df))),
    lat="latitude",
    lon="longitude",
    z="bright_ti4",
    radius=3,
    color_continuous_scale="reds",
    zoom=1,
    mapbox_style="carto-positron",
)
datashader + Mapbox
- 所有数据
- 一些库更难安装和使用
- 需要处理这个问题https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
import datashader as ds, colorcet
from pyproj import Transformer
# Rasterise every row of df with datashader and lay the resulting image over
# a mapbox base map, so the browser receives one image instead of one marker
# per point.

# Transformer used to convert the image corners from EPSG:3857 back to EPSG:4326.
t3857_to_4326 = Transformer.from_crs(3857, 4326, always_xy=True)

# project CRS to ensure image overlays appropriately back over mapbox
# https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
df.loc[:, "longitude_3857"], df.loc[:, "latitude_3857"] = ds.utils.lnglat_to_meters(
    df.longitude, df.latitude
)

# Width and height, in pixels, of the rasterised image.
RESOLUTION = 1000
cvs = ds.Canvas(plot_width=RESOLUTION, plot_height=RESOLUTION)
agg = cvs.points(df, x="longitude_3857", y="latitude_3857")
img = ds.tf.shade(agg, cmap=colorcet.fire).to_pil()

fig = go.Figure(go.Scattermapbox())
fig.update_layout(
    mapbox={
        "style": "carto-positron",
        "layers": [
            {
                "sourcetype": "image",
                "source": img,
                # Sets the coordinates array contains [longitude, latitude] pairs for the image corners listed in
                # clockwise order: top left, top right, bottom right, bottom left.
                "coordinates": [
                    t3857_to_4326.transform(
                        agg.coords["longitude_3857"].values[a],
                        agg.coords["latitude_3857"].values[b],
                    )
                    # a indexes the x coordinates and b the y coordinates;
                    # 0 is the first and -1 the last value of each axis.
                    for a, b in [(0, -1), (-1, -1), (-1, 0), (0, 0)]
                ],
            }
        ],
    },
    # The dict previously listed "r" twice and never set "b", so the bottom
    # margin was left at its default.
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
我试图编写一段代码,将 2021 年发生的所有森林火灾可视化。包含数据的 CSV 文件大约为 1.5 GB。这个程序在我看来是正确的,但当我尝试运行它时,它会卡住,既不显示任何可视化结果,也没有错误消息。上次我让它运行了将近半天,直到 Python 崩溃。我不知道是出现了无限循环,是因为文件太大,还是我遗漏了其他东西。有人可以提供反馈吗?
这是我的代码:
import csv
from datetime import datetime
from plotly.graph_objs import Scattergeo , Layout
from plotly import offline
# Read every fire detection from the CSV export and draw all of them as
# scattergeo markers in a standalone HTML file.
# NOTE: plotly builds one marker per row, so a very large input can still
# exhaust memory; the numeric conversion below fixes the marker values, not
# the data-volume problem.
filename = 'fire_nrt_J1V-C2_252284.csv'

lats, lons, brights, dates = [], [], [], []
# newline='' is the mode the csv module documents for reading files.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header line so it is not parsed as data
    for row in reader:
        # csv.reader yields strings. Without float(), `5 * bright` below would
        # repeat the text five times instead of scaling a number.
        # Columns 0, 1, 2 and 5 are used as latitude, longitude, brightness
        # and date respectively.
        lats.append(float(row[0]))
        lons.append(float(row[1]))
        brights.append(float(row[2]))
        dates.append(datetime.strptime(row[5], '%Y-%m-%d'))

data = [{
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'text': dates,
    'marker': {
        # NOTE(review): if brightness is a temperature in Kelvin this factor
        # yields very large markers - confirm the intended scale.
        'size': [5 * bright for bright in brights],
        'color': brights,
        'colorscale': 'Reds',
        'colorbar': {'title': 'Fire brightness'},
    },
}]
my_layout = Layout(title="Forestfires during the year 2021")
fig = {'data': data, 'layout': my_layout}
offline.plot(fig, filename='global_fires_2021.html')
- 已找到您在此处描述的数据https://wifire-data.sdsc.edu/dataset/viirs-i-band-375-m-active-fire-data/resource/3ce73b20-f584-44f7-996b-2f319c480294
- plotly 会为散点图上绘制的每个点消耗资源,因此在资源耗尽之前,可绘制的点数是有上限的
- 还有其他方法可以绘制更多点
- https://plotly.com/python/mapbox-density-heatmaps/ 限制较少,但对非常大的数据集仍然有限制
- https://plotly.com/python/datashader/ 由于生成的是图像,可以处理非常大的数据集;但使用起来(安装和熟悉 API)更具挑战性
数据来源
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Download the CSV at the URL below into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_7d.csv",
)
# Bare expression: displays the frame when run as the last line of a notebook cell.
df
scatter_geo
- 仅限于 1000 行的随机样本
# Scatter plot on a geographic map, limited to a random sample of rows so the
# figure stays small enough to render.
# min() keeps DataFrame.sample from raising ValueError when the frame holds
# fewer than 1000 rows.
px.scatter_geo(
    df.sample(min(1000, len(df))),
    lat="latitude",
    lon="longitude",
    color="bright_ti4",
    # size="size",
    hover_data=["acq_date"],
    color_continuous_scale="reds",
)
density_mapbox(密度热力图)
# Density heatmap on a mapbox base map, drawn from a random sample of rows.
# min() keeps DataFrame.sample from raising ValueError when the frame holds
# fewer than 5000 rows.
px.density_mapbox(
    df.sample(min(5000, len(df))),
    lat="latitude",
    lon="longitude",
    z="bright_ti4",
    radius=3,
    color_continuous_scale="reds",
    zoom=1,
    mapbox_style="carto-positron",
)
datashader + Mapbox
- 所有数据
- 一些库更难安装和使用
- 需要处理这个问题https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
import datashader as ds, colorcet
from pyproj import Transformer
# Rasterise every row of df with datashader and lay the resulting image over
# a mapbox base map, so the browser receives one image instead of one marker
# per point.

# Transformer used to convert the image corners from EPSG:3857 back to EPSG:4326.
t3857_to_4326 = Transformer.from_crs(3857, 4326, always_xy=True)

# project CRS to ensure image overlays appropriately back over mapbox
# https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
df.loc[:, "longitude_3857"], df.loc[:, "latitude_3857"] = ds.utils.lnglat_to_meters(
    df.longitude, df.latitude
)

# Width and height, in pixels, of the rasterised image.
RESOLUTION = 1000
cvs = ds.Canvas(plot_width=RESOLUTION, plot_height=RESOLUTION)
agg = cvs.points(df, x="longitude_3857", y="latitude_3857")
img = ds.tf.shade(agg, cmap=colorcet.fire).to_pil()

fig = go.Figure(go.Scattermapbox())
fig.update_layout(
    mapbox={
        "style": "carto-positron",
        "layers": [
            {
                "sourcetype": "image",
                "source": img,
                # Sets the coordinates array contains [longitude, latitude] pairs for the image corners listed in
                # clockwise order: top left, top right, bottom right, bottom left.
                "coordinates": [
                    t3857_to_4326.transform(
                        agg.coords["longitude_3857"].values[a],
                        agg.coords["latitude_3857"].values[b],
                    )
                    # a indexes the x coordinates and b the y coordinates;
                    # 0 is the first and -1 the last value of each axis.
                    for a, b in [(0, -1), (-1, -1), (-1, 0), (0, 0)]
                ],
            }
        ],
    },
    # The dict previously listed "r" twice and never set "b", so the bottom
    # margin was left at its default.
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)