多维 xarray 到 dataframe 到 excel
Multidimensional xarray to dataframe to excel
我在 Python 中有一个三维的 xarray DataArray,构建方式如下:
import numpy as np
import pandas as pd
import xarray as xr

# Raw measurements, indexed as [patient][parameter][day]:
# 2 patients x 3 parameters x 4 days.
data_np = np.array(
    [
        [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
        [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
    ]
)

# Axis names, in the same order as the axes of ``data_np``.
dims = ['patient', 'parameter', 'day']

# Labels along each axis.
coords = {
    'patient': ['Joe', 'Bill'],
    'parameter': ['a', 'b', 'c'],
    'day': [0, 1, 2, 3],
}

# Labelled 3-D array used by the rest of the examples.
data = xr.DataArray(data_np, dims=dims, coords=coords)
我想将其导出到具有以下结构的 excel sheet:
期望的输出
使用 pandas DataFrame,我只能得到所有变量(dims)都沿同一条轴展开的 Excel 工作表:
# Flatten the array into a single 'value' column indexed by all three dims,
# then transpose so that every dim ends up on the column axis. This is the
# layout the question is trying to move away from.
df = data.to_dataframe('value').transpose()

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)
获得输出:
怎样才能得到想要的结构?
使用 `unstack`:
# Keep the first index level as rows and pivot index levels 1 and 2 into a
# two-level column header.
(
    data.to_dataframe('value')
    .unstack(level=[1, 2])
    # Remove the outermost column level (the 'value' column name).
    .droplevel(level=0, axis=1)
    # Clear the row-axis name and relabel the two column levels.
    .rename_axis(index=None, columns=['Parameter', 'Patient / Day'])
)
您可以使用 `unstack()` 方法来随意排列数据:
import numpy as np
import pandas as pd
import xarray as xr

# Raw measurements, indexed as [patient][parameter][day].
data_np = np.array([
    [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
    [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
])
dims = ['patient', 'parameter', 'day']
coords = {'patient': ['Joe', 'Bill'], 'parameter': ['a', 'b', 'c'], 'day': [0, 1, 2, 3]}
data = xr.DataArray(data_np, dims=dims, coords=coords)

# Convert to a Series indexed by all three dims, then move index levels 1 and
# 2 (parameter, day) into the columns, leaving one row per patient.
df = data.to_series().unstack(level=[1, 2])

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)
或者您可以不使用 xarray,而是借助 `pd.MultiIndex` 直接从 NumPy 数组创建 DataFrame:
import numpy as np
import pandas as pd

# Raw measurements, indexed as [patient][parameter][day].
data_np = np.array([
    [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
    [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
])
dims = ['patient', 'parameter', 'day']
coords = {'patient': ['Joe', 'Bill'], 'parameter': ['a', 'b', 'c'], 'day': [0, 1, 2, 3]}

# Cartesian product of the axis labels, taken in ``dims`` order so it lines up
# with the row-major order of ``data_np.flatten()``. Iterating ``dims``
# directly avoids hard-coding the number of dimensions.
mi = pd.MultiIndex.from_product([coords[d] for d in dims], names=dims)

# One value per (patient, parameter, day); index levels 1 and 2 are then moved
# into the columns, leaving one row per patient.
df = pd.Series(data_np.flatten(), index=mi).unstack(level=[1, 2])

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)
我在 Python 中有一个三维的 xarray DataArray,构建方式如下:
import numpy as np
import pandas as pd
import xarray as xr

# Raw measurements, indexed as [patient][parameter][day]:
# 2 patients x 3 parameters x 4 days.
data_np = np.array(
    [
        [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
        [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
    ]
)

# Axis names, in the same order as the axes of ``data_np``.
dims = ['patient', 'parameter', 'day']

# Labels along each axis.
coords = {
    'patient': ['Joe', 'Bill'],
    'parameter': ['a', 'b', 'c'],
    'day': [0, 1, 2, 3],
}

# Labelled 3-D array used by the rest of the examples.
data = xr.DataArray(data_np, dims=dims, coords=coords)
我想将其导出到具有以下结构的 excel sheet:
期望的输出
使用 pandas DataFrame,我只能得到所有变量(dims)都沿同一条轴展开的 Excel 工作表:
# Flatten the array into a single 'value' column indexed by all three dims,
# then transpose so that every dim ends up on the column axis. This is the
# layout the question is trying to move away from.
df = data.to_dataframe('value').transpose()

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)
获得输出:
怎样才能得到想要的结构?
使用 `unstack`:
# Keep the first index level as rows and pivot index levels 1 and 2 into a
# two-level column header.
(
    data.to_dataframe('value')
    .unstack(level=[1, 2])
    # Remove the outermost column level (the 'value' column name).
    .droplevel(level=0, axis=1)
    # Clear the row-axis name and relabel the two column levels.
    .rename_axis(index=None, columns=['Parameter', 'Patient / Day'])
)
您可以使用 `unstack()` 方法来随意排列数据:
import numpy as np
import pandas as pd
import xarray as xr

# Raw measurements, indexed as [patient][parameter][day].
data_np = np.array([
    [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
    [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
])
dims = ['patient', 'parameter', 'day']
coords = {'patient': ['Joe', 'Bill'], 'parameter': ['a', 'b', 'c'], 'day': [0, 1, 2, 3]}
data = xr.DataArray(data_np, dims=dims, coords=coords)

# Convert to a Series indexed by all three dims, then move index levels 1 and
# 2 (parameter, day) into the columns, leaving one row per patient.
df = data.to_series().unstack(level=[1, 2])

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)
或者您可以不使用 xarray,而是借助 `pd.MultiIndex` 直接从 NumPy 数组创建 DataFrame:
import numpy as np
import pandas as pd

# Raw measurements, indexed as [patient][parameter][day].
data_np = np.array([
    [[1, 2, 0, 8], [3, 4, 11, 2], [5, 6, 43, 90]],
    [[7, 8, 2, 66], [9, 10, 31, 21], [11, 12, 56, 45]],
])
dims = ['patient', 'parameter', 'day']
coords = {'patient': ['Joe', 'Bill'], 'parameter': ['a', 'b', 'c'], 'day': [0, 1, 2, 3]}

# Cartesian product of the axis labels, taken in ``dims`` order so it lines up
# with the row-major order of ``data_np.flatten()``. Iterating ``dims``
# directly avoids hard-coding the number of dimensions.
mi = pd.MultiIndex.from_product([coords[d] for d in dims], names=dims)

# One value per (patient, parameter, day); index levels 1 and 2 are then moved
# into the columns, leaving one row per patient.
df = pd.Series(data_np.flatten(), index=mi).unstack(level=[1, 2])

# The write has to be indented under ``with``: the writer is saved and closed
# when the block exits.
with pd.ExcelWriter("main.xlsx") as writer:
    df.to_excel(writer)