将 xlsxwriter 格式应用于 AWS lambda 函数中的 Excel 文件

Apply xlsxwriter formatting to Excel file in AWS lambda function

在 Jupyter notebook 中,我可以创建两个 Pandas 数据框,并将它们导出到同一个 Excel 工作簿的不同工作表中,同时应用一些额外的格式,包括文本自动换行、冻结窗格、粗体标题行和自动筛选。

代码如下:

# MODULES
import pandas as pd 
import numpy as np
from datetime import date

# Build two sample DataFrames; each one is written to its own worksheet.
df = pd.DataFrame({'Data': [10, 22, 31, 43, 57, 99, 65, 74, 88],
                   'Data2': [10, 22, 31, 43, 57, 99, 65, 74, 88],
                   'Data3': [10, 22, 31, 43, 57, 99, 65, 74, 88]})

df2 = pd.DataFrame({'df2_Data': ['blue', 'yellow', 'purple', 'orange', 'green', 'brown', 'gray', 'white', 'red'],
                    'df2_Data2': ['bike', 'car', 'bus', 'train', 'boat', 'truck', 'plane', 'scooter', 'skateboard'],
                    'df2_Data3': ['chicken', 'cow', 'dog', 'crocodile', 'snake', 'pig', 'rat', 'mouse', 'monkey']})

# Date stamp embedded in the output file name, formatted like "March 05, 2024".
today = date.today()
d2 = today.strftime("%B %d, %Y")

writer = pd.ExcelWriter('ExcelExample{}.xlsx'.format(d2), engine='xlsxwriter')
sheets_in_writer = ['Sheet1', 'sheet2']
data_frame_for_writer = [df, df2]

# Write the raw data first; the header row is restyled afterwards.
for frame, sheet_name in zip(data_frame_for_writer, sheets_in_writer):
    frame.to_excel(writer, sheet_name=sheet_name, index=False)

# The underlying XlsxWriter workbook is needed to create cell formats.
workbook = writer.book

header_format = workbook.add_format({'bold': True, 'text_wrap': True, 'size': 10,
                                     'valign': 'top', 'fg_color': '#c7e7ff', 'border': 1})

# Apply the same formatting to every sheet that was written.
for frame, sheet_name in zip(data_frame_for_writer, sheets_in_writer):
    column_count = frame.shape[1]
    if column_count == 0:
        # Nothing to format on a sheet without columns.
        continue
    worksheet = writer.sheets[sheet_name]
    last_col = column_count - 1

    # Sheet-wide settings are applied once per sheet rather than once per
    # column, and the column range comes from the frame itself instead of an
    # undefined `max_col`.
    worksheet.set_column(0, last_col, 12)
    worksheet.autofilter(0, 0, 0, last_col)
    worksheet.freeze_panes(1, 0)

    # Rewrite each header cell so it picks up the custom header format.
    for col_num, value in enumerate(frame.columns.values):
        worksheet.write(0, col_num, value, header_format)

# close() finalises and saves the workbook; ExcelWriter.save() no longer
# exists in pandas 2.x.
writer.close()

这将生成以下 Excel 工作簿,其中两个(即所有)工作表都应用了上述格式。

由于我不熟悉 boto3,我很难在 AWS lambda 函数中重现此功能

我的尝试如下,它确实能把 Excel 文件保存到指定的目的地,但当然没有任何所需的格式。

import boto3 

import numpy as np
import pandas as pd

import io 
from io import BytesIO
from io import StringIO 

from datetime import date

def lambda_handler(event, context):
    """Export two DataFrames to an in-memory Excel workbook and upload it to S3.

    The workbook is built in a ``BytesIO`` buffer with the XlsxWriter engine
    and stored under ``output/ExcelExample<date>.xlsx`` in the bucket.

    NOTE(review): ``df`` and ``df2`` are not defined in this function; they
    are presumably module-level DataFrames defined elsewhere - confirm.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).
    """
    # Date stamp embedded in the object key, formatted like "March 05, 2024".
    today = date.today()
    d2 = today.strftime("%B %d, %Y")
    print("d2 =", d2)

    # Destination bucket and object key. The original literal had a stray
    # quote after 'output/' that made the whole module a syntax error.
    bucket = 'brnddmn-s3'
    filepath = 'output/ExcelExample{}.xlsx'.format(d2)

    # Build the workbook in memory. Leaving the ExcelWriter context saves the
    # workbook into the buffer, so the bytes are read only after that block.
    with io.BytesIO() as output:
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Sheet1')
            df2.to_excel(writer, sheet_name='Sheet2')
        data = output.getvalue()

    # A single S3 resource is enough; the extra client that used to be
    # created first was overwritten before it was ever used.
    s3 = boto3.resource('s3')
    s3.Bucket(bucket).put_object(Key=filepath, Body=data)
     

非常感谢任何帮助。

我尝试重现这个问题,但除了语法错误和缺少数据框(DataFrame)的定义之外,一切都按预期正常工作。下面是一个可以运行的更正示例。

我唯一要补充的是:请确保您的 Lambda 环境中可以使用 pandas 和 XlsxWriter,例如通过 Lambda 层(layer)提供。

import boto3

import json
import io

from datetime import date

import pandas as pd

s3 = boto3.resource('s3')

def lambda_handler(event, context):
    """Build a formatted two-sheet Excel workbook in memory and upload it to S3.

    Each DataFrame is written to its own worksheet, after which the header
    row gets a custom format and every sheet gets fixed column widths, an
    auto-filter on the header row and a frozen first row. The workbook bytes
    are stored under ``output/ExcelExample<date>.xlsx`` using the
    module-level ``s3`` resource.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict: ``statusCode`` 200 and ``body`` holding the JSON-encoded S3 key.
    """
    # Build two sample DataFrames; each one is written to its own worksheet.
    df = pd.DataFrame({'Data': [10, 22, 31, 43, 57, 99, 65, 74, 88],
                       'Data2': [10, 22, 31, 43, 57, 99, 65, 74, 88],
                       'Data3': [10, 22, 31, 43, 57, 99, 65, 74, 88]})

    df2 = pd.DataFrame({'df2_Data': ['blue', 'yellow', 'purple', 'orange', 'green', 'brown', 'gray', 'white', 'red'],
                        'df2_Data2': ['bike', 'car', 'bus', 'train', 'boat', 'truck', 'plane', 'scooter', 'skateboard'],
                        'df2_Data3': ['chicken', 'cow', 'dog', 'crocodile', 'snake', 'pig', 'rat', 'mouse', 'monkey']})

    # Date stamp embedded in the object key, formatted like "March 05, 2024".
    today = date.today()
    d2 = today.strftime("%B %d, %Y")

    # The workbook is written into this buffer instead of onto disk.
    io_buffer = io.BytesIO()

    writer = pd.ExcelWriter(io_buffer, engine='xlsxwriter')
    sheets_in_writer = ['Sheet1', 'sheet2']
    data_frame_for_writer = [df, df2]

    # Write the raw data first; the header row is restyled afterwards.
    for frame, sheet_name in zip(data_frame_for_writer, sheets_in_writer):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

    # The underlying XlsxWriter workbook is needed to create cell formats.
    workbook = writer.book

    header_format = workbook.add_format({'bold': True, 'text_wrap': True, 'size': 10,
                                         'valign': 'top', 'fg_color': '#c7e7ff', 'border': 1})

    # Apply the same formatting to every sheet that was written.
    for frame, sheet_name in zip(data_frame_for_writer, sheets_in_writer):
        column_count = frame.shape[1]
        if column_count == 0:
            # Nothing to format on a sheet without columns.
            continue
        worksheet = writer.sheets[sheet_name]
        last_col = column_count - 1

        # Sheet-wide settings are applied once per sheet rather than once
        # per column, and the range follows the frame's own column count
        # instead of a hard-coded maximum.
        worksheet.set_column(0, last_col, 12)
        worksheet.autofilter(0, 0, 0, last_col)
        worksheet.freeze_panes(1, 0)

        # Rewrite each header cell so it picks up the custom header format.
        for col_num, value in enumerate(frame.columns.values):
            worksheet.write(0, col_num, value, header_format)

    # close() finalises the workbook into the buffer; ExcelWriter.save() no
    # longer exists in pandas 2.x.
    writer.close()

    bucket = 'brnddmn-s3'
    filepath = 'output/ExcelExample{}.xlsx'.format(d2)

    # The buffer only holds the complete file once the writer is closed.
    data = io_buffer.getvalue()

    s3.Bucket(bucket).put_object(Key=filepath, Body=data)

    return {
        'statusCode': 200,
        'body': json.dumps(filepath)
    }

AWS Lambda 保证提供 512 MB 的 /tmp 空间,因此一个便于沿用本地开发代码的可行方案是:把下面这一行代码替换为其后的三行代码。

# Before: the workbook is written to a path relative to the current directory.
writer = pd.ExcelWriter('ExcelExample{}.xlsx'.format(d2), engine='xlsxwriter')

# Write the workbook under /tmp so the finished file can be uploaded from there.
output_file_name = f"ExcelExample{d2}.xlsx"
local_file_path = "/tmp/" + output_file_name
writer = pd.ExcelWriter(local_file_path, engine="xlsxwriter")

这样您就可以使用以下代码把本地文件上传到 S3,并在上传后删除本地文件:

import os

import boto3


bucket = "brnddmn-s3"
# S3 keys are not filesystem paths: a leading "/" becomes part of the key and
# creates an empty-named top-level prefix, so the key starts at "output/".
s3_file_path = f"output/{output_file_name}"

# Upload the workbook that was written to local_file_path.
boto3.resource("s3").Object(bucket, s3_file_path).upload_file(local_file_path)
# Remove the local copy once the upload call has returned.
os.remove(local_file_path)

这应该是让您继续推进的最简单的改动,前提是约 500 MB 的空间对每个工作簿来说足够。